1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
58 /* Return index of given mode in mult and division cost tables. */
59 #define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   COSTS_N_BYTES therefore expresses a cost of N "average" instructions in
   the same units, measured in bytes, for the optimize-for-size table.  */
#define COSTS_N_BYTES(N) ((N) * 2)
/* Cost table used when tuning for size: entries are byte counts
   (COSTS_N_BYTES) rather than add-relative cycle counts, so the
   smallest encoding wins regardless of speed.  */
struct processor_costs size_cost = {	/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* 				    HI */
   COSTS_N_BYTES (3),			/* 				    SI */
   COSTS_N_BYTES (3),			/* 				    DI */
   COSTS_N_BYTES (5)},			/* 				 other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* 			    HI */
   COSTS_N_BYTES (3),			/* 			    SI */
   COSTS_N_BYTES (3),			/* 			    DI */
   COSTS_N_BYTES (5)},			/* 			 other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
/* Processor costs (relative to an add) */
/* Cost table for tuning for the original Intel 386.  */
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* 				    HI */
   COSTS_N_INSNS (6),			/* 				    SI */
   COSTS_N_INSNS (6),			/* 				    DI */
   COSTS_N_INSNS (6)},			/* 				 other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* 			    HI */
   COSTS_N_INSNS (23),			/* 			    SI */
   COSTS_N_INSNS (23),			/* 			    DI */
   COSTS_N_INSNS (23)},			/* 			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
/* Cost table for tuning for the Intel 486.  */
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* 				    HI */
   COSTS_N_INSNS (12),			/* 				    SI */
   COSTS_N_INSNS (12),			/* 				    DI */
   COSTS_N_INSNS (12)},			/* 				 other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* 			    HI */
   COSTS_N_INSNS (40),			/* 			    SI */
   COSTS_N_INSNS (40),			/* 			    DI */
   COSTS_N_INSNS (40)},			/* 			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
/* Cost table for tuning for the Intel Pentium (P5).  */
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* 				    HI */
   COSTS_N_INSNS (11),			/* 				    SI */
   COSTS_N_INSNS (11),			/* 				    DI */
   COSTS_N_INSNS (11)},			/* 				 other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* 			    HI */
   COSTS_N_INSNS (25),			/* 			    SI */
   COSTS_N_INSNS (25),			/* 			    DI */
   COSTS_N_INSNS (25)},			/* 			 other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
/* Cost table for tuning for the Intel Pentium Pro / P6 family.  */
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* 				    HI */
   COSTS_N_INSNS (4),			/* 				    SI */
   COSTS_N_INSNS (4),			/* 				    DI */
   COSTS_N_INSNS (4)},			/* 				 other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* 			    HI */
   COSTS_N_INSNS (17),			/* 			    SI */
   COSTS_N_INSNS (17),			/* 			    DI */
   COSTS_N_INSNS (17)},			/* 			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
/* Cost table for tuning for the AMD Geode.  */
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* 				    HI */
   COSTS_N_INSNS (7),			/* 				    SI */
   COSTS_N_INSNS (7),			/* 				    DI */
   COSTS_N_INSNS (7)},			/* 				 other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* 			    HI */
   COSTS_N_INSNS (39),			/* 			    SI */
   COSTS_N_INSNS (39),			/* 			    DI */
   COSTS_N_INSNS (39)},			/* 			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
/* Cost table for tuning for the AMD K6.  */
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* 				    HI */
   COSTS_N_INSNS (3),			/* 				    SI */
   COSTS_N_INSNS (3),			/* 				    DI */
   COSTS_N_INSNS (3)},			/* 				 other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* 			    HI */
   COSTS_N_INSNS (18),			/* 			    SI */
   COSTS_N_INSNS (18),			/* 			    DI */
   COSTS_N_INSNS (18)},			/* 			 other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
/* Cost table for tuning for the AMD Athlon.  */
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* 				    HI */
   COSTS_N_INSNS (5),			/* 				    SI */
   COSTS_N_INSNS (5),			/* 				    DI */
   COSTS_N_INSNS (5)},			/* 				 other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* 			    HI */
   COSTS_N_INSNS (42),			/* 			    SI */
   COSTS_N_INSNS (74),			/* 			    DI */
   COSTS_N_INSNS (74)},			/* 			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
/* Cost table for tuning for the AMD K8 (Opteron/Athlon 64).  */
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* 				    HI */
   COSTS_N_INSNS (3),			/* 				    SI */
   COSTS_N_INSNS (4),			/* 				    DI */
   COSTS_N_INSNS (5)},			/* 				 other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* 			    HI */
   COSTS_N_INSNS (42),			/* 			    SI */
   COSTS_N_INSNS (74),			/* 			    DI */
   COSTS_N_INSNS (74)},			/* 			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
/* Cost table for tuning for the Intel Pentium 4 (NetBurst).  */
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/* 				    HI */
   COSTS_N_INSNS (15),			/* 				    SI */
   COSTS_N_INSNS (15),			/* 				    DI */
   COSTS_N_INSNS (15)},			/* 				 other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/* 			    HI */
   COSTS_N_INSNS (56),			/* 			    SI */
   COSTS_N_INSNS (56),			/* 			    DI */
   COSTS_N_INSNS (56)},			/* 			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
/* Cost table for tuning for the Intel Nocona (64-bit Pentium 4 / Xeon).  */
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/* 				    HI */
   COSTS_N_INSNS (10),			/* 				    SI */
   COSTS_N_INSNS (10),			/* 				    DI */
   COSTS_N_INSNS (10)},			/* 				 other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/* 			    HI */
   COSTS_N_INSNS (66),			/* 			    SI */
   COSTS_N_INSNS (66),			/* 			    DI */
   COSTS_N_INSNS (66)},			/* 			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
/* Generic64 should produce code tuned for Nocona and K8: a compromise
   cost table for -mtune=generic in 64-bit mode.  */
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* 				    HI */
   COSTS_N_INSNS (3),			/* 				    SI */
   COSTS_N_INSNS (4),			/* 				    DI */
   COSTS_N_INSNS (2)},			/* 				 other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* 			    HI */
   COSTS_N_INSNS (42),			/* 			    SI */
   COSTS_N_INSNS (74),			/* 			    DI */
   COSTS_N_INSNS (74)},			/* 			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.  */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8: the compromise table for -mtune=generic in 32-bit mode.  */
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* 				    HI */
   COSTS_N_INSNS (3),			/* 				    SI */
   COSTS_N_INSNS (4),			/* 				    DI */
   COSTS_N_INSNS (2)},			/* 				 other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* 			    HI */
   COSTS_N_INSNS (42),			/* 			    SI */
   COSTS_N_INSNS (74),			/* 			    DI */
   COSTS_N_INSNS (74)},			/* 			 other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
/* Pointer to the cost table currently in effect; initialized to the
   Pentium table here.  Presumably reset during option processing based
   on the selected -mtune target -- confirm against the option-handling
   code elsewhere in this file.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks: one bit per PROCESSOR_* value.
   These masks are OR-ed together in the x86_* tuning flags below to state
   which tuning targets want a given feature or optimization.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_GEODE (1<<PROCESSOR_GEODE)
/* Defined before m_K6 below; harmless, since object-like macros expand at
   the point of use, not the point of definition.  */
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/Athlon/K8).  */

/* Per-feature tuning flags.  Each flag is a bitmask of the m_* processor
   masks above; a set bit means the feature/optimization is enabled when
   tuning for that processor.  */

/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
   Generic64 seems like good code size tradeoff.  We can't enable it for 32bit
   generic because it is not working well with PPro base chips.  */
const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC64;
const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
/* Branch hints were put in P4 based on simulation result.  But
   after P4 was made, no performance benefit was observed with
   branch hints.  It also increases the code size.  As the result,
   icc never generates branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on Generic32
   compilation setting as well.  However in current implementation the
   partial register stalls are not eliminated very well - they can
   be introduced via subregs synthesized by combine and can happen
   in caller/callee saving sequences.
   Because this option pays back little on PPro based chips and is in conflict
   with partial reg. dependencies used by Athlon/P4 based chips, it is better
   to leave it off for generic32 for now.  */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls.  Just like
   the x86_partial_reg_stall this option might be considered for Generic32
   if our scheme for avoiding partial stalls was more effective.  */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC | m_GEODE);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
845 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
846 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
847 const int x86_shift1 = ~m_486;
848 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
849 /* In Generic model we have a conflict between PPro/Pentium4 based chips
850 that treat 128bit SSE registers as single units versus K8 based chips that
851 divide SSE registers into two 64bit halves.
852 x86_sse_partial_reg_dependency promotes all store destinations to be 128bit
853 to allow register renaming on 128bit SSE units, but usually results in one
854 extra microop on 64bit SSE units. Experimental results show that disabling
855 this option on P4 brings over 20% SPECfp regression, while enabling it on
856 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
of FP instructions.  */
858 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
859 /* Set for machines where the type and dependencies are resolved on SSE
860 register parts instead of whole registers, so we may maintain just
861 lower part of scalar values in proper format leaving the upper part unused.  */
863 const int x86_sse_split_regs = m_ATHLON_K8;
864 const int x86_sse_typeless_stores = m_ATHLON_K8;
865 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
866 const int x86_use_ffreep = m_ATHLON_K8;
867 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE;
868 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
870 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
871 integer data in xmm registers. Which results in pretty abysmal code. */
872 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
874 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
875 /* Some CPU cores are not able to predict more than 4 branch instructions in
876 the 16 byte window. */
877 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
878 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_GENERIC;
879 const int x86_use_bt = m_ATHLON_K8;
880 /* Compare and exchange was added for 80486. */
881 const int x86_cmpxchg = ~m_386;
882 /* Compare and exchange 8 bytes was added for pentium. */
883 const int x86_cmpxchg8b = ~(m_386 | m_486);
884 /* Compare and exchange 16 bytes was added for nocona. */
885 const int x86_cmpxchg16b = m_NOCONA;
886 /* Exchange and add was added for 80486. */
887 const int x86_xadd = ~m_386;
888 /* Byteswap was added for 80486. */
889 const int x86_bswap = ~m_386;
890 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
892 /* In case the average insn count for single function invocation is
893 lower than this constant, emit fast (but longer) prologue and epilogue.  */
895 #define FAST_PROLOGUE_INSN_COUNT 20
897 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
898 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
899 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
900 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
902 /* Array of the smallest class containing reg number REGNO, indexed by
903 REGNO. Used by REGNO_REG_CLASS in i386.h. */
905 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
908 AREG, DREG, CREG, BREG,
910 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
912 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
913 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
916 /* flags, fpsr, fpcr, dirflag, frame */
917 NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
918 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
920 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
922 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
923 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
924 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
928 /* The "default" register map used in 32bit mode. */
930 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
932 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
933 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
934 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
935 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
936 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
937 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
938 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
941 static int const x86_64_int_parameter_registers[6] =
943 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
944 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
947 static int const x86_64_int_return_registers[4] =
949 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
952 /* The "default" register map used in 64bit mode. */
953 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
955 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
956 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
957 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
958 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
959 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
960 8,9,10,11,12,13,14,15, /* extended integer registers */
961 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
964 /* Define the register numbers to be used in Dwarf debugging information.
965 The SVR4 reference port C compiler uses the following register numbers
966 in its Dwarf output code:
967 0 for %eax (gcc regno = 0)
968 1 for %ecx (gcc regno = 2)
969 2 for %edx (gcc regno = 1)
970 3 for %ebx (gcc regno = 3)
971 4 for %esp (gcc regno = 7)
972 5 for %ebp (gcc regno = 6)
973 6 for %esi (gcc regno = 4)
974 7 for %edi (gcc regno = 5)
975 The following three DWARF register numbers are never generated by
976 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
977 believes these numbers have these meanings.
978 8 for %eip (no gcc equivalent)
979 9 for %eflags (gcc regno = 17)
980 10 for %trapno (no gcc equivalent)
981 It is not at all clear how we should number the FP stack registers
982 for the x86 architecture. If the version of SDB on x86/svr4 were
983 a bit less brain dead with respect to floating-point then we would
984 have a precedent to follow with respect to DWARF register numbers
985 for x86 FP registers, but the SDB on x86/svr4 is so completely
986 broken with respect to FP registers that it is hardly worth thinking
987 of it as something to strive for compatibility with.
988 The version of x86/svr4 SDB I have at the moment does (partially)
989 seem to believe that DWARF register number 11 is associated with
990 the x86 register %st(0), but that's about all. Higher DWARF
991 register numbers don't seem to be associated with anything in
992 particular, and even for DWARF regno 11, SDB only seems to under-
993 stand that it should say that a variable lives in %st(0) (when
994 asked via an `=' command) if we said it was in DWARF regno 11,
995 but SDB still prints garbage when asked for the value of the
996 variable in question (via a `/' command).
997 (Also note that the labels SDB prints for various FP stack regs
998 when doing an `x' command are all wrong.)
999 Note that these problems generally don't affect the native SVR4
1000 C compiler because it doesn't allow the use of -O with -g and
1001 because when it is *not* optimizing, it allocates a memory
1002 location for each floating-point variable, and the memory
1003 location is what gets described in the DWARF AT_location
1004 attribute for the variable in question.
1005 Regardless of the severe mental illness of the x86/svr4 SDB, we
1006 do something sensible here and we use the following DWARF
1007 register numbers. Note that these are all stack-top-relative
1009 11 for %st(0) (gcc regno = 8)
1010 12 for %st(1) (gcc regno = 9)
1011 13 for %st(2) (gcc regno = 10)
1012 14 for %st(3) (gcc regno = 11)
1013 15 for %st(4) (gcc regno = 12)
1014 16 for %st(5) (gcc regno = 13)
1015 17 for %st(6) (gcc regno = 14)
1016 18 for %st(7) (gcc regno = 15)  */
1018 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1020 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1021 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1022 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1023 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1024 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1025 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1026 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1029 /* Test and compare insns in i386.md store the information needed to
1030 generate branch and scc insns here. */
/* The two operands of the pending comparison.  */
1032 rtx ix86_compare_op0 = NULL_RTX;
1033 rtx ix86_compare_op1 = NULL_RTX;
/* NOTE(review): appears to hold an already-emitted flags value for the
   comparison when non-NULL -- confirm against the consumers in i386.md.  */
1034 rtx ix86_compare_emitted = NULL_RTX;
1036 /* Size of the register save area. */
1037 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1039 /* Define the structure for the machine field in struct function. */
1041 struct stack_local_entry GTY(())
1043 unsigned short mode;
1046 struct stack_local_entry *next;
1049 /* Structure describing stack frame layout.
1050 Stack grows downward:
1056 saved frame pointer if frame_pointer_needed
1057 <- HARD_FRAME_POINTER
1062 [va_arg registers] (
1063 > to_allocate <- FRAME_POINTER  */
1073 HOST_WIDE_INT frame;
1075 int outgoing_arguments_size;
1078 HOST_WIDE_INT to_allocate;
1079 /* The offsets relative to ARG_POINTER. */
1080 HOST_WIDE_INT frame_pointer_offset;
1081 HOST_WIDE_INT hard_frame_pointer_offset;
1082 HOST_WIDE_INT stack_pointer_offset;
1084 /* When save_regs_using_mov is set, emit prologue using
1085 move instead of push instructions. */
1086 bool save_regs_using_mov;
1089 /* Code model option. */
1090 enum cmodel ix86_cmodel;
1092 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1094 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1096 /* Which unit we are generating floating point math for. */
1097 enum fpmath_unit ix86_fpmath;
1099 /* Which cpu are we scheduling for. */
1100 enum processor_type ix86_tune;
1101 /* Which instruction set architecture to use. */
1102 enum processor_type ix86_arch;
1104 /* true if sse prefetch instruction is not NOOP. */
1105 int x86_prefetch_sse;
1107 /* ix86_regparm_string as a number */
1108 static int ix86_regparm;
1110 /* -mstackrealign option */
1111 extern int ix86_force_align_arg_pointer;
1112 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1114 /* Preferred alignment for stack boundary in bits. */
1115 unsigned int ix86_preferred_stack_boundary;
1117 /* Values 1-5: see jump.c */
1118 int ix86_branch_cost;
1120 /* Variables which are this size or smaller are put in the data/bss
1121 or ldata/lbss sections. */
1123 int ix86_section_threshold = 65536;
1125 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1126 char internal_label_prefix[16];
1127 int internal_label_prefix_len;
1129 static bool ix86_handle_option (size_t, const char *, int);
1130 static void output_pic_addr_const (FILE *, rtx, int);
1131 static void put_condition_code (enum rtx_code, enum machine_mode,
1133 static const char *get_some_local_dynamic_name (void);
1134 static int get_some_local_dynamic_name_1 (rtx *, void *);
1135 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1136 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1138 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1139 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1141 static rtx get_thread_pointer (int);
1142 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1143 static void get_pc_thunk_name (char [32], unsigned int);
1144 static rtx gen_push (rtx);
1145 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1146 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1147 static struct machine_function * ix86_init_machine_status (void);
1148 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1149 static int ix86_nsaved_regs (void);
1150 static void ix86_emit_save_regs (void);
1151 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1152 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1153 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1154 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1155 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1156 static rtx ix86_expand_aligntest (rtx, int);
1157 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1158 static int ix86_issue_rate (void);
1159 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1160 static int ia32_multipass_dfa_lookahead (void);
1161 static void ix86_init_mmx_sse_builtins (void);
1162 static rtx x86_this_parameter (tree);
1163 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1164 HOST_WIDE_INT, tree);
1165 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1166 static void x86_file_start (void);
1167 static void ix86_reorg (void);
1168 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1169 static tree ix86_build_builtin_va_list (void);
1170 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1172 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1173 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1174 static bool ix86_vector_mode_supported_p (enum machine_mode);
1176 static int ix86_address_cost (rtx);
1177 static bool ix86_cannot_force_const_mem (rtx);
1178 static rtx ix86_delegitimize_address (rtx);
1180 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1182 struct builtin_description;
1183 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1185 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1187 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1188 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1189 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1190 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1191 static rtx safe_vector_operand (rtx, enum machine_mode);
1192 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1193 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1194 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1195 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1196 static int ix86_fp_comparison_cost (enum rtx_code code);
1197 static unsigned int ix86_select_alt_pic_regnum (void);
1198 static int ix86_save_reg (unsigned int, int);
1199 static void ix86_compute_frame_layout (struct ix86_frame *);
1200 static int ix86_comp_type_attributes (tree, tree);
1201 static int ix86_function_regparm (tree, tree);
1202 const struct attribute_spec ix86_attribute_table[];
1203 static bool ix86_function_ok_for_sibcall (tree, tree);
1204 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1205 static int ix86_value_regno (enum machine_mode, tree, tree);
1206 static bool contains_128bit_aligned_vector_p (tree);
1207 static rtx ix86_struct_value_rtx (tree, int);
1208 static bool ix86_ms_bitfield_layout_p (tree);
1209 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1210 static int extended_reg_mentioned_1 (rtx *, void *);
1211 static bool ix86_rtx_costs (rtx, int, int, int *);
1212 static int min_insn_size (rtx);
1213 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1214 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1215 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1217 static void ix86_init_builtins (void);
1218 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1219 static const char *ix86_mangle_fundamental_type (tree);
1220 static tree ix86_stack_protect_fail (void);
1221 static rtx ix86_internal_arg_pointer (void);
1222 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1224 /* This function is only used on Solaris. */
1225 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1228 /* Register class used for passing given 64bit part of the argument.
1229 These represent classes as documented by the PS ABI, with the exception
1230 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1231 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1233 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1234 whenever possible (upper half does contain padding).  */
1236 enum x86_64_reg_class
1239 X86_64_INTEGER_CLASS,
1240 X86_64_INTEGERSI_CLASS,
1247 X86_64_COMPLEX_X87_CLASS,
1250 static const char * const x86_64_reg_class_name[] = {
1251 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1252 "sseup", "x87", "x87up", "cplx87", "no"
1255 #define MAX_CLASSES 4
1257 /* Table of constants used by fldpi, fldln2, etc.... */
1258 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1259 static bool ext_80387_constants_init = 0;
1260 static void init_ext_80387_constants (void);
1261 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1262 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1263 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1264 static section *x86_64_elf_select_section (tree decl, int reloc,
1265 unsigned HOST_WIDE_INT align)
1268 /* Initialize the GCC target structure. */
1269 #undef TARGET_ATTRIBUTE_TABLE
1270 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1271 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1272 # undef TARGET_MERGE_DECL_ATTRIBUTES
1273 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1276 #undef TARGET_COMP_TYPE_ATTRIBUTES
1277 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1279 #undef TARGET_INIT_BUILTINS
1280 #define TARGET_INIT_BUILTINS ix86_init_builtins
1281 #undef TARGET_EXPAND_BUILTIN
1282 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1284 #undef TARGET_ASM_FUNCTION_EPILOGUE
1285 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1287 #undef TARGET_ENCODE_SECTION_INFO
1288 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1289 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1291 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1294 #undef TARGET_ASM_OPEN_PAREN
1295 #define TARGET_ASM_OPEN_PAREN ""
1296 #undef TARGET_ASM_CLOSE_PAREN
1297 #define TARGET_ASM_CLOSE_PAREN ""
1299 #undef TARGET_ASM_ALIGNED_HI_OP
1300 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1301 #undef TARGET_ASM_ALIGNED_SI_OP
1302 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1304 #undef TARGET_ASM_ALIGNED_DI_OP
1305 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1308 #undef TARGET_ASM_UNALIGNED_HI_OP
1309 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1310 #undef TARGET_ASM_UNALIGNED_SI_OP
1311 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1312 #undef TARGET_ASM_UNALIGNED_DI_OP
1313 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1315 #undef TARGET_SCHED_ADJUST_COST
1316 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1317 #undef TARGET_SCHED_ISSUE_RATE
1318 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1319 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1320 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1321 ia32_multipass_dfa_lookahead
1323 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1324 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1327 #undef TARGET_HAVE_TLS
1328 #define TARGET_HAVE_TLS true
1330 #undef TARGET_CANNOT_FORCE_CONST_MEM
1331 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1332 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1333 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1335 #undef TARGET_DELEGITIMIZE_ADDRESS
1336 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1338 #undef TARGET_MS_BITFIELD_LAYOUT_P
1339 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1342 #undef TARGET_BINDS_LOCAL_P
1343 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1346 #undef TARGET_ASM_OUTPUT_MI_THUNK
1347 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1348 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1349 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1351 #undef TARGET_ASM_FILE_START
1352 #define TARGET_ASM_FILE_START x86_file_start
1354 #undef TARGET_DEFAULT_TARGET_FLAGS
1355 #define TARGET_DEFAULT_TARGET_FLAGS \
1357 | TARGET_64BIT_DEFAULT \
1358 | TARGET_SUBTARGET_DEFAULT \
1359 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1361 #undef TARGET_HANDLE_OPTION
1362 #define TARGET_HANDLE_OPTION ix86_handle_option
1364 #undef TARGET_RTX_COSTS
1365 #define TARGET_RTX_COSTS ix86_rtx_costs
1366 #undef TARGET_ADDRESS_COST
1367 #define TARGET_ADDRESS_COST ix86_address_cost
1369 #undef TARGET_FIXED_CONDITION_CODE_REGS
1370 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1371 #undef TARGET_CC_MODES_COMPATIBLE
1372 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1374 #undef TARGET_MACHINE_DEPENDENT_REORG
1375 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1377 #undef TARGET_BUILD_BUILTIN_VA_LIST
1378 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1380 #undef TARGET_MD_ASM_CLOBBERS
1381 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1383 #undef TARGET_PROMOTE_PROTOTYPES
1384 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1385 #undef TARGET_STRUCT_VALUE_RTX
1386 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1387 #undef TARGET_SETUP_INCOMING_VARARGS
1388 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1389 #undef TARGET_MUST_PASS_IN_STACK
1390 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1391 #undef TARGET_PASS_BY_REFERENCE
1392 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1393 #undef TARGET_INTERNAL_ARG_POINTER
1394 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1395 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1396 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1398 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1399 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1401 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1402 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1404 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1405 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1408 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1409 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1412 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1413 #undef TARGET_INSERT_ATTRIBUTES
1414 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1417 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1418 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1420 #undef TARGET_STACK_PROTECT_FAIL
1421 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1423 #undef TARGET_FUNCTION_VALUE
1424 #define TARGET_FUNCTION_VALUE ix86_function_value
1426 struct gcc_target targetm = TARGET_INITIALIZER;
1429 /* The svr4 ABI for the i386 says that records and unions are returned in memory.  */
1431 #ifndef DEFAULT_PCC_STRUCT_RETURN
1432 #define DEFAULT_PCC_STRUCT_RETURN 1
1435 /* Implement TARGET_HANDLE_OPTION. */
1438 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1445 target_flags &= ~MASK_3DNOW_A;
1446 target_flags_explicit |= MASK_3DNOW_A;
1453 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1454 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1461 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1462 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1469 target_flags &= ~MASK_SSE3;
1470 target_flags_explicit |= MASK_SSE3;
1479 /* Sometimes certain combinations of command options do not make
1480 sense on a particular target machine. You can define a macro
1481 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1482 defined, is executed once just after all the command options have
1485 Don't use this macro to turn on various extra optimizations for
1486 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1489 override_options (void)
1492 int ix86_tune_defaulted = 0;
1494 /* Comes from final.c -- no real reason to change it. */
1495 #define MAX_CODE_ALIGN 16
1499 const struct processor_costs *cost; /* Processor costs */
1500 const int target_enable; /* Target flags to enable. */
1501 const int target_disable; /* Target flags to disable. */
1502 const int align_loop; /* Default alignments. */
1503 const int align_loop_max_skip;
1504 const int align_jump;
1505 const int align_jump_max_skip;
1506 const int align_func;
1508 const processor_target_table[PROCESSOR_max] =
1510 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1511 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1512 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1513 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1514 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1515 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1516 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1517 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1518 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1519 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1520 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1521 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1524 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1527 const char *const name; /* processor name or nickname. */
1528 const enum processor_type processor;
1529 const enum pta_flags
1535 PTA_PREFETCH_SSE = 16,
1542 const processor_alias_table[] =
1544 {"i386", PROCESSOR_I386, 0},
1545 {"i486", PROCESSOR_I486, 0},
1546 {"i586", PROCESSOR_PENTIUM, 0},
1547 {"pentium", PROCESSOR_PENTIUM, 0},
1548 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1549 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1550 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1551 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1552 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1553 {"i686", PROCESSOR_PENTIUMPRO, 0},
1554 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1555 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1556 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1557 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1558 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1559 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1560 | PTA_MMX | PTA_PREFETCH_SSE},
1561 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1562 | PTA_MMX | PTA_PREFETCH_SSE},
1563 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1564 | PTA_MMX | PTA_PREFETCH_SSE},
1565 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1566 | PTA_MMX | PTA_PREFETCH_SSE},
1567 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1569 {"k6", PROCESSOR_K6, PTA_MMX},
1570 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1571 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1572 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1574 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1575 | PTA_3DNOW | PTA_3DNOW_A},
1576 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1577 | PTA_3DNOW_A | PTA_SSE},
1578 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1579 | PTA_3DNOW_A | PTA_SSE},
1580 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1581 | PTA_3DNOW_A | PTA_SSE},
1582 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1583 | PTA_SSE | PTA_SSE2 },
1584 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1585 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1586 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1587 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1588 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1589 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1590 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1591 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1592 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1593 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1596 int const pta_size = ARRAY_SIZE (processor_alias_table);
1598 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1599 SUBTARGET_OVERRIDE_OPTIONS;
1602 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1603 SUBSUBTARGET_OVERRIDE_OPTIONS;
1606 /* -fPIC is the default for x86_64. */
1607 if (TARGET_MACHO && TARGET_64BIT)
1610 /* Set the default values for switches whose default depends on TARGET_64BIT
1611 in case they weren't overwritten by command line options. */
1614 /* Mach-O doesn't support omitting the frame pointer for now. */
1615 if (flag_omit_frame_pointer == 2)
1616 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1617 if (flag_asynchronous_unwind_tables == 2)
1618 flag_asynchronous_unwind_tables = 1;
1619 if (flag_pcc_struct_return == 2)
1620 flag_pcc_struct_return = 0;
1624 if (flag_omit_frame_pointer == 2)
1625 flag_omit_frame_pointer = 0;
1626 if (flag_asynchronous_unwind_tables == 2)
1627 flag_asynchronous_unwind_tables = 0;
1628 if (flag_pcc_struct_return == 2)
1629 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1632 /* Need to check -mtune=generic first. */
1633 if (ix86_tune_string)
1635 if (!strcmp (ix86_tune_string, "generic")
1636 || !strcmp (ix86_tune_string, "i686")
1637 /* As special support for cross compilers we read -mtune=native
1638 as -mtune=generic. With native compilers we won't see the
1639 -mtune=native, as it was changed by the driver. */
1640 || !strcmp (ix86_tune_string, "native"))
1643 ix86_tune_string = "generic64";
1645 ix86_tune_string = "generic32";
1647 else if (!strncmp (ix86_tune_string, "generic", 7))
1648 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1652 if (ix86_arch_string)
1653 ix86_tune_string = ix86_arch_string;
1654 if (!ix86_tune_string)
1656 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1657 ix86_tune_defaulted = 1;
1660 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1661 need to use a sensible tune option. */
1662 if (!strcmp (ix86_tune_string, "generic")
1663 || !strcmp (ix86_tune_string, "x86-64")
1664 || !strcmp (ix86_tune_string, "i686"))
1667 ix86_tune_string = "generic64";
1669 ix86_tune_string = "generic32";
1672 if (!strcmp (ix86_tune_string, "x86-64"))
1673 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1674 "-mtune=generic instead as appropriate.");
1676 if (!ix86_arch_string)
1677 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1678 if (!strcmp (ix86_arch_string, "generic"))
1679 error ("generic CPU can be used only for -mtune= switch");
1680 if (!strncmp (ix86_arch_string, "generic", 7))
1681 error ("bad value (%s) for -march= switch", ix86_arch_string);
1683 if (ix86_cmodel_string != 0)
1685 if (!strcmp (ix86_cmodel_string, "small"))
1686 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1687 else if (!strcmp (ix86_cmodel_string, "medium"))
1688 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1690 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1691 else if (!strcmp (ix86_cmodel_string, "32"))
1692 ix86_cmodel = CM_32;
1693 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1694 ix86_cmodel = CM_KERNEL;
1695 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1696 ix86_cmodel = CM_LARGE;
1698 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1702 ix86_cmodel = CM_32;
1704 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1706 if (ix86_asm_string != 0)
1709 && !strcmp (ix86_asm_string, "intel"))
1710 ix86_asm_dialect = ASM_INTEL;
1711 else if (!strcmp (ix86_asm_string, "att"))
1712 ix86_asm_dialect = ASM_ATT;
1714 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1716 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1717 error ("code model %qs not supported in the %s bit mode",
1718 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1719 if (ix86_cmodel == CM_LARGE)
1720 sorry ("code model %<large%> not supported yet");
1721 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1722 sorry ("%i-bit mode not compiled in",
1723 (target_flags & MASK_64BIT) ? 64 : 32);
1725 for (i = 0; i < pta_size; i++)
1726 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1728 ix86_arch = processor_alias_table[i].processor;
1729 /* Default cpu tuning to the architecture. */
1730 ix86_tune = ix86_arch;
1731 if (processor_alias_table[i].flags & PTA_MMX
1732 && !(target_flags_explicit & MASK_MMX))
1733 target_flags |= MASK_MMX;
1734 if (processor_alias_table[i].flags & PTA_3DNOW
1735 && !(target_flags_explicit & MASK_3DNOW))
1736 target_flags |= MASK_3DNOW;
1737 if (processor_alias_table[i].flags & PTA_3DNOW_A
1738 && !(target_flags_explicit & MASK_3DNOW_A))
1739 target_flags |= MASK_3DNOW_A;
1740 if (processor_alias_table[i].flags & PTA_SSE
1741 && !(target_flags_explicit & MASK_SSE))
1742 target_flags |= MASK_SSE;
1743 if (processor_alias_table[i].flags & PTA_SSE2
1744 && !(target_flags_explicit & MASK_SSE2))
1745 target_flags |= MASK_SSE2;
1746 if (processor_alias_table[i].flags & PTA_SSE3
1747 && !(target_flags_explicit & MASK_SSE3))
1748 target_flags |= MASK_SSE3;
1749 if (processor_alias_table[i].flags & PTA_SSSE3
1750 && !(target_flags_explicit & MASK_SSSE3))
1751 target_flags |= MASK_SSSE3;
1752 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1753 x86_prefetch_sse = true;
1754 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1755 error ("CPU you selected does not support x86-64 "
1761 error ("bad value (%s) for -march= switch", ix86_arch_string);
1763 for (i = 0; i < pta_size; i++)
1764 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1766 ix86_tune = processor_alias_table[i].processor;
1767 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1769 if (ix86_tune_defaulted)
1771 ix86_tune_string = "x86-64";
1772 for (i = 0; i < pta_size; i++)
1773 if (! strcmp (ix86_tune_string,
1774 processor_alias_table[i].name))
1776 ix86_tune = processor_alias_table[i].processor;
1779 error ("CPU you selected does not support x86-64 "
1782 /* Intel CPUs have always interpreted SSE prefetch instructions as
1783 NOPs; so, we can enable SSE prefetch instructions even when
1784 -mtune (rather than -march) points us to a processor that has them.
1785 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1786 higher processors. */
1787 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1788 x86_prefetch_sse = true;
1792 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1795 ix86_cost = &size_cost;
1797 ix86_cost = processor_target_table[ix86_tune].cost;
1798 target_flags |= processor_target_table[ix86_tune].target_enable;
1799 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1801 /* Arrange to set up i386_stack_locals for all functions. */
1802 init_machine_status = ix86_init_machine_status;
1804 /* Validate -mregparm= value. */
1805 if (ix86_regparm_string)
1807 i = atoi (ix86_regparm_string);
1808 if (i < 0 || i > REGPARM_MAX)
1809 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1815 ix86_regparm = REGPARM_MAX;
1817 /* If the user has provided any of the -malign-* options,
1818 warn and use that value only if -falign-* is not set.
1819 Remove this code in GCC 3.2 or later. */
1820 if (ix86_align_loops_string)
1822 warning (0, "-malign-loops is obsolete, use -falign-loops");
1823 if (align_loops == 0)
1825 i = atoi (ix86_align_loops_string);
1826 if (i < 0 || i > MAX_CODE_ALIGN)
1827 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1829 align_loops = 1 << i;
1833 if (ix86_align_jumps_string)
1835 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1836 if (align_jumps == 0)
1838 i = atoi (ix86_align_jumps_string);
1839 if (i < 0 || i > MAX_CODE_ALIGN)
1840 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1842 align_jumps = 1 << i;
1846 if (ix86_align_funcs_string)
1848 warning (0, "-malign-functions is obsolete, use -falign-functions");
1849 if (align_functions == 0)
1851 i = atoi (ix86_align_funcs_string);
1852 if (i < 0 || i > MAX_CODE_ALIGN)
1853 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1855 align_functions = 1 << i;
1859 /* Default align_* from the processor table. */
1860 if (align_loops == 0)
1862 align_loops = processor_target_table[ix86_tune].align_loop;
1863 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1865 if (align_jumps == 0)
1867 align_jumps = processor_target_table[ix86_tune].align_jump;
1868 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1870 if (align_functions == 0)
1872 align_functions = processor_target_table[ix86_tune].align_func;
1875 /* Validate -mbranch-cost= value, or provide default. */
1876 ix86_branch_cost = ix86_cost->branch_cost;
1877 if (ix86_branch_cost_string)
1879 i = atoi (ix86_branch_cost_string);
1881 error ("-mbranch-cost=%d is not between 0 and 5", i);
1883 ix86_branch_cost = i;
1885 if (ix86_section_threshold_string)
1887 i = atoi (ix86_section_threshold_string);
1889 error ("-mlarge-data-threshold=%d is negative", i);
1891 ix86_section_threshold = i;
1894 if (ix86_tls_dialect_string)
1896 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1897 ix86_tls_dialect = TLS_DIALECT_GNU;
1898 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1899 ix86_tls_dialect = TLS_DIALECT_GNU2;
1900 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1901 ix86_tls_dialect = TLS_DIALECT_SUN;
1903 error ("bad value (%s) for -mtls-dialect= switch",
1904 ix86_tls_dialect_string);
1907 /* Keep nonleaf frame pointers. */
1908 if (flag_omit_frame_pointer)
1909 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1910 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1911 flag_omit_frame_pointer = 1;
1913 /* If we're doing fast math, we don't care about comparison order
1914 wrt NaNs. This lets us use a shorter comparison sequence. */
1915 if (flag_finite_math_only)
1916 target_flags &= ~MASK_IEEE_FP;
1918 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1919 since the insns won't need emulation. */
1920 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1921 target_flags &= ~MASK_NO_FANCY_MATH_387;
1923 /* Likewise, if the target doesn't have a 387, or we've specified
1924 software floating point, don't use 387 inline intrinsics. */
1926 target_flags |= MASK_NO_FANCY_MATH_387;
1928 /* Turn on SSE3 builtins for -mssse3. */
1930 target_flags |= MASK_SSE3;
1932 /* Turn on SSE2 builtins for -msse3. */
1934 target_flags |= MASK_SSE2;
1936 /* Turn on SSE builtins for -msse2. */
1938 target_flags |= MASK_SSE;
1940 /* Turn on MMX builtins for -msse. */
1943 target_flags |= MASK_MMX & ~target_flags_explicit;
1944 x86_prefetch_sse = true;
1947 /* Turn on MMX builtins for 3Dnow. */
1949 target_flags |= MASK_MMX;
1953 if (TARGET_ALIGN_DOUBLE)
1954 error ("-malign-double makes no sense in the 64bit mode");
1956 error ("-mrtd calling convention not supported in the 64bit mode");
1958 /* Enable by default the SSE and MMX builtins. Do allow the user to
1959 explicitly disable any of these. In particular, disabling SSE and
1960 MMX for kernel code is extremely useful. */
1962 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1963 & ~target_flags_explicit);
1967 /* i386 ABI does not specify red zone. It still makes sense to use it
1968 when programmer takes care to stack from being destroyed. */
1969 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1970 target_flags |= MASK_NO_RED_ZONE;
1973 /* Validate -mpreferred-stack-boundary= value, or provide default.
1974 The default of 128 bits is for Pentium III's SSE __m128. We can't
1975 change it because of optimize_size. Otherwise, we can't mix object
1976 files compiled with -Os and -On. */
1977 ix86_preferred_stack_boundary = 128;
1978 if (ix86_preferred_stack_boundary_string)
1980 i = atoi (ix86_preferred_stack_boundary_string);
1981 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1982 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1983 TARGET_64BIT ? 4 : 2);
1985 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1988 /* Accept -msseregparm only if at least SSE support is enabled. */
1989 if (TARGET_SSEREGPARM
1991 error ("-msseregparm used without SSE enabled");
1993 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1995 if (ix86_fpmath_string != 0)
1997 if (! strcmp (ix86_fpmath_string, "387"))
1998 ix86_fpmath = FPMATH_387;
1999 else if (! strcmp (ix86_fpmath_string, "sse"))
2003 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2004 ix86_fpmath = FPMATH_387;
2007 ix86_fpmath = FPMATH_SSE;
2009 else if (! strcmp (ix86_fpmath_string, "387,sse")
2010 || ! strcmp (ix86_fpmath_string, "sse,387"))
2014 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2015 ix86_fpmath = FPMATH_387;
2017 else if (!TARGET_80387)
2019 warning (0, "387 instruction set disabled, using SSE arithmetics");
2020 ix86_fpmath = FPMATH_SSE;
2023 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2026 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2029 /* If the i387 is disabled, then do not return values in it. */
2031 target_flags &= ~MASK_FLOAT_RETURNS;
2033 if ((x86_accumulate_outgoing_args & TUNEMASK)
2034 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2036 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2038 /* ??? Unwind info is not correct around the CFG unless either a frame
2039 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2040 unwind info generation to be aware of the CFG and propagating states
2042 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2043 || flag_exceptions || flag_non_call_exceptions)
2044 && flag_omit_frame_pointer
2045 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2047 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2048 warning (0, "unwind tables currently require either a frame pointer "
2049 "or -maccumulate-outgoing-args for correctness");
2050 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2053 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2056 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2057 p = strchr (internal_label_prefix, 'X');
2058 internal_label_prefix_len = p - internal_label_prefix;
2062 /* When scheduling description is not available, disable scheduler pass
2063 so it won't slow down the compilation and make x87 code slower. */
2064 if (!TARGET_SCHEDULE)
2065 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2068 /* switch to the appropriate section for output of DECL.
2069 DECL is either a `VAR_DECL' node or a constant of some sort.
2070 RELOC indicates whether forming the initial value of DECL requires
2071 link-time relocations. */
/* Implements the TARGET_ASM_SELECT_SECTION hook.  Under the x86-64
   medium code model, objects that ix86_in_large_data_p classifies as
   "large" are routed into ".ldata"-prefixed sections; everything else
   falls through to default_elf_select_section.  */
2074 x86_64_elf_select_section (tree decl, int reloc,
2075 unsigned HOST_WIDE_INT align)
2077 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2078 && ix86_in_large_data_p (decl))
2080 const char *sname = NULL;
2081 unsigned int flags = SECTION_WRITE;
/* Map the decl's section category to the matching ".l*" section name.  */
2082 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2087 case SECCAT_DATA_REL:
2088 sname = ".ldata.rel";
2090 case SECCAT_DATA_REL_LOCAL:
2091 sname = ".ldata.rel.local";
2093 case SECCAT_DATA_REL_RO:
2094 sname = ".ldata.rel.ro";
2096 case SECCAT_DATA_REL_RO_LOCAL:
2097 sname = ".ldata.rel.ro.local";
/* BSS-category large data additionally carries the SECTION_BSS flag.  */
2101 flags |= SECTION_BSS;
2104 case SECCAT_RODATA_MERGE_STR:
2105 case SECCAT_RODATA_MERGE_STR_INIT:
2106 case SECCAT_RODATA_MERGE_CONST:
2110 case SECCAT_SRODATA:
2117 /* We don't split these for medium model. Place them into
2118 default sections and hope for best. */
2123 /* We might get called with string constants, but get_named_section
2124 doesn't like them as they are not DECLs. Also, we need to set
2125 flags in that case. */
2127 return get_section (sname, flags, NULL);
2128 return get_named_section (decl, sname, reloc);
/* Not medium-model large data: use the generic ELF selection logic.  */
2131 return default_elf_select_section (decl, reloc, align);
2134 /* Build up a unique section name, expressed as a
2135 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2136 RELOC indicates whether the initial value of EXP requires
2137 link-time relocations. */
/* Implements the TARGET_ASM_UNIQUE_SECTION hook.  For medium-model
   large data the per-decl section prefix is an ".l" variant
   (".ldata.", ".lbss.", ".lrodata.", or the corresponding
   ".gnu.linkonce.l*." form when one-only semantics must be emulated
   without COMDAT groups); otherwise defer to default_unique_section.  */
2140 x86_64_elf_unique_section (tree decl, int reloc)
2142 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2143 && ix86_in_large_data_p (decl))
2145 const char *prefix = NULL;
2146 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2147 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2149 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2152 case SECCAT_DATA_REL:
2153 case SECCAT_DATA_REL_LOCAL:
2154 case SECCAT_DATA_REL_RO:
2155 case SECCAT_DATA_REL_RO_LOCAL:
2156 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2159 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2162 case SECCAT_RODATA_MERGE_STR:
2163 case SECCAT_RODATA_MERGE_STR_INIT:
2164 case SECCAT_RODATA_MERGE_CONST:
2165 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2167 case SECCAT_SRODATA:
2174 /* We don't split these for medium model. Place them into
2175 default sections and hope for best. */
/* Concatenate PREFIX with the decl's stripped assembler name and
   install the result as DECL_SECTION_NAME.  The buffer is
   stack-allocated; build_string copies it.  */
2183 plen = strlen (prefix);
2185 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2186 name = targetm.strip_name_encoding (name);
2187 nlen = strlen (name);
2189 string = alloca (nlen + plen + 1);
2190 memcpy (string, prefix, plen);
2191 memcpy (string + plen, name, nlen + 1);
2193 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
/* Non-large data: fall back to the generic per-decl section naming.  */
2197 default_unique_section (decl, reloc);
2200 #ifdef COMMON_ASM_OP
2201 /* This says how to output assembler code to declare an
2202 uninitialized external linkage data object.
2204 For medium model x86-64 we need to use .largecomm opcode for
/* Emit an aligned common symbol: NAME of SIZE bytes.  For the
   medium code model, objects larger than ix86_section_threshold use
   the ".largecomm" directive instead of the normal COMMON_ASM_OP.  */
2207 x86_elf_aligned_common (FILE *file,
2208 const char *name, unsigned HOST_WIDE_INT size,
2211 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2212 && size > (unsigned int)ix86_section_threshold)
2213 fprintf (file, ".largecomm\t");
2215 fprintf (file, "%s", COMMON_ASM_OP);
2216 assemble_name (file, name);
/* Append ",<size>,<alignment-in-bytes>" to the directive.  */
2217 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2218 size, align / BITS_PER_UNIT);
2221 /* Utility function for targets to use in implementing
2222 ASM_OUTPUT_ALIGNED_BSS. */
/* Place DECL's uninitialized data either in ".lbss" (medium-model
   objects above ix86_section_threshold) or the regular bss section,
   then align, emit the object label, and reserve SIZE bytes
   (at least one, so the label always refers to real storage).  */
2225 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2226 const char *name, unsigned HOST_WIDE_INT size,
2229 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2230 && size > (unsigned int)ix86_section_threshold)
2231 switch_to_section (get_named_section (decl, ".lbss", 0));
2233 switch_to_section (bss_section);
2234 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2235 #ifdef ASM_DECLARE_OBJECT_NAME
2236 last_assemble_variable_decl = decl;
2237 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2239 /* Standard thing is just output label for the object. */
2240 ASM_OUTPUT_LABEL (file, name);
2241 #endif /* ASM_DECLARE_OBJECT_NAME */
2242 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Implements OPTIMIZATION_OPTIONS: adjust flag defaults for the given
   optimization LEVEL before command-line options are processed.
   Flags whose proper default depends on TARGET_64BIT are set to the
   sentinel value 2 here and resolved later in override_options.  */
2247 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2249 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2250 make the problem with not enough registers even worse. */
2251 #ifdef INSN_SCHEDULING
2253 flag_schedule_insns = 0;
2257 /* The Darwin libraries never set errno, so we might as well
2258 avoid calling them when that's the only reason we would. */
2259 flag_errno_math = 0;
2261 /* The default values of these switches depend on the TARGET_64BIT
2262 that is not known at this moment. Mark these values with 2 and
2263 let the user override these. In case there is no command line option
2264 specifying them, we will set the defaults in override_options. */
2266 flag_omit_frame_pointer = 2;
2267 flag_pcc_struct_return = 2;
2268 flag_asynchronous_unwind_tables = 2;
2269 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2270 SUBTARGET_OPTIMIZATION_OPTIONS;
2274 /* Table of valid machine attributes. */
/* NULL-terminated attribute_spec table; each entry names an attribute,
   its argument-count bounds, what kind of node it may attach to, and
   the handler that validates it.  */
2275 const struct attribute_spec ix86_attribute_table[] =
2277 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2278 /* Stdcall attribute says callee is responsible for popping arguments
2279 if they are not variable. */
2280 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2281 /* Fastcall attribute says callee is responsible for popping arguments
2282 if they are not variable. */
2283 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2284 /* Cdecl attribute says the callee is a normal C declaration */
2285 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2286 /* Regparm attribute specifies how many integer arguments are to be
2287 passed in registers. */
2288 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2289 /* Sseregparm attribute says we are using x86_64 calling conventions
2290 for FP arguments. */
2291 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2292 /* force_align_arg_pointer says this function realigns the stack at entry. */
/* Note: the attribute name here is read from a variable, not a literal.  */
2293 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2294 false, true, true, ix86_handle_cconv_attribute },
2295 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2296 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2297 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2298 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2300 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2301 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2302 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2303 SUBTARGET_ATTRIBUTE_TABLE,
2305 { NULL, 0, 0, false, false, false, NULL }
2308 /* Decide whether we can make a sibling call to a function. DECL is the
2309 declaration of the function being targeted by the call and EXP is the
2310 CALL_EXPR representing the call. */
/* Returns whether a sibcall (tail call reusing the caller's frame) to
   DECL/EXP is legal.  Rejects the call when PIC needs %ebx, when the
   return-value locations differ, when an indirect call would starve
   the address register, or when the stack was force-aligned.  */
2313 ix86_function_ok_for_sibcall (tree decl, tree exp)
2318 /* If we are generating position-independent code, we cannot sibcall
2319 optimize any indirect call, or a direct call to a global function,
2320 as the PLT requires %ebx be live. */
2321 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Recover the callee's FUNCTION_TYPE from the CALL_EXPR operand.  */
2328 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2329 if (POINTER_TYPE_P (func))
2330 func = TREE_TYPE (func);
2333 /* Check that the return value locations are the same. Like
2334 if we are returning floats on the 80387 register stack, we cannot
2335 make a sibcall from a function that doesn't return a float to a
2336 function that does or, conversely, from a function that does return
2337 a float to a function that doesn't; the necessary stack adjustment
2338 would not be executed. This is also the place we notice
2339 differences in the return value ABI. Note that it is ok for one
2340 of the functions to have void return type as long as the return
2341 value of the other is passed in a register. */
2342 a = ix86_function_value (TREE_TYPE (exp), func, false);
2343 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2345 if (STACK_REG_P (a) || STACK_REG_P (b))
2347 if (!rtx_equal_p (a, b))
2350 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2352 else if (!rtx_equal_p (a, b))
2355 /* If this call is indirect, we'll need to be able to use a call-clobbered
2356 register for the address of the target function. Make sure that all
2357 such registers are not used for passing parameters. */
2358 if (!decl && !TARGET_64BIT)
2362 /* We're looking at the CALL_EXPR, we need the type of the function. */
2363 type = TREE_OPERAND (exp, 0); /* pointer expression */
2364 type = TREE_TYPE (type); /* pointer type */
2365 type = TREE_TYPE (type); /* function type */
/* regparm >= 3 means all call-clobbered integer registers carry
   arguments, leaving none free for the indirect call address.  */
2367 if (ix86_function_regparm (type, NULL) >= 3)
2369 /* ??? Need to count the actual number of registers to be used,
2370 not the possible number of registers. Fix later. */
2375 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2376 /* Dllimport'd functions are also called indirectly. */
2377 if (decl && DECL_DLLIMPORT_P (decl)
2378 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2382 /* If we forced aligned the stack, then sibcalling would unalign the
2383 stack, which may break the called function. */
2384 if (cfun->machine->force_align_arg_pointer)
2387 /* Otherwise okay. That also includes certain types of indirect calls. */
2391 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2392 calling convention attributes;
2393 arguments as in struct attribute_spec.handler. */
/* Validates one calling-convention attribute on *NODE: checks the
   node kind, the "regparm" argument, 64-bit applicability, and
   mutual-exclusion rules among cdecl/stdcall/fastcall/regparm.  */
2396 ix86_handle_cconv_attribute (tree *node, tree name,
2398 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function (or function-valued)
   nodes; anything else gets a warning and the attribute is dropped.  */
2401 if (TREE_CODE (*node) != FUNCTION_TYPE
2402 && TREE_CODE (*node) != METHOD_TYPE
2403 && TREE_CODE (*node) != FIELD_DECL
2404 && TREE_CODE (*node) != TYPE_DECL)
2406 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2407 IDENTIFIER_POINTER (name));
2408 *no_add_attrs = true;
2412 /* Can combine regparm with all attributes but fastcall. */
2413 if (is_attribute_p ("regparm", name))
2417 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2419 error ("fastcall and regparm attributes are not compatible");
/* The single regparm argument must be an integer constant in
   [0, REGPARM_MAX].  */
2422 cst = TREE_VALUE (args);
2423 if (TREE_CODE (cst) != INTEGER_CST)
2425 warning (OPT_Wattributes,
2426 "%qs attribute requires an integer constant argument",
2427 IDENTIFIER_POINTER (name));
2428 *no_add_attrs = true;
2430 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2432 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2433 IDENTIFIER_POINTER (name), REGPARM_MAX);
2434 *no_add_attrs = true;
/* A stack-realigning function clobbers %ecx in its prologue, so it
   cannot also use the full REGPARM_MAX register-argument count.  */
2438 && lookup_attribute (ix86_force_align_arg_pointer_string,
2439 TYPE_ATTRIBUTES (*node))
2440 && compare_tree_int (cst, REGPARM_MAX-1))
2442 error ("%s functions limited to %d register parameters",
2443 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2451 warning (OPT_Wattributes, "%qs attribute ignored",
2452 IDENTIFIER_POINTER (name));
2453 *no_add_attrs = true;
2457 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2458 if (is_attribute_p ("fastcall", name))
2460 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2462 error ("fastcall and cdecl attributes are not compatible");
2464 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2466 error ("fastcall and stdcall attributes are not compatible");
2468 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2470 error ("fastcall and regparm attributes are not compatible");
2474 /* Can combine stdcall with fastcall (redundant), regparm and
2476 else if (is_attribute_p ("stdcall", name))
2478 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2480 error ("stdcall and cdecl attributes are not compatible");
2482 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2484 error ("stdcall and fastcall attributes are not compatible");
2488 /* Can combine cdecl with regparm and sseregparm. */
2489 else if (is_attribute_p ("cdecl", name))
2491 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2493 error ("stdcall and cdecl attributes are not compatible");
2495 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2497 error ("fastcall and cdecl attributes are not compatible");
2501 /* Can combine sseregparm with all attributes. */
2506 /* Return 0 if the attributes for two types are incompatible, 1 if they
2507 are compatible, and 2 if they are nearly compatible (which causes a
2508 warning to be generated). */
/* Compares calling-convention attributes of TYPE1 and TYPE2:
   fastcall presence, regparm count, sseregparm presence, and the
   stack-popping convention (cdecl vs stdcall, flipped under -mrtd).  */
2511 ix86_comp_type_attributes (tree type1, tree type2)
2513 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default is stdcall, so "cdecl" is the deviation to
   look for; otherwise "stdcall" is.  */
2514 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2516 if (TREE_CODE (type1) != FUNCTION_TYPE)
2519 /* Check for mismatched fastcall/regparm types. */
2520 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2521 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2522 || (ix86_function_regparm (type1, NULL)
2523 != ix86_function_regparm (type2, NULL)))
2526 /* Check for mismatched sseregparm types. */
2527 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2528 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2531 /* Check for mismatched return types (cdecl vs stdcall). */
2532 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2533 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2539 /* Return the regparm value for a function with the indicated TYPE and DECL.
2540 DECL may be NULL when calling function indirectly
2541 or considering a libcall. */
/* Computes how many integer arguments are passed in registers:
   the -mregparm default, overridden by an explicit regparm/fastcall
   attribute, and possibly raised automatically for 32-bit local
   (unit-at-a-time, non-profiled) functions.  */
2544 ix86_function_regparm (tree type, tree decl)
2547 int regparm = ix86_regparm;
2548 bool user_convention = false;
2552 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2555 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2556 user_convention = true;
2559 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2562 user_convention = true;
2565 /* Use register calling convention for local functions when possible. */
2566 if (!TARGET_64BIT && !user_convention && decl
2567 && flag_unit_at_a_time && !profile_flag)
2569 struct cgraph_local_info *i = cgraph_local_info (decl)
2572 int local_regparm, globals = 0, regno;
2574 /* Make sure no regparm register is taken by a global register
2576 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2577 if (global_regs[local_regparm])
2579 /* We can't use regparm(3) for nested functions as these use
2580 static chain pointer in third argument. */
2581 if (local_regparm == 3
2582 && decl_function_context (decl)
2583 && !DECL_NO_STATIC_CHAIN (decl))
2585 /* If the function realigns its stackpointer, the
2586 prologue will clobber %ecx. If we've already
2587 generated code for the callee, the callee
2588 DECL_STRUCT_FUNCTION is gone, so we fall back to
2589 scanning the attributes for the self-realigning
2591 if ((DECL_STRUCT_FUNCTION (decl)
2592 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2593 || (!DECL_STRUCT_FUNCTION (decl)
2594 && lookup_attribute (ix86_force_align_arg_pointer_string,
2595 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2597 /* Each global register variable increases register pressure,
2598 so the more global reg vars there are, the smaller regparm
2599 optimization use, unless requested by the user explicitly. */
2600 for (regno = 0; regno < 6; regno++)
2601 if (global_regs[regno])
2604 = globals < local_regparm ? local_regparm - globals : 0;
2606 if (local_regparm > regparm)
2607 regparm = local_regparm;
2614 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2615 in SSE registers for a function with the indicated TYPE and DECL.
2616 DECL may be NULL when calling function indirectly
2617 or considering a libcall. Otherwise return 0. */
2620 ix86_function_sseregparm (tree type, tree decl)
2622 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2623 by the sseregparm attribute. */
2624 if (TARGET_SSEREGPARM
2626 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without the required SSE/SSE2 support is a hard error;
   report against the decl when we have one, else the type.  */
2631 error ("Calling %qD with attribute sseregparm without "
2632 "SSE/SSE2 enabled", decl);
2634 error ("Calling %qT with attribute sseregparm without "
2635 "SSE/SSE2 enabled", type);
2642 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2643 in SSE registers even for 32-bit mode and not just 3, but up to
2644 8 SSE arguments in registers. */
2645 if (!TARGET_64BIT && decl
2646 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2648 struct cgraph_local_info *i = cgraph_local_info (decl);
2650 return TARGET_SSE2 ? 2 : 1;
2656 /* Return true if EAX is live at the start of the function. Used by
2657 ix86_expand_prologue to determine if we need special help before
2658 calling allocate_stack_worker. */
2661 ix86_eax_live_at_start_p (void)
2663 /* Cheat. Don't bother working forward from ix86_function_regparm
2664 to the function type to whether an actual argument is located in
2665 eax. Instead just look at cfg info, which is still close enough
2666 to correct at this point. This gives false positives for broken
2667 functions that might use uninitialized data that happens to be
2668 allocated in eax, but who cares? */
/* Register 0 is %eax; query liveness at the end of the entry block.  */
2669 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2672 /* Value is the number of bytes of arguments automatically
2673 popped when returning from a subroutine call.
2674 FUNDECL is the declaration node of the function (as a tree),
2675 FUNTYPE is the data type of the function (as a tree),
2676 or for a library call it is an identifier node for the subroutine name.
2677 SIZE is the number of bytes of arguments passed on the stack.
2679 On the 80386, the RTD insn may be used to pop them if the number
2680 of args is fixed, but if the number is variable then the caller
2681 must pop them all. RTD can't be used for library calls now
2682 because the library is compiled with the Unix compiler.
2683 Use of RTD is a selectable option, since it is incompatible with
2684 standard Unix calling sequences. If the option is not selected,
2685 the caller must always pop the args.
2687 The attribute stdcall is equivalent to RTD on a per module basis. */
2690 ix86_return_pops_args (tree fundecl, tree funtype, int size)
/* -mrtd applies only to real function decls, never to libcall
   identifier nodes.  */
2692 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2694 /* Cdecl functions override -mrtd, and never pop the stack. */
2695 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2697 /* Stdcall and fastcall functions will pop the stack if not
2699 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2700 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* Callee pops only when the argument list is fixed (no trailing
   "..."), i.e. ends in void or is empty.  */
2704 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2705 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2706 == void_type_node)))
2710 /* Lose any fake structure return argument if it is passed on the stack. */
2711 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2713 && !KEEP_AGGREGATE_RETURN_POINTER)
2715 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden aggregate-return pointer occupies one word.  */
2718 return GET_MODE_SIZE (Pmode);
2724 /* Argument support functions. */
2726 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): lossy excerpt -- original line numbers jump; the
   return type, braces and some statements are missing from this view.
   Returns nonzero when REGNO may carry a function parameter: integer
   regparm registers, MMX/SSE parameter registers, and (in the second,
   presumably 64-bit branch -- TODO confirm) the x86-64 integer
   parameter registers plus SSE registers.  */
2728 ix86_function_arg_regno_p (int regno)
2732 return (regno < REGPARM_MAX
2733 || (TARGET_MMX && MMX_REGNO_P (regno)
2734 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2735 || (TARGET_SSE && SSE_REGNO_P (regno)
2736 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2738 if (TARGET_SSE && SSE_REGNO_P (regno)
2739 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2741 /* RAX is used as hidden argument to va_arg functions. */
2744 for (i = 0; i < REGPARM_MAX; i++)
2745 if (regno == x86_64_int_parameter_registers[i])
2750 /* Return if we do not know how to pass TYPE solely in registers. */
/* NOTE(review): lossy excerpt -- braces/return type not visible.
   Returns true when an argument of MODE/TYPE cannot be passed solely
   in registers: either the generic variable-size test fires, or (on
   32-bit) a non-vector TImode aggregate is being smuggled through.  */
2753 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2755 if (must_pass_in_stack_var_size_or_pad (mode, type))
2758 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2759 The layout_type routine is crafty and tries to trick us into passing
2760 currently unsupported vector types on the stack by using TImode. */
2761 return (!TARGET_64BIT && mode == TImode
2762 && type && TREE_CODE (type) != VECTOR_TYPE);
2765 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2766 for a call to a function whose data type is FNTYPE.
2767 For a library call, FNTYPE is 0. */
/* NOTE(review): lossy excerpt -- original line numbers jump; the
   fndecl parameter, braces and some statements are not visible here.
   Initializes CUM for a call to a function of type FNTYPE (0 for a
   libcall): register counts, SSE/MMX warning flags, fastcall/regparm
   handling, float_in_sse, and the maybe_vaarg flag.  */
2770 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2771 tree fntype, /* tree ptr for function decl */
2772 rtx libname, /* SYMBOL_REF of library name or 0 */
2775 static CUMULATIVE_ARGS zero_cum;
2776 tree param, next_param;
2778 if (TARGET_DEBUG_ARG)
2780 fprintf (stderr, "\ninit_cumulative_args (");
2782 fprintf (stderr, "fntype code = %s, ret code = %s",
2783 tree_code_name[(int) TREE_CODE (fntype)],
2784 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2786 fprintf (stderr, "no fntype");
2789 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2794 /* Set up the number of registers to use for passing arguments. */
2795 cum->nregs = ix86_regparm;
2797 cum->sse_nregs = SSE_REGPARM_MAX;
2799 cum->mmx_nregs = MMX_REGPARM_MAX;
2800 cum->warn_sse = true;
2801 cum->warn_mmx = true;
2802 cum->maybe_vaarg = false;
2804 /* Use ecx and edx registers if function has fastcall attribute,
2805 else look for regparm information. */
2806 if (fntype && !TARGET_64BIT)
2808 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2814 cum->nregs = ix86_function_regparm (fntype, fndecl);
2817 /* Set up the number of SSE registers used for passing SFmode
2818 and DFmode arguments. Warn for mismatching ABI. */
2819 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2821 /* Determine if this function has variable arguments. This is
2822 indicated by the last argument being 'void_type_node' if there
2823 are no variable arguments. If there are variable arguments, then
2824 we won't pass anything in registers in 32-bit mode. */
2826 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2828 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2829 param != 0; param = next_param)
2831 next_param = TREE_CHAIN (param);
2832 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2842 cum->float_in_sse = 0;
2844 cum->maybe_vaarg = true;
/* No prototype at all (or a libcall without one): must assume varargs.  */
2848 if ((!fntype && !libname)
2849 || (fntype && !TYPE_ARG_TYPES (fntype)))
2850 cum->maybe_vaarg = true;
2852 if (TARGET_DEBUG_ARG)
2853 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2858 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2859 But in the case of vector types, it is some vector mode.
2861 When we have only some of our vector isa extensions enabled, then there
2862 are some modes for which vector_mode_supported_p is false. For these
2863 modes, the generic vector support in gcc will choose some non-vector mode
2864 in order to implement the type. By computing the natural mode, we'll
2865 select the proper ABI location for the operand and not depend on whatever
2866 the middle-end decides to do with these vector types. */
/* NOTE(review): lossy excerpt -- braces and the final return are not
   visible.  Computes the "natural" (vector) mode for TYPE when the
   middle end fell back to a non-vector TYPE_MODE: scans the machine's
   vector modes for one matching the element mode and subpart count.  */
2868 static enum machine_mode
2869 type_natural_mode (tree type)
2871 enum machine_mode mode = TYPE_MODE (type);
2873 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2875 HOST_WIDE_INT size = int_size_in_bytes (type);
2876 if ((size == 8 || size == 16)
2877 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2878 && TYPE_VECTOR_SUBPARTS (type) > 1)
2880 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2882 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2883 mode = MIN_MODE_VECTOR_FLOAT;
2885 mode = MIN_MODE_VECTOR_INT;
2887 /* Get the mode which has this inner mode and number of units. */
2888 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2889 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2890 && GET_MODE_INNER (mode) == innermode)
2900 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2901 this may not agree with the mode that the type system has chosen for the
2902 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2903 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* NOTE(review): lossy excerpt -- return type, braces and final return
   are not visible.  Wraps REGNO either directly in a REG of ORIG_MODE,
   or, for BLKmode, in a one-element PARALLEL holding a REG of MODE at
   offset 0 (see the comment above the function).  */
2906 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2911 if (orig_mode != BLKmode)
2912 tmp = gen_rtx_REG (orig_mode, regno);
2915 tmp = gen_rtx_REG (mode, regno);
2916 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2917 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2923 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2924 of this code is to classify each 8bytes of incoming argument by the register
2925 class and assign registers accordingly. */
2927 /* Return the union class of CLASS1 and CLASS2.
2928 See the x86-64 PS ABI for details. */
/* NOTE(review): lossy excerpt -- the opening brace and the "return
   class1;"-style lines after rules #1 and #2 are not visible here.
   Merges two x86-64 register classes per the psABI merge rules.  */
2930 static enum x86_64_reg_class
2931 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2933 /* Rule #1: If both classes are equal, this is the resulting class. */
2934 if (class1 == class2)
2937 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2939 if (class1 == X86_64_NO_CLASS)
2941 if (class2 == X86_64_NO_CLASS)
2944 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2945 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2946 return X86_64_MEMORY_CLASS;
2948 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2949 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2950 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2951 return X86_64_INTEGERSI_CLASS;
2952 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2953 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2954 return X86_64_INTEGER_CLASS;
2956 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2958 if (class1 == X86_64_X87_CLASS
2959 || class1 == X86_64_X87UP_CLASS
2960 || class1 == X86_64_COMPLEX_X87_CLASS
2961 || class2 == X86_64_X87_CLASS
2962 || class2 == X86_64_X87UP_CLASS
2963 || class2 == X86_64_COMPLEX_X87_CLASS)
2964 return X86_64_MEMORY_CLASS;
2966 /* Rule #6: Otherwise class SSE is used. */
2967 return X86_64_SSE_CLASS;
2970 /* Classify the argument of type TYPE and mode MODE.
2971 CLASSES will be filled by the register class used to pass each word
2972 of the operand. The number of words is returned. In case the parameter
2973 should be passed in memory, 0 is returned. As a special case for zero
2974 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2976 BIT_OFFSET is used internally for handling records and specifies offset
2977 of the offset in bits modulo 256 to avoid overflow cases.
2979 See the x86-64 PS ABI for details.
/* NOTE(review): heavily lossy excerpt -- the embedded line numbers jump
   widely (e.g. 2990->2994, 3059->3067, 3215->3223), so many case
   labels, braces, returns and gotos of this function are missing from
   view.  Comments below describe only what the visible lines show.
   Classifies each 8-byte chunk of a MODE/TYPE argument into x86-64
   psABI register classes (filled into CLASSES[]); aggregates recurse
   per field, then a final cleanup pass enforces the SSEUP/X87UP
   adjacency rules; atomic types are classified by mode at the end.  */
2983 classify_argument (enum machine_mode mode, tree type,
2984 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2986 HOST_WIDE_INT bytes =
2987 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2988 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2990 /* Variable sized entities are always passed/returned in memory. */
2994 if (mode != VOIDmode
2995 && targetm.calls.must_pass_in_stack (mode, type))
2998 if (type && AGGREGATE_TYPE_P (type))
3002 enum x86_64_reg_class subclasses[MAX_CLASSES];
3004 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3008 for (i = 0; i < words; i++)
3009 classes[i] = X86_64_NO_CLASS;
3011 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3012 signal the memory class, so handle it as a special case. */
3015 classes[0] = X86_64_NO_CLASS;
3019 /* Classify each field of record and merge classes. */
3020 switch (TREE_CODE (type))
3023 /* And now merge the fields of structure. */
3024 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3026 if (TREE_CODE (field) == FIELD_DECL)
3030 if (TREE_TYPE (field) == error_mark_node)
3033 /* Bitfields are always classified as integer. Handle them
3034 early, since later code would consider them to be
3035 misaligned integers. */
3036 if (DECL_BIT_FIELD (field))
3038 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3039 i < ((int_bit_position (field) + (bit_offset % 64))
3040 + tree_low_cst (DECL_SIZE (field), 0)
3043 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bitfield: classify the field type recursively at its offset.  */
3048 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3049 TREE_TYPE (field), subclasses,
3050 (int_bit_position (field)
3051 + bit_offset) % 256);
3054 for (i = 0; i < num; i++)
3057 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3059 merge_classes (subclasses[i], classes[i + pos]);
3067 /* Arrays are handled as small records. */
3070 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3071 TREE_TYPE (type), subclasses, bit_offset);
3075 /* The partial classes are now full classes. */
3076 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3077 subclasses[0] = X86_64_SSE_CLASS;
3078 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3079 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classes across the whole array.  */
3081 for (i = 0; i < words; i++)
3082 classes[i] = subclasses[i % num];
3087 case QUAL_UNION_TYPE:
3088 /* Unions are similar to RECORD_TYPE but offset is always 0.
3090 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3092 if (TREE_CODE (field) == FIELD_DECL)
3096 if (TREE_TYPE (field) == error_mark_node)
3099 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3100 TREE_TYPE (field), subclasses,
3104 for (i = 0; i < num; i++)
3105 classes[i] = merge_classes (subclasses[i], classes[i]);
3114 /* Final merger cleanup. */
3115 for (i = 0; i < words; i++)
3117 /* If one class is MEMORY, everything should be passed in
3119 if (classes[i] == X86_64_MEMORY_CLASS)
3122 /* The X86_64_SSEUP_CLASS should be always preceded by
3123 X86_64_SSE_CLASS. */
3124 if (classes[i] == X86_64_SSEUP_CLASS
3125 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3126 classes[i] = X86_64_SSE_CLASS;
3128 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3129 if (classes[i] == X86_64_X87UP_CLASS
3130 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3131 classes[i] = X86_64_SSE_CLASS;
3136 /* Compute alignment needed. We align all types to natural boundaries with
3137 exception of XFmode that is aligned to 64bits. */
3138 if (mode != VOIDmode && mode != BLKmode)
3140 int mode_alignment = GET_MODE_BITSIZE (mode);
3143 mode_alignment = 128;
3144 else if (mode == XCmode)
3145 mode_alignment = 256;
3146 if (COMPLEX_MODE_P (mode))
3147 mode_alignment /= 2;
3148 /* Misaligned fields are always returned in memory. */
3149 if (bit_offset % mode_alignment)
3153 /* for V1xx modes, just use the base mode */
3154 if (VECTOR_MODE_P (mode)
3155 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3156 mode = GET_MODE_INNER (mode);
3158 /* Classification of atomic types. */
/* NOTE(review): the switch head and most case labels of this section
   are missing from the excerpt; only the class assignments remain.  */
3163 classes[0] = X86_64_SSE_CLASS;
3166 classes[0] = X86_64_SSE_CLASS;
3167 classes[1] = X86_64_SSEUP_CLASS;
3176 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3177 classes[0] = X86_64_INTEGERSI_CLASS;
3179 classes[0] = X86_64_INTEGER_CLASS;
3183 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3188 if (!(bit_offset % 64))
3189 classes[0] = X86_64_SSESF_CLASS;
3191 classes[0] = X86_64_SSE_CLASS;
3194 classes[0] = X86_64_SSEDF_CLASS;
3197 classes[0] = X86_64_X87_CLASS;
3198 classes[1] = X86_64_X87UP_CLASS;
3201 classes[0] = X86_64_SSE_CLASS;
3202 classes[1] = X86_64_SSEUP_CLASS;
3205 classes[0] = X86_64_SSE_CLASS;
3208 classes[0] = X86_64_SSEDF_CLASS;
3209 classes[1] = X86_64_SSEDF_CLASS;
3212 classes[0] = X86_64_COMPLEX_X87_CLASS;
3215 /* These modes are larger than 16 bytes. */
3223 classes[0] = X86_64_SSE_CLASS;
3224 classes[1] = X86_64_SSEUP_CLASS;
3230 classes[0] = X86_64_SSE_CLASS;
3236 gcc_assert (VECTOR_MODE_P (mode));
3241 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3243 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3244 classes[0] = X86_64_INTEGERSI_CLASS;
3246 classes[0] = X86_64_INTEGER_CLASS;
3247 classes[1] = X86_64_INTEGER_CLASS;
3248 return 1 + (bytes > 8);
3252 /* Examine the argument and return set number of register required in each
3253 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): lossy excerpt -- the switch head, increments of
   *int_nregs/*sse_nregs, and several returns are not visible here.
   Counts how many integer and SSE registers a MODE/TYPE argument
   needs, per its psABI classification; returns 0 when it must go in
   memory (visible for the COMPLEX_X87 non-return case).  */
3255 examine_argument (enum machine_mode mode, tree type, int in_return,
3256 int *int_nregs, int *sse_nregs)
3258 enum x86_64_reg_class class[MAX_CLASSES];
3259 int n = classify_argument (mode, type, class, 0);
3265 for (n--; n >= 0; n--)
3268 case X86_64_INTEGER_CLASS:
3269 case X86_64_INTEGERSI_CLASS:
3272 case X86_64_SSE_CLASS:
3273 case X86_64_SSESF_CLASS:
3274 case X86_64_SSEDF_CLASS:
3277 case X86_64_NO_CLASS:
3278 case X86_64_SSEUP_CLASS:
3280 case X86_64_X87_CLASS:
3281 case X86_64_X87UP_CLASS:
3285 case X86_64_COMPLEX_X87_CLASS:
3286 return in_return ? 2 : 0;
3287 case X86_64_MEMORY_CLASS:
3293 /* Construct container for the argument used by GCC interface. See
3294 FUNCTION_ARG for the detailed description. */
/* NOTE(review): heavily lossy excerpt -- switch heads, some case
   bodies, register-cursor increments and several returns are missing
   from this view.  Builds the RTL "container" (a REG or a PARALLEL of
   EXPR_LISTs) describing where a classified argument or return value
   lives; emits one-shot errors when the ABI demands SSE/x87 registers
   that were disabled on the command line.  */
3297 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3298 tree type, int in_return, int nintregs, int nsseregs,
3299 const int *intreg, int sse_regno)
3301 /* The following variables hold the static issued_error state. */
3302 static bool issued_sse_arg_error;
3303 static bool issued_sse_ret_error;
3304 static bool issued_x87_ret_error;
3306 enum machine_mode tmpmode;
3308 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3309 enum x86_64_reg_class class[MAX_CLASSES];
3313 int needed_sseregs, needed_intregs;
3314 rtx exp[MAX_CLASSES];
3317 n = classify_argument (mode, type, class, 0);
3318 if (TARGET_DEBUG_ARG)
3321 fprintf (stderr, "Memory class\n");
3324 fprintf (stderr, "Classes:");
3325 for (i = 0; i < n; i++)
3327 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3329 fprintf (stderr, "\n");
/* Not enough free registers for this argument: pass in memory.  */
3334 if (!examine_argument (mode, type, in_return, &needed_intregs,
3337 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3340 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3341 some less clueful developer tries to use floating-point anyway. */
3342 if (needed_sseregs && !TARGET_SSE)
3346 if (!issued_sse_ret_error)
3348 error ("SSE register return with SSE disabled");
3349 issued_sse_ret_error = true;
3352 else if (!issued_sse_arg_error)
3354 error ("SSE register argument with SSE disabled");
3355 issued_sse_arg_error = true;
3360 /* Likewise, error if the ABI requires us to return values in the
3361 x87 registers and the user specified -mno-80387. */
3362 if (!TARGET_80387 && in_return)
3363 for (i = 0; i < n; i++)
3364 if (class[i] == X86_64_X87_CLASS
3365 || class[i] == X86_64_X87UP_CLASS
3366 || class[i] == X86_64_COMPLEX_X87_CLASS)
3368 if (!issued_x87_ret_error)
3370 error ("x87 register return with x87 disabled");
3371 issued_x87_ret_error = true;
3376 /* First construct simple cases. Avoid SCmode, since we want to use
3377 single register to pass this type. */
3378 if (n == 1 && mode != SCmode)
3381 case X86_64_INTEGER_CLASS:
3382 case X86_64_INTEGERSI_CLASS:
3383 return gen_rtx_REG (mode, intreg[0]);
3384 case X86_64_SSE_CLASS:
3385 case X86_64_SSESF_CLASS:
3386 case X86_64_SSEDF_CLASS:
3387 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3388 case X86_64_X87_CLASS:
3389 case X86_64_COMPLEX_X87_CLASS:
3390 return gen_rtx_REG (mode, FIRST_STACK_REG);
3391 case X86_64_NO_CLASS:
3392 /* Zero sized array, struct or class. */
/* Two-word special cases that fit a single hard register pair.  */
3397 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3399 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3401 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3402 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3403 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3404 && class[1] == X86_64_INTEGER_CLASS
3405 && (mode == CDImode || mode == TImode || mode == TFmode)
3406 && intreg[0] + 1 == intreg[1])
3407 return gen_rtx_REG (mode, intreg[0]);
3409 /* Otherwise figure out the entries of the PARALLEL. */
3410 for (i = 0; i < n; i++)
3414 case X86_64_NO_CLASS:
3416 case X86_64_INTEGER_CLASS:
3417 case X86_64_INTEGERSI_CLASS:
3418 /* Merge TImodes on aligned occasions here too. */
3419 if (i * 8 + 8 > bytes)
3420 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3421 else if (class[i] == X86_64_INTEGERSI_CLASS)
3425 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3426 if (tmpmode == BLKmode)
3428 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3429 gen_rtx_REG (tmpmode, *intreg),
3433 case X86_64_SSESF_CLASS:
3434 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3435 gen_rtx_REG (SFmode,
3436 SSE_REGNO (sse_regno)),
3440 case X86_64_SSEDF_CLASS:
3441 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3442 gen_rtx_REG (DFmode,
3443 SSE_REGNO (sse_regno)),
3447 case X86_64_SSE_CLASS:
3448 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3452 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3453 gen_rtx_REG (tmpmode,
3454 SSE_REGNO (sse_regno)),
3456 if (tmpmode == TImode)
3465 /* Empty aligned struct, union or class. */
3469 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3470 for (i = 0; i < nexps; i++)
3471 XVECEXP (ret, 0, i) = exp [i];
3475 /* Update the data in CUM to advance over an argument
3476 of mode MODE and data type TYPE.
3477 (TYPE is null for libcalls where that information may not be available.) */
/* NOTE(review): lossy excerpt -- the 64-bit/32-bit branch structure,
   switch-on-mode and several case labels are missing from this view;
   comments reflect only the visible lines.
   Advances CUM past an argument of MODE/TYPE: on the examine_argument
   path it consumes int/SSE registers, otherwise it consumes general,
   SSE, or MMX register slots and bumps the word counters.  */
3480 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3481 tree type, int named)
3484 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3485 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3488 mode = type_natural_mode (type);
3490 if (TARGET_DEBUG_ARG)
3491 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3492 "mode=%s, named=%d)\n\n",
3493 words, cum->words, cum->nregs, cum->sse_nregs,
3494 GET_MODE_NAME (mode), named);
3498 int int_nregs, sse_nregs;
3499 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3500 cum->words += words;
3501 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3503 cum->nregs -= int_nregs;
3504 cum->sse_nregs -= sse_nregs;
3505 cum->regno += int_nregs;
3506 cum->sse_regno += sse_nregs;
3509 cum->words += words;
/* General-register path: consume WORDS integer register slots.  */
3527 cum->words += words;
3528 cum->nregs -= words;
3529 cum->regno += words;
3531 if (cum->nregs <= 0)
3539 if (cum->float_in_sse < 2)
3542 if (cum->float_in_sse < 1)
/* SSE-register path (non-aggregate only): consume one SSE slot.  */
3553 if (!type || !AGGREGATE_TYPE_P (type))
3555 cum->sse_words += words;
3556 cum->sse_nregs -= 1;
3557 cum->sse_regno += 1;
3558 if (cum->sse_nregs <= 0)
/* MMX-register path (non-aggregate only): consume one MMX slot.  */
3570 if (!type || !AGGREGATE_TYPE_P (type))
3572 cum->mmx_words += words;
3573 cum->mmx_nregs -= 1;
3574 cum->mmx_regno += 1;
3575 if (cum->mmx_nregs <= 0)
3586 /* Define where to put the arguments to a function.
3587 Value is zero to push the argument on the stack,
3588 or a hard register in which to store the argument.
3590 MODE is the argument's machine mode.
3591 TYPE is the data type of the argument (as a tree).
3592 This is null for libcalls where that information may
3594 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3595 the preceding args and about the function being called.
3596 NAMED is nonzero if this argument is a named parameter
3597 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): lossy excerpt -- the switch-on-mode, several case
   labels and the final return are missing from this view.
   Returns the RTX telling where an argument of ORIG_MODE/TYPE goes:
   a REG/PARALLEL when it fits in registers, or (not visible here,
   presumably NULL -- TODO confirm) for the stack.  Also implements the
   hidden AL count for x86-64 varargs and the one-shot SSE/MMX ABI
   warnings.  */
3600 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3601 tree type, int named)
3603 enum machine_mode mode = orig_mode;
3606 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3607 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3608 static bool warnedsse, warnedmmx;
3610 /* To simplify the code below, represent vector types with a vector mode
3611 even if MMX/SSE are not active. */
3612 if (type && TREE_CODE (type) == VECTOR_TYPE)
3613 mode = type_natural_mode (type);
3615 /* Handle a hidden AL argument containing number of registers for varargs
3616 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3618 if (mode == VOIDmode)
3621 return GEN_INT (cum->maybe_vaarg
3622 ? (cum->sse_nregs < 0
3630 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3632 &x86_64_int_parameter_registers [cum->regno],
3637 /* For now, pass fp/complex values on the stack. */
3649 if (words <= cum->nregs)
3651 int regno = cum->regno;
3653 /* Fastcall allocates the first two DWORD (SImode) or
3654 smaller arguments to ECX and EDX. */
3657 if (mode == BLKmode || mode == DImode)
3660 /* ECX not EAX is the first allocated register. */
3664 ret = gen_rtx_REG (mode, regno);
3668 if (cum->float_in_sse < 2)
3671 if (cum->float_in_sse < 1)
3681 if (!type || !AGGREGATE_TYPE_P (type))
3683 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3686 warning (0, "SSE vector argument without SSE enabled "
3690 ret = gen_reg_or_parallel (mode, orig_mode,
3691 cum->sse_regno + FIRST_SSE_REG);
3698 if (!type || !AGGREGATE_TYPE_P (type))
3700 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3703 warning (0, "MMX vector argument without MMX enabled "
3707 ret = gen_reg_or_parallel (mode, orig_mode,
3708 cum->mmx_regno + FIRST_MMX_REG);
3713 if (TARGET_DEBUG_ARG)
3716 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3717 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3720 print_simple_rtl (stderr, ret);
3722 fprintf (stderr, ", stack");
3724 fprintf (stderr, " )\n");
3730 /* A C expression that indicates when an argument must be passed by
3731 reference. If nonzero for an argument, a copy of that argument is
3732 made in memory and a pointer to the argument is passed instead of
3733 the argument itself. The pointer is passed in whatever way is
3734 appropriate for passing a pointer to that type. */
/* NOTE(review): lossy excerpt -- the 64-bit branch and the returns are
   missing from this view.  Visible logic: variable-sized types
   (int_size_in_bytes == -1) are passed by reference.  */
3737 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3738 enum machine_mode mode ATTRIBUTE_UNUSED,
3739 tree type, bool named ATTRIBUTE_UNUSED)
3744 if (type && int_size_in_bytes (type) == -1)
3746 if (TARGET_DEBUG_ARG)
3747 fprintf (stderr, "function_arg_pass_by_reference\n");
3754 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3755 ABI. Only called if TARGET_SSE. */
/* NOTE(review): lossy excerpt -- return type, braces, case labels and
   final returns are missing from this view.  Recursively tests
   whether TYPE contains a 128-bit-aligned SSE vector (walking record
   fields and array element types).  */
3757 contains_128bit_aligned_vector_p (tree type)
3759 enum machine_mode mode = TYPE_MODE (type);
3760 if (SSE_REG_MODE_P (mode)
3761 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3763 if (TYPE_ALIGN (type) < 128)
3766 if (AGGREGATE_TYPE_P (type))
3768 /* Walk the aggregates recursively. */
3769 switch (TREE_CODE (type))
3773 case QUAL_UNION_TYPE:
3777 /* Walk all the structure fields. */
3778 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3780 if (TREE_CODE (field) == FIELD_DECL
3781 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3788 /* Just for use if some languages pass arrays by value. */
3789 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3800 /* Gives the alignment boundary, in bits, of an argument with the
3801 specified mode and type. */
/* NOTE(review): lossy excerpt -- the type/mode branch structure and
   the final return are missing from this view.  Computes the argument
   alignment in bits: natural alignment, floored at PARM_BOUNDARY, with
   128-bit alignment preserved only for SSE modes / types containing a
   128-bit-aligned vector.  */
3804 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3808 align = TYPE_ALIGN (type);
3810 align = GET_MODE_ALIGNMENT (mode);
3811 if (align < PARM_BOUNDARY)
3812 align = PARM_BOUNDARY;
3815 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3816 make an exception for SSE modes since these require 128bit
3819 The handling here differs from field_alignment. ICC aligns MMX
3820 arguments to 4 byte boundaries, while structure fields are aligned
3821 to 8 byte boundaries. */
3823 align = PARM_BOUNDARY;
3826 if (!SSE_REG_MODE_P (mode))
3827 align = PARM_BOUNDARY;
3831 if (!contains_128bit_aligned_vector_p (type))
3832 align = PARM_BOUNDARY;
3840 /* Return true if N is a possible register number of function value. */
/* NOTE(review): lossy excerpt -- return type, braces, the first
   condition line and returns are missing from this view.  Tests
   whether REGNO can hold a function return value (EAX/ST0/XMM0/MM0
   family, gated on the relevant target flags).  */
3842 ix86_function_value_regno_p (int regno)
3845 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3846 || (regno == FIRST_SSE_REG && TARGET_SSE))
3850 && (regno == FIRST_MMX_REG && TARGET_MMX))
3856 /* Define how to find the value returned by a function.
3857 VALTYPE is the data type of the value (as a tree).
3858 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3859 otherwise, FUNC is 0. */
/* NOTE(review): lossy excerpt -- return type, braces and the
   TARGET_64BIT test are missing from this view.  Returns the RTX for
   a function's return value: on the 64-bit path via
   construct_container, otherwise via ix86_value_regno.  */
3861 ix86_function_value (tree valtype, tree fntype_or_decl,
3862 bool outgoing ATTRIBUTE_UNUSED)
3864 enum machine_mode natmode = type_natural_mode (valtype);
3868 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3869 1, REGPARM_MAX, SSE_REGPARM_MAX,
3870 x86_64_int_return_registers, 0);
3871 /* For zero sized structures, construct_container return NULL, but we
3872 need to keep rest of compiler happy by returning meaningful value. */
3874 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3879 tree fn = NULL_TREE, fntype;
3881 && DECL_P (fntype_or_decl))
3882 fn = fntype_or_decl;
3883 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3884 return gen_rtx_REG (TYPE_MODE (valtype),
3885 ix86_value_regno (natmode, fn, fntype));
3889 /* Return true iff type is returned in memory. */
/* NOTE(review): lossy excerpt -- return type, braces, size-threshold
   tests and the trailing returns are missing from this view.  Decides
   whether TYPE is returned in memory rather than registers (64-bit:
   via examine_argument; 32-bit: size/vector-mode special cases).  */
3891 ix86_return_in_memory (tree type)
3893 int needed_intregs, needed_sseregs, size;
3894 enum machine_mode mode = type_natural_mode (type);
3897 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3899 if (mode == BLKmode)
3902 size = int_size_in_bytes (type);
3904 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3907 if (VECTOR_MODE_P (mode) || mode == TImode)
3909 /* User-created vectors small enough to fit in EAX. */
3913 /* MMX/3dNow values are returned in MM0,
3914 except when it doesn't exist. */
3916 return (TARGET_MMX ? 0 : 1);
3918 /* SSE values are returned in XMM0, except when it doesn't exist. */
3920 return (TARGET_SSE ? 0 : 1);
3934 /* When returning SSE vector types, we have a choice of either
3935 (1) being abi incompatible with a -march switch, or
3936 (2) generating an error.
3937 Given no good solution, I think the safest thing is one warning.
3938 The user won't be able to use -Werror, but....
3940 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3941 called in response to actually generating a caller or callee that
3942 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3943 via aggregate_value_p for general type probing from tree-ssa. */
/* NOTE(review): lossy excerpt -- return type, braces, the enclosing
   condition and the final return are missing from this view.  Emits
   one-shot ABI warnings for SSE/MMX vector returns when the matching
   ISA is disabled (see the rationale comment above the function).  */
3946 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3948 static bool warnedsse, warnedmmx;
3952 /* Look at the return type of the function, not the function type. */
3953 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3955 if (!TARGET_SSE && !warnedsse)
3958 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3961 warning (0, "SSE vector return without SSE enabled "
3966 if (!TARGET_MMX && !warnedmmx)
3968 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3971 warning (0, "MMX vector return without MMX enabled "
3980 /* Define how to find the value returned by a library function
3981 assuming the value has mode MODE. */
/* NOTE(review): lossy excerpt -- return type, braces and the
   switch-on-mode are missing from this view.  Maps a libcall return
   MODE to its register (XMM0 / ST0 / EAX, or ix86_value_regno on the
   non-64-bit path).  */
3983 ix86_libcall_value (enum machine_mode mode)
3997 return gen_rtx_REG (mode, FIRST_SSE_REG);
4000 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4004 return gen_rtx_REG (mode, 0);
4008 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4011 /* Given a mode, return the register to use for a return value. */
/* NOTE(review): lossy excerpt -- return type, braces and a few
   "return 0;" lines are missing from this view.  32-bit only
   (asserted): picks the return-value register for MODE -- MM0/XMM0
   for vectors, EAX for integers and decimal floats, ST(0) or XMM0 for
   scalar floats depending on sseregparm level.  */
4014 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4016 gcc_assert (!TARGET_64BIT);
4018 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4019 we normally prevent this case when mmx is not available. However
4020 some ABIs may require the result to be returned like DImode. */
4021 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4022 return TARGET_MMX ? FIRST_MMX_REG : 0;
4024 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4025 we prevent this case when sse is not available. However some ABIs
4026 may require the result to be returned like integer TImode. */
4027 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4028 return TARGET_SSE ? FIRST_SSE_REG : 0;
4030 /* Decimal floating point values can go in %eax, unlike other float modes. */
4031 if (DECIMAL_FLOAT_MODE_P (mode))
4034 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4035 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4038 /* Floating point return values in %st(0), except for local functions when
4039 SSE math is enabled or for functions with sseregparm attribute. */
4040 if ((func || fntype)
4041 && (mode == SFmode || mode == DFmode))
4043 int sse_level = ix86_function_sseregparm (fntype, func);
4044 if ((sse_level >= 1 && mode == SFmode)
4045 || (sse_level == 2 && mode == DFmode))
4046 return FIRST_SSE_REG;
4049 return FIRST_FLOAT_REG;
4052 /* Create the va_list data type. */
/* NOTE(review): lossy excerpt -- return type, braces and the
   TARGET_64BIT test are missing from this view.  Builds the va_list
   type: a plain char* on i386, or the x86-64 __va_list_tag record
   {gp_offset, fp_offset, overflow_arg_area, reg_save_area} wrapped in
   a one-element array.  */
4055 ix86_build_builtin_va_list (void)
4057 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4059 /* For i386 we use plain pointer to argument area. */
4061 return build_pointer_type (char_type_node);
4063 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4064 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4066 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4067 unsigned_type_node);
4068 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4069 unsigned_type_node);
4070 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4072 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4075 va_list_gpr_counter_field = f_gpr;
4076 va_list_fpr_counter_field = f_fpr;
4078 DECL_FIELD_CONTEXT (f_gpr) = record;
4079 DECL_FIELD_CONTEXT (f_fpr) = record;
4080 DECL_FIELD_CONTEXT (f_ovf) = record;
4081 DECL_FIELD_CONTEXT (f_sav) = record;
4083 TREE_CHAIN (record) = type_decl;
4084 TYPE_NAME (record) = type_decl;
4085 TYPE_FIELDS (record) = f_gpr;
4086 TREE_CHAIN (f_gpr) = f_fpr;
4087 TREE_CHAIN (f_fpr) = f_ovf;
4088 TREE_CHAIN (f_ovf) = f_sav;
4090 layout_type (record);
4092 /* The correct type is an array type of one element. */
4093 return build_array_type (record, build_index_type (size_zero_node));
4096 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* NOTE(review): lossy excerpt -- several local declarations, braces,
   the no_rtl parameter handling and loop increments are missing from
   this view.  Sets up the x86-64 varargs register-save area: spills
   the unnamed integer parameter registers to the save area, then uses
   the sse_prologue_save insn (a computed jump keyed on AL) to spill
   only the SSE registers actually used.  */
4099 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4100 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4103 CUMULATIVE_ARGS next_cum;
4104 rtx save_area = NULL_RTX, mem;
4117 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4120 /* Indicate to allocate space on the stack for varargs save area. */
4121 ix86_save_varrargs_registers = 1;
4123 cfun->stack_alignment_needed = 128;
4125 fntype = TREE_TYPE (current_function_decl);
4126 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4127 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4128 != void_type_node));
4130 /* For varargs, we do not want to skip the dummy va_dcl argument.
4131 For stdargs, we do want to skip the last named argument. */
4134 function_arg_advance (&next_cum, mode, type, 1);
4137 save_area = frame_pointer_rtx;
4139 set = get_varargs_alias_set ();
4141 for (i = next_cum.regno;
4143 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4146 mem = gen_rtx_MEM (Pmode,
4147 plus_constant (save_area, i * UNITS_PER_WORD));
4148 MEM_NOTRAP_P (mem) = 1;
4149 set_mem_alias_set (mem, set);
4150 emit_move_insn (mem, gen_rtx_REG (Pmode,
4151 x86_64_int_parameter_registers[i]));
4154 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4156 /* Now emit code to save SSE registers. The AX parameter contains number
4157 of SSE parameter registers used to call this function. We use
4158 sse_prologue_save insn template that produces computed jump across
4159 SSE saves. We need some preparation work to get this working. */
4161 label = gen_label_rtx ();
4162 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4164 /* Compute address to jump to :
4165 label - 5*eax + nnamed_sse_arguments*5 */
4166 tmp_reg = gen_reg_rtx (Pmode);
4167 nsse_reg = gen_reg_rtx (Pmode);
4168 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4169 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4170 gen_rtx_MULT (Pmode, nsse_reg,
4172 if (next_cum.sse_regno)
4175 gen_rtx_CONST (DImode,
4176 gen_rtx_PLUS (DImode,
4178 GEN_INT (next_cum.sse_regno * 4))));
4180 emit_move_insn (nsse_reg, label_ref);
4181 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4183 /* Compute address of memory block we save into. We always use pointer
4184 pointing 127 bytes after first byte to store - this is needed to keep
4185 instruction size limited by 4 bytes. */
4186 tmp_reg = gen_reg_rtx (Pmode);
4187 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4188 plus_constant (save_area,
4189 8 * REGPARM_MAX + 127)));
4190 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4191 MEM_NOTRAP_P (mem) = 1;
4192 set_mem_alias_set (mem, set);
4193 set_mem_align (mem, BITS_PER_WORD);
4195 /* And finally do the dirty job! */
4196 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4197 GEN_INT (next_cum.sse_regno), label));
4202 /* Implement va_start. */
/* Implement va_start for x86-64: initialize the four fields of the
   __builtin_va_list record (gp_offset, fp_offset, overflow_arg_area,
   reg_save_area) from the state of the current function's arguments.
   On 32-bit targets it falls back to the generic expander.
   NOTE(review): some lines (TARGET_64BIT test, returns, braces) are
   missing from this dump.  */
4205 ix86_va_start (tree valist, rtx nextarg)
4207 HOST_WIDE_INT words, n_gpr, n_fpr;
4208 tree f_gpr, f_fpr, f_ovf, f_sav;
4209 tree gpr, fpr, ovf, sav, t;
4212 /* Only 64bit target needs something special. */
4215 std_expand_builtin_va_start (valist, nextarg);
/* Walk the va_list record's field chain in declaration order.  */
4219 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4220 f_fpr = TREE_CHAIN (f_gpr);
4221 f_ovf = TREE_CHAIN (f_fpr);
4222 f_sav = TREE_CHAIN (f_ovf);
4224 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4225 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4226 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4227 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4228 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4230 /* Count number of gp and fp argument registers used. */
4231 words = current_function_args_info.words;
4232 n_gpr = current_function_args_info.regno;
4233 n_fpr = current_function_args_info.sse_regno;
4235 if (TARGET_DEBUG_ARG)
4236 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4237 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset = bytes into the save area already consumed by named
   integer args (8 bytes per GP register).  */
4239 if (cfun->va_list_gpr_size)
4241 type = TREE_TYPE (gpr);
4242 t = build2 (MODIFY_EXPR, type, gpr,
4243 build_int_cst (type, n_gpr * 8));
4244 TREE_SIDE_EFFECTS (t) = 1;
4245 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset = start of SSE slots (8*REGPARM_MAX) plus 16 bytes per
   named SSE arg already consumed.  */
4248 if (cfun->va_list_fpr_size)
4250 type = TREE_TYPE (fpr);
4251 t = build2 (MODIFY_EXPR, type, fpr,
4252 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4253 TREE_SIDE_EFFECTS (t) = 1;
4254 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4257 /* Find the overflow area. */
4258 type = TREE_TYPE (ovf);
4259 t = make_tree (type, virtual_incoming_args_rtx);
4261 t = build2 (PLUS_EXPR, type, t,
4262 build_int_cst (type, words * UNITS_PER_WORD));
4263 t = build2 (MODIFY_EXPR, type, ovf, t);
4264 TREE_SIDE_EFFECTS (t) = 1;
4265 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4267 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4269 /* Find the register save area.
4270 Prologue of the function save it right above stack frame. */
4271 type = TREE_TYPE (sav);
4272 t = make_tree (type, frame_pointer_rtx);
4273 t = build2 (MODIFY_EXPR, type, sav, t);
4274 TREE_SIDE_EFFECTS (t) = 1;
4275 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4279 /* Implement va_arg. */
/* Implement va_arg via the gimplify hook for x86-64: emit GIMPLE that
   first tries to fetch the argument from the register save area (using
   the gp_offset/fp_offset counters) and otherwise falls back to the
   overflow (stack) area.  Returns the dereferenced value.
   NOTE(review): this dump is missing many lines (declarations, braces,
   else-arms, TARGET_64BIT guard); comments are hedged where the control
   flow cannot be seen directly.  */
4282 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4284 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4285 tree f_gpr, f_fpr, f_ovf, f_sav;
4286 tree gpr, fpr, ovf, sav, t;
4288 tree lab_false, lab_over = NULL_TREE;
4293 enum machine_mode nat_mode;
4295 /* Only 64bit target needs something special. */
4297 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Pick apart the four fields of the va_list record, as in va_start.  */
4299 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4300 f_fpr = TREE_CHAIN (f_gpr);
4301 f_ovf = TREE_CHAIN (f_fpr);
4302 f_sav = TREE_CHAIN (f_ovf);
4304 valist = build_va_arg_indirect_ref (valist);
4305 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4306 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4307 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4308 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference are fetched as a pointer and
   dereferenced at the end.  */
4310 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4312 type = build_pointer_type (type);
4313 size = int_size_in_bytes (type);
4314 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Classify the argument per the x86-64 ABI; container describes which
   registers (if any) it would arrive in.  */
4316 nat_mode = type_natural_mode (type);
4317 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4318 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4320 /* Pull the value out of the saved registers. */
4322 addr = create_tmp_var (ptr_type_node, "addr");
4323 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4327 int needed_intregs, needed_sseregs;
4329 tree int_addr, sse_addr;
4331 lab_false = create_artificial_label ();
4332 lab_over = create_artificial_label ();
4334 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* Over-aligned values can't be read directly out of the save area;
   they must be assembled piecewise into a temporary.  */
4336 need_temp = (!REG_P (container)
4337 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4338 || TYPE_ALIGN (type) > 128));
4340 /* In case we are passing structure, verify that it is consecutive block
4341 on the register save area. If not we need to do moves. */
4342 if (!need_temp && !REG_P (container))
4344 /* Verify that all registers are strictly consecutive */
4345 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
/* SSE case: each piece must sit at regno FIRST_SSE_REG+i, offset 16*i.  */
4349 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4351 rtx slot = XVECEXP (container, 0, i);
4352 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4353 || INTVAL (XEXP (slot, 1)) != i * 16)
/* Integer case: each piece must sit at regno i, offset 8*i.  */
4361 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4363 rtx slot = XVECEXP (container, 0, i);
4364 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4365 || INTVAL (XEXP (slot, 1)) != i * 8)
4377 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4378 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4379 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4380 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4383 /* First ensure that we fit completely in registers. */
/* If gp_offset is past the last slot that still leaves room for
   needed_intregs, jump to the stack fallback at lab_false.  */
4386 t = build_int_cst (TREE_TYPE (gpr),
4387 (REGPARM_MAX - needed_intregs + 1) * 8);
4388 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4389 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4390 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4391 gimplify_and_add (t, pre_p);
/* Same check for the SSE counter.  */
4395 t = build_int_cst (TREE_TYPE (fpr),
4396 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4398 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4399 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4400 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4401 gimplify_and_add (t, pre_p);
4404 /* Compute index to start of area used for integer regs. */
4407 /* int_addr = gpr + sav; */
4408 t = fold_convert (ptr_type_node, gpr);
4409 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4410 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4411 gimplify_and_add (t, pre_p);
4415 /* sse_addr = fpr + sav; */
4416 t = fold_convert (ptr_type_node, fpr);
4417 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4418 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4419 gimplify_and_add (t, pre_p);
/* need_temp path: copy the scattered register pieces into a stack
   temporary and return its address.  */
4424 tree temp = create_tmp_var (type, "va_arg_tmp");
4427 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4428 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4429 gimplify_and_add (t, pre_p);
4431 for (i = 0; i < XVECLEN (container, 0); i++)
4433 rtx slot = XVECEXP (container, 0, i);
4434 rtx reg = XEXP (slot, 0);
4435 enum machine_mode mode = GET_MODE (reg);
4436 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4437 tree addr_type = build_pointer_type (piece_type);
4440 tree dest_addr, dest;
4442 if (SSE_REGNO_P (REGNO (reg)))
4444 src_addr = sse_addr;
4445 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4449 src_addr = int_addr;
4450 src_offset = REGNO (reg) * 8;
4452 src_addr = fold_convert (addr_type, src_addr);
4453 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4454 size_int (src_offset)));
4455 src = build_va_arg_indirect_ref (src_addr);
4457 dest_addr = fold_convert (addr_type, addr);
4458 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4459 size_int (INTVAL (XEXP (slot, 1)))));
4460 dest = build_va_arg_indirect_ref (dest_addr);
4462 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4463 gimplify_and_add (t, pre_p);
/* Bump the offset counters past the registers just consumed.  */
4469 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4470 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4471 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4472 gimplify_and_add (t, pre_p);
4476 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4477 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4478 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4479 gimplify_and_add (t, pre_p);
4482 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4483 gimplify_and_add (t, pre_p);
4485 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4486 append_to_statement_list (t, pre_p);
4489 /* ... otherwise out of the overflow area. */
4491 /* Care for on-stack alignment if needed. */
4492 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4493 || integer_zerop (TYPE_SIZE (type)))
/* Round ovf up to the argument's alignment: (ovf + align-1) & -align.  */
4497 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4498 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4499 build_int_cst (TREE_TYPE (ovf), align - 1));
4500 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4501 build_int_cst (TREE_TYPE (t), -align));
4503 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4505 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4506 gimplify_and_add (t2, pre_p);
/* Advance ovf past the argument (rounded up to whole words).  */
4508 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4509 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4510 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4511 gimplify_and_add (t, pre_p);
4515 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4516 append_to_statement_list (t, pre_p);
4519 ptrtype = build_pointer_type (type);
4520 addr = fold_convert (ptrtype, addr);
/* Extra dereference for pass-by-reference arguments.  */
4523 addr = build_va_arg_indirect_ref (addr);
4524 return build_va_arg_indirect_ref (addr);
4527 /* Return nonzero if OPNUM's MEM should be matched
4528 in movabs* patterns. */
4531 ix86_check_movabs (rtx insn, int opnum)
4535 set = PATTERN (insn);
4536 if (GET_CODE (set) == PARALLEL)
4537 set = XVECEXP (set, 0, 0);
4538 gcc_assert (GET_CODE (set) == SET);
4539 mem = XEXP (set, opnum);
4540 while (GET_CODE (mem) == SUBREG)
4541 mem = SUBREG_REG (mem);
4542 gcc_assert (GET_CODE (mem) == MEM);
4543 return (volatile_ok || !MEM_VOLATILE_P (mem));
4546 /* Initialize the table of extra 80387 mathematical constants. */
4549 init_ext_80387_constants (void)
4551 static const char * cst[5] =
4553 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4554 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4555 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4556 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4557 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4561 for (i = 0; i < 5; i++)
4563 real_from_string (&ext_80387_constants_table[i], cst[i]);
4564 /* Ensure each constant is rounded to XFmode precision. */
4565 real_convert (&ext_80387_constants_table[i],
4566 XFmode, &ext_80387_constants_table[i]);
4569 ext_80387_constants_init = 1;
4572 /* Return true if the constant is something that can be loaded with
4573 a special instruction. */
/* Classify X: nonzero if it is an FP constant loadable by a special 80387
   instruction (fldz/fld1/fldlg2/...), 0/negative otherwise.  Exact return
   codes map to entries consumed by standard_80387_constant_opcode/_rtx.
   NOTE(review): the individual "return N;" lines are missing from this
   dump, so the precise code assignments cannot be confirmed here.  */
4576 standard_80387_constant_p (rtx x)
4580 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
/* +0.0 and +1.0 have dedicated fldz/fld1 instructions.  */
4583 if (x == CONST0_RTX (GET_MODE (x)))
4585 if (x == CONST1_RTX (GET_MODE (x)))
4588 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4590 /* For XFmode constants, try to find a special 80387 instruction when
4591 optimizing for size or on those CPUs that benefit from them. */
4592 if (GET_MODE (x) == XFmode
4593 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4597 if (! ext_80387_constants_init)
4598 init_ext_80387_constants ();
/* Scan the lg2/ln2/l2e/l2t/pi table filled by init_ext_80387_constants.  */
4600 for (i = 0; i < 5; i++)
4601 if (real_identical (&r, &ext_80387_constants_table[i]))
4605 /* Load of the constant -0.0 or -1.0 will be split as
4606 fldz;fchs or fld1;fchs sequence. */
4607 if (real_isnegzero (&r))
4609 if (real_identical (&r, &dconstm1))
4615 /* Return the opcode of the special instruction to be used to load
/* Map the classification from standard_80387_constant_p (X) to the
   assembler mnemonic (fldz, fld1, fldlg2, ...) that loads it.
   NOTE(review): the switch cases are missing from this dump.  */
4619 standard_80387_constant_opcode (rtx x)
4621 switch (standard_80387_constant_p (x))
4645 /* Return the CONST_DOUBLE representing the 80387 constant that is
4646 loaded by the specified special instruction. The argument IDX
4647 matches the return value from standard_80387_constant_p. */
/* Return the CONST_DOUBLE for the 80387 constant selected by IDX, where
   IDX matches a return value of standard_80387_constant_p.  Lazily fills
   the extended-constant table on first use.
   NOTE(review): the idx->table-slot mapping lines are missing here.  */
4650 standard_80387_constant_rtx (int idx)
4654 if (! ext_80387_constants_init)
4655 init_ext_80387_constants ();
4671 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4675 /* Return 1 if mode is a valid mode for sse. */
/* Return 1 if MODE is a machine mode handled by the SSE constant logic.
   NOTE(review): the body (presumably a switch over vector modes) is
   missing from this dump — confirm against the full source.  */
4677 standard_sse_mode_p (enum machine_mode mode)
4694 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
/* Classify X as an FP/vector constant loadable into an SSE register
   without a memory reference: all-zeros (xorps/xorpd/pxor) yields one
   code, all-ones (pcmpeqd, SSE2 only) another; -1 flags an all-ones
   value that the current ISA cannot synthesize.  */
4697 standard_sse_constant_p (rtx x)
4699 enum machine_mode mode = GET_MODE (x);
4701 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4703 if (vector_all_ones_operand (x, mode)
4704 && standard_sse_mode_p (mode))
/* All-ones needs pcmpeqd, available only with SSE2.  */
4705 return TARGET_SSE2 ? 2 : -1;
4710 /* Return the opcode of the special instruction to be used to load
/* Return the assembler template that materializes the special SSE
   constant X into operand 0, choosing the xor flavor that matches the
   insn's mode attribute (case values in the switch follow the codes of
   standard_sse_constant_p).  */
4714 standard_sse_constant_opcode (rtx insn, rtx x)
4716 switch (standard_sse_constant_p (x))
4719 if (get_attr_mode (insn) == MODE_V4SF)
4720 return "xorps\t%0, %0";
4721 else if (get_attr_mode (insn) == MODE_V2DF)
4722 return "xorpd\t%0, %0";
4724 return "pxor\t%0, %0";
/* All-ones constant: compare-equal of a register with itself.  */
4726 return "pcmpeqd\t%0, %0";
4731 /* Returns 1 if OP contains a symbol reference */
4734 symbolic_reference_mentioned_p (rtx op)
4739 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4742 fmt = GET_RTX_FORMAT (GET_CODE (op));
4743 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4749 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4750 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4754 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4761 /* Return 1 if it is appropriate to emit `ret' instructions in the
4762 body of a function. Do this only if the epilogue is simple, needing a
4763 couple of insns. Prior to reloading, we can't tell how many registers
4764 must be saved, so return 0 then. Return 0 if there is no frame
4765 marker to de-allocate. */
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function. Do this only if the epilogue is simple, needing a
   couple of insns. Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then. Return 0 if there is no frame
   marker to de-allocate. */
4768 ix86_can_use_return_insn_p (void)
4770 struct ix86_frame frame;
/* Before reload the frame layout is unknown; with a frame pointer the
   epilogue must tear it down, so a bare `ret' is never valid.  */
4772 if (! reload_completed || frame_pointer_needed)
4775 /* Don't allow more than 32 pop, since that's all we can do
4776 with one instruction. */
4777 if (current_function_pops_args
4778 && current_function_args_size >= 32768)
/* Simple epilogue = nothing to allocate and no registers to restore.  */
4781 ix86_compute_frame_layout (&frame);
4782 return frame.to_allocate == 0 && frame.nregs == 0;
4785 /* Value should be nonzero if functions must have frame pointers.
4786 Zero means the frame pointer need not be set up (and parms may
4787 be accessed via the stack pointer) in functions that seem suitable. */
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.
   NOTE(review): the "return 1;" lines after each test are missing from
   this dump.  */
4790 ix86_frame_pointer_required (void)
4792 /* If we accessed previous frames, then the generated code expects
4793 to be able to access the saved ebp value in our frame. */
4794 if (cfun->machine->accesses_prev_frame)
4797 /* Several x86 os'es need a frame pointer for other reasons,
4798 usually pertaining to setjmp. */
4799 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4802 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4803 the frame pointer by default. Turn it back on now if we've not
4804 got a leaf function. */
4805 if (TARGET_OMIT_LEAF_FRAME_POINTER
4806 && (!current_function_is_leaf
4807 || ix86_current_function_calls_tls_descriptor))
/* mcount-style profiling also needs a standard frame.  */
4810 if (current_function_profile)
4816 /* Record that the current function accesses previous call frames. */
/* Record that the current function accesses previous call frames
   (__builtin_frame_address / __builtin_return_address with nonzero
   level), which forces a conventional frame pointer — see
   ix86_frame_pointer_required.  */
4819 ix86_setup_frame_addresses (void)
4821 cfun->machine->accesses_prev_frame = 1;
4824 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4825 # define USE_HIDDEN_LINKONCE 1
4827 # define USE_HIDDEN_LINKONCE 0
4830 static int pic_labels_used;
4832 /* Fills in the label name that should be used for a pc thunk for
4833 the given register. */
/* Fill NAME (at least 32 bytes) with the label to use for the pc-load
   thunk of register REGNO: a linkonce "__i686.get_pc_thunk.<reg>" when
   hidden-linkonce sections are usable, else a local "LPR<regno>" label.
   Only meaningful for 32-bit PIC code.  */
4836 get_pc_thunk_name (char name[32], unsigned int regno)
4838 gcc_assert (!TARGET_64BIT);
4840 if (USE_HIDDEN_LINKONCE)
4841 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4843 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4847 /* This function generates code for -fpic that loads %ebx with
4848 the return address of the caller and then returns. */
/* TARGET_ASM_FILE_END hook: for each register whose pc thunk was used
   (bit set in pic_labels_used), emit the thunk body — a function that
   loads its register with the caller's return address and returns.
   Also emits the exec-stack marker when required.
   NOTE(review): #ifdef TARGET_MACHO / else branch structure is partly
   missing from this dump.  */
4851 ix86_file_end (void)
4856 for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk was ever requested.  */
4860 if (! ((pic_labels_used >> regno) & 1))
4863 get_pc_thunk_name (name, regno);
/* Darwin: coalesced text section with weak definition.  */
4868 switch_to_section (darwin_sections[text_coal_section]);
4869 fputs ("\t.weak_definition\t", asm_out_file);
4870 assemble_name (asm_out_file, name);
4871 fputs ("\n\t.private_extern\t", asm_out_file);
4872 assemble_name (asm_out_file, name);
4873 fputs ("\n", asm_out_file);
4874 ASM_OUTPUT_LABEL (asm_out_file, name);
/* ELF-style: emit the thunk as a hidden COMDAT function so identical
   thunks from different objects merge at link time.  */
4878 if (USE_HIDDEN_LINKONCE)
4882 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4884 TREE_PUBLIC (decl) = 1;
4885 TREE_STATIC (decl) = 1;
4886 DECL_ONE_ONLY (decl) = 1;
4888 (*targetm.asm_out.unique_section) (decl, 0);
4889 switch_to_section (get_named_section (decl, NULL, 0));
4891 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4892 fputs ("\t.hidden\t", asm_out_file);
4893 assemble_name (asm_out_file, name);
4894 fputc ('\n', asm_out_file);
4895 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4899 switch_to_section (text_section);
4900 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: mov (%esp), %reg ; ret — copies the return address
   (top of stack at call time) into the destination register.  */
4903 xops[0] = gen_rtx_REG (SImode, regno);
4904 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4905 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4906 output_asm_insn ("ret", xops);
4909 if (NEED_INDICATE_EXEC_STACK)
4910 file_end_indicate_exec_stack ();
4913 /* Emit code for the SET_GOT patterns. */
/* Emit assembly for the SET_GOT patterns: load the address of the GOT
   (_GLOBAL_OFFSET_TABLE_) into DEST.  Without deep-branch-prediction
   tuning this uses the classic call/pop sequence; otherwise it calls the
   per-register pc thunk, then adds the GOT displacement.
   NOTE(review): some lines (returns, TARGET_MACHO conditionals) are
   missing from this dump.  */
4916 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4921 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4923 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4925 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
/* Non-PIC: the GOT address is link-time constant, plain mov works.  */
4928 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
/* PIC call/pop idiom: call pushes the pc, pop retrieves it.  */
4930 output_asm_insn ("call\t%a2", xops);
4933 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4934 is what will be referenced by the Mach-O PIC subsystem. */
4936 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4939 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4940 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4943 output_asm_insn ("pop{l}\t%0", xops);
/* Deep-branch-prediction path: call the shared pc thunk instead of the
   unbalanced call/pop (which would hurt the return-address stack).  */
4948 get_pc_thunk_name (name, REGNO (dest));
4949 pic_labels_used |= 1 << REGNO (dest);
4951 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4952 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4953 output_asm_insn ("call\t%X2", xops);
4954 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4955 is what will be referenced by the Mach-O PIC subsystem. */
4958 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4960 targetm.asm_out.internal_label (asm_out_file, "L",
4961 CODE_LABEL_NUMBER (label));
/* Finally add the displacement from the loaded pc to the GOT.  */
4968 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4969 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4971 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4976 /* Generate an "push" pattern for input ARG. */
/* NOTE(review): visible fragment of gen_push — builds the SET that
   stores the pushed value through a PRE_DEC of the stack pointer.  The
   function header and part of the expression are lost in this dump;
   confirm against the full source before editing.  */
4981 return gen_rtx_SET (VOIDmode,
4983 gen_rtx_PRE_DEC (Pmode,
4984 stack_pointer_rtx)),
4988 /* Return >= 0 if there is an unused call-clobbered register available
4989 for the entire function. */
/* Return the number of a call-clobbered register (eax/ecx/edx, scanned
   2..0) that is unused for the whole function and can hold the PIC
   pointer instead of %ebx; INVALID_REGNUM if none qualifies.  Only leaf,
   non-profiled, non-TLS-descriptor functions are eligible.  */
4992 ix86_select_alt_pic_regnum (void)
4994 if (current_function_is_leaf && !current_function_profile
4995 && !ix86_current_function_calls_tls_descriptor)
4998 for (i = 2; i >= 0; --i)
4999 if (!regs_ever_live[i])
5003 return INVALID_REGNUM;
5006 /* Return 1 if we need to save REGNO. */
/* Return 1 if REGNO must be saved in the prologue.  MAYBE_EH_RETURN
   additionally counts the EH return data registers when the function
   calls __builtin_eh_return.
   NOTE(review): some return statements and the EH loop header are
   missing from this dump.  */
5008 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be preserved when it is genuinely needed —
   unless an alternate unused register can carry the PIC pointer.  */
5010 if (pic_offset_table_rtx
5011 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5012 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5013 || current_function_profile
5014 || current_function_calls_eh_return
5015 || current_function_uses_const_pool))
5017 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5022 if (current_function_calls_eh_return && maybe_eh_return)
5027 unsigned test = EH_RETURN_DATA_REGNO (i);
5028 if (test == INVALID_REGNUM)
/* The register forced to realign the argument pointer must survive.  */
5035 if (cfun->machine->force_align_arg_pointer
5036 && regno == REGNO (cfun->machine->force_align_arg_pointer))
/* Default rule: live, callee-saved, not fixed, and not the hard frame
   pointer when that is saved by the frame-pointer setup itself.  */
5039 return (regs_ever_live[regno]
5040 && !call_used_regs[regno]
5041 && !fixed_regs[regno]
5042 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5045 /* Return number of registers to be saved on the stack. */
/* Return the number of hard registers the prologue must save, by
   counting ix86_save_reg over all hard registers (EH registers
   included).  */
5048 ix86_nsaved_regs (void)
5053 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5054 if (ix86_save_reg (regno, true))
5059 /* Return the offset between two registers, one to be eliminated, and the other
5060 its replacement, at the start of a routine. */
/* Return the offset between register FROM (to be eliminated) and its
   replacement TO at function entry, derived from the computed frame
   layout.  Valid pairs: arg-pointer/frame-pointer → hard frame pointer,
   and either → stack pointer.  */
5063 ix86_initial_elimination_offset (int from, int to)
5065 struct ix86_frame frame;
5066 ix86_compute_frame_layout (&frame);
5068 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5069 return frame.hard_frame_pointer_offset;
5070 else if (from == FRAME_POINTER_REGNUM
5071 && to == HARD_FRAME_POINTER_REGNUM)
5072 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Everything else must eliminate to the stack pointer.  */
5075 gcc_assert (to == STACK_POINTER_REGNUM);
5077 if (from == ARG_POINTER_REGNUM)
5078 return frame.stack_pointer_offset;
5080 gcc_assert (from == FRAME_POINTER_REGNUM);
5081 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5085 /* Fill structure ix86_frame about frame of currently computed function. */
/* Fill *FRAME with the complete stack-frame layout of the current
   function: saved-register count, paddings, varargs save area, outgoing
   args, red zone, and the offsets of the frame/hard-frame/stack
   pointers.  Also decides between push-based and move-based register
   saving.  NOTE(review): several guard lines and the debug-output #if
   are missing from this dump.  */
5088 ix86_compute_frame_layout (struct ix86_frame *frame)
5090 HOST_WIDE_INT total_size;
5091 unsigned int stack_alignment_needed;
5092 HOST_WIDE_INT offset;
5093 unsigned int preferred_alignment;
5094 HOST_WIDE_INT size = get_frame_size ();
5096 frame->nregs = ix86_nsaved_regs ();
5099 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5100 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5102 /* During reload iteration the amount of registers saved can change.
5103 Recompute the value as needed. Do not recompute when amount of registers
5104 didn't change as reload does multiple calls to the function and does not
5105 expect the decision to change within single iteration. */
5107 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5109 int count = frame->nregs;
5111 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5112 /* The fast prologue uses move instead of push to save registers. This
5113 is significantly longer, but also executes faster as modern hardware
5114 can execute the moves in parallel, but can't do that for push/pop.
5116 Be careful about choosing what prologue to emit: When function takes
5117 many instructions to execute we may use slow version as well as in
5118 case function is known to be outside hot spot (this is known with
5119 feedback only). Weight the size of function by number of registers
5120 to save as it is cheap to use one or two push instructions but very
5121 slow to use many of them. */
5123 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5124 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5125 || (flag_branch_probabilities
5126 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5127 cfun->machine->use_fast_prologue_epilogue = false;
5129 cfun->machine->use_fast_prologue_epilogue
5130 = !expensive_function_p (count);
5132 if (TARGET_PROLOGUE_USING_MOVE
5133 && cfun->machine->use_fast_prologue_epilogue)
5134 frame->save_regs_using_mov = true;
5136 frame->save_regs_using_mov = false;
5139 /* Skip return address and saved base pointer. */
5140 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5142 frame->hard_frame_pointer_offset = offset;
5144 /* Do some sanity checking of stack_alignment_needed and
5145 preferred_alignment, since i386 port is the only using those features
5146 that may break easily. */
5148 gcc_assert (!size || stack_alignment_needed);
5149 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5150 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5151 gcc_assert (stack_alignment_needed
5152 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5154 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5155 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5157 /* Register save area */
5158 offset += frame->nregs * UNITS_PER_WORD;
/* x86-64 varargs register save area (see ix86_setup_incoming_varargs).  */
5161 if (ix86_save_varrargs_registers)
5163 offset += X86_64_VARARGS_SIZE;
5164 frame->va_arg_size = X86_64_VARARGS_SIZE;
5167 frame->va_arg_size = 0;
5169 /* Align start of frame for local function. */
5170 frame->padding1 = ((offset + stack_alignment_needed - 1)
5171 & -stack_alignment_needed) - offset;
5173 offset += frame->padding1;
5175 /* Frame pointer points here. */
5176 frame->frame_pointer_offset = offset;
5180 /* Add outgoing arguments area. Can be skipped if we eliminated
5181 all the function calls as dead code.
5182 Skipping is however impossible when function calls alloca. Alloca
5183 expander assumes that last current_function_outgoing_args_size
5184 of stack frame are unused. */
5185 if (ACCUMULATE_OUTGOING_ARGS
5186 && (!current_function_is_leaf || current_function_calls_alloca
5187 || ix86_current_function_calls_tls_descriptor))
5189 offset += current_function_outgoing_args_size;
5190 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5193 frame->outgoing_arguments_size = 0;
5195 /* Align stack boundary. Only needed if we're calling another function
5197 if (!current_function_is_leaf || current_function_calls_alloca
5198 || ix86_current_function_calls_tls_descriptor)
5199 frame->padding2 = ((offset + preferred_alignment - 1)
5200 & -preferred_alignment) - offset;
5202 frame->padding2 = 0;
5204 offset += frame->padding2;
5206 /* We've reached end of stack frame. */
5207 frame->stack_pointer_offset = offset;
5209 /* Size prologue needs to allocate. */
5210 frame->to_allocate =
5211 (size + frame->padding1 + frame->padding2
5212 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Tiny or huge frames: pushes are better (or moves are impossible
   because the 32-bit displacement would overflow).  */
5214 if ((!frame->to_allocate && frame->nregs <= 1)
5215 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5216 frame->save_regs_using_mov = false;
/* x86-64 red zone: leaf functions may use up to 128 bytes below the
   stack pointer without adjusting it.  */
5218 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5219 && current_function_is_leaf
5220 && !ix86_current_function_calls_tls_descriptor)
5222 frame->red_zone_size = frame->to_allocate;
5223 if (frame->save_regs_using_mov)
5224 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5225 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5226 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5229 frame->red_zone_size = 0;
5230 frame->to_allocate -= frame->red_zone_size;
5231 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (normally compiled out).  */
5233 fprintf (stderr, "nregs: %i\n", frame->nregs);
5234 fprintf (stderr, "size: %i\n", size);
5235 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5236 fprintf (stderr, "padding1: %i\n", frame->padding1);
5237 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5238 fprintf (stderr, "padding2: %i\n", frame->padding2);
5239 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5240 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5241 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5242 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5243 frame->hard_frame_pointer_offset);
5244 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5248 /* Emit code to save registers in the prologue. */
/* Emit push insns saving every register ix86_save_reg selects, from the
   highest hard register downward; each push is marked frame-related for
   DWARF unwind info.  */
5251 ix86_emit_save_regs (void)
5256 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5257 if (ix86_save_reg (regno, true))
5259 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5260 RTX_FRAME_RELATED_P (insn) = 1;
5264 /* Emit code to save registers using MOV insns. First register
5265 is restored from POINTER + OFFSET. */
/* Save the selected registers with MOV insns instead of pushes, storing
   them at successive word slots starting at POINTER + OFFSET; each store
   is marked frame-related for unwind info.  */
5267 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5272 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5273 if (ix86_save_reg (regno, true))
5275 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5277 gen_rtx_REG (Pmode, regno));
5278 RTX_FRAME_RELATED_P (insn) = 1;
5279 offset += UNITS_PER_WORD;
5283 /* Expand prologue or epilogue stack adjustment.
5284 The pattern exist to put a dependency on all ebp-based memory accesses.
5285 STYLE should be negative if instructions should be marked as frame related,
5286 zero if %r11 register is live and cannot be freely used and positive
/* Expand prologue or epilogue stack adjustment: DEST = SRC + OFFSET.
   The dedicated pattern puts a dependency on all ebp-based memory
   accesses.  STYLE < 0 marks the insns frame-related; STYLE == 0 means
   %r11 is live and may not be clobbered; STYLE > 0 allows using %r11
   for offsets that do not fit a 64-bit immediate.  */
5290 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5295 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5296 else if (x86_64_immediate_operand (offset, DImode))
5297 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5301 /* r11 is used by indirect sibcall return as well, set before the
5302 epilogue and used after the epilogue. ATM indirect sibcall
5303 shouldn't be used together with huge frame sizes in one
5304 function because of the frame_size check in sibcall.c. */
/* Huge offset: materialize it in %r11 first, then add.  */
5306 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5307 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5309 RTX_FRAME_RELATED_P (insn) = 1;
5310 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5314 RTX_FRAME_RELATED_P (insn) = 1;
5317 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* TARGET_INTERNAL_ARG_POINTER hook: normally the virtual incoming-args
   pointer, but when stack realignment is requested (-mstackrealign, the
   force_align_arg_pointer attribute, or main() under
   FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN) a pseudo copied from %ecx is
   used so the prologue can realign the stack.  Nested functions cannot
   realign (the static-chain register conflicts) and fall back with a
   diagnostic.  */
5320 ix86_internal_arg_pointer (void)
5322 bool has_force_align_arg_pointer =
5323 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5324 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5325 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5326 && DECL_NAME (current_function_decl)
5327 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5328 && DECL_FILE_SCOPE_P (current_function_decl))
5329 || ix86_force_align_arg_pointer
5330 || has_force_align_arg_pointer)
5332 /* Nested functions can't realign the stack due to a register
5334 if (DECL_CONTEXT (current_function_decl)
5335 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5337 if (ix86_force_align_arg_pointer)
5338 warning (0, "-mstackrealign ignored for nested functions");
5339 if (has_force_align_arg_pointer)
5340 error ("%s not supported for nested functions",
5341 ix86_force_align_arg_pointer_string);
5342 return virtual_incoming_args_rtx;
/* Register 2 is %ecx — the scratch used to carry the pre-realignment
   argument pointer into the function.  */
5344 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5345 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5348 return virtual_incoming_args_rtx;
5351 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5352 This is called from dwarf2out.c to emit call frame instructions
5353 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
/* TARGET_DWARF_HANDLE_FRAME_UNSPEC hook, called from dwarf2out.c for
   frame-related insns whose pattern is an UNSPEC: translate the
   stack-realignment UNSPECs (UNSPEC_REG_SAVE, UNSPEC_DEF_CFA) into the
   corresponding call-frame instructions at LABEL.  */
5355 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5357 rtx unspec = SET_SRC (pattern);
5358 gcc_assert (GET_CODE (unspec) == UNSPEC);
5362 case UNSPEC_REG_SAVE:
5363 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5364 SET_DEST (pattern));
5366 case UNSPEC_DEF_CFA:
5367 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5368 INTVAL (XVECEXP (unspec, 0, 0)));
5375 /* Expand the prologue into a bunch of separate insns. */
/* Emits, in order: optional stack realignment via
   cfun->machine->force_align_arg_pointer (saving the arg pointer,
   and-masking %esp, re-pushing the return address, with hand-built
   DWARF unwind notes); the frame-pointer push/set; register saves
   (push form or mov form per frame.save_regs_using_mov); frame
   allocation — either a direct sp adjustment or, for large probed
   frames, the Win32 allocate_stack_worker path using %eax; and
   finally PIC register setup (set_got) plus a scheduling blockage
   when profiling.  NOTE(review): this excerpt is elided — braces,
   some declarations (x, y, insn, t, pic_reg_used) and several
   statement lines are missing.  */
5378 ix86_expand_prologue (void)
5382 struct ix86_frame frame;
5383 HOST_WIDE_INT allocate;
5385 ix86_compute_frame_layout (&frame);
5387 if (cfun->machine->force_align_arg_pointer)
5391 /* Grab the argument pointer. */
5392 x = plus_constant (stack_pointer_rtx, 4);
5393 y = cfun->machine->force_align_arg_pointer;
5394 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5395 RTX_FRAME_RELATED_P (insn) = 1;
5397 /* The unwind info consists of two parts: install the fafp as the cfa,
5398 and record the fafp as the "save register" of the stack pointer.
5399 The later is there in order that the unwinder can see where it
5400 should restore the stack pointer across the and insn. */
5401 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5402 x = gen_rtx_SET (VOIDmode, y, x);
5403 RTX_FRAME_RELATED_P (x) = 1;
5404 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5406 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5407 RTX_FRAME_RELATED_P (y) = 1;
5408 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5409 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5410 REG_NOTES (insn) = x;
5412 /* Align the stack. */
5413 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5416 /* And here we cheat like madmen with the unwind info. We force the
5417 cfa register back to sp+4, which is exactly what it was at the
5418 start of the function. Re-pushing the return address results in
5419 the return at the same spot relative to the cfa, and thus is
5420 correct wrt the unwind info. */
5421 x = cfun->machine->force_align_arg_pointer;
5422 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5423 insn = emit_insn (gen_push (x));
5424 RTX_FRAME_RELATED_P (insn) = 1;
5427 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5428 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5429 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5430 REG_NOTES (insn) = x;
5433 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5434 slower on all targets. Also sdb doesn't like it. */
5436 if (frame_pointer_needed)
5438 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5439 RTX_FRAME_RELATED_P (insn) = 1;
5441 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5442 RTX_FRAME_RELATED_P (insn) = 1;
5445 allocate = frame.to_allocate;
5447 if (!frame.save_regs_using_mov)
5448 ix86_emit_save_regs ();
/* When saves are done with movs, the register area is part of the
   allocation; fold it in.  */
5450 allocate += frame.nregs * UNITS_PER_WORD;
5452 /* When using red zone we may start register saving before allocating
5453 the stack frame saving one cycle of the prologue. */
5454 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5455 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5456 : stack_pointer_rtx,
5457 -frame.nregs * UNITS_PER_WORD);
5461 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5462 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5463 GEN_INT (-allocate), -1);
/* Large frame with stack probing: route the allocation through the
   allocate_stack_worker pattern, preserving a live incoming %eax.  */
5466 /* Only valid for Win32. */
5467 rtx eax = gen_rtx_REG (SImode, 0);
5468 bool eax_live = ix86_eax_live_at_start_p ();
5471 gcc_assert (!TARGET_64BIT);
5475 emit_insn (gen_push (eax));
5479 emit_move_insn (eax, GEN_INT (allocate));
5481 insn = emit_insn (gen_allocate_stack_worker (eax));
5482 RTX_FRAME_RELATED_P (insn) = 1;
5483 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5484 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5485 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5486 t, REG_NOTES (insn));
/* Reload the saved %eax from its slot (address depends on whether a
   frame pointer is in use).  */
5490 if (frame_pointer_needed)
5491 t = plus_constant (hard_frame_pointer_rtx,
5494 - frame.nregs * UNITS_PER_WORD);
5496 t = plus_constant (stack_pointer_rtx, allocate);
5497 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5501 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5503 if (!frame_pointer_needed || !frame.to_allocate)
5504 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5506 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5507 -frame.nregs * UNITS_PER_WORD);
/* Materialize the PIC register if anything in the function uses it
   (or profiling forces it).  */
5510 pic_reg_used = false;
5511 if (pic_offset_table_rtx
5512 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5513 || current_function_profile))
5515 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5517 if (alt_pic_reg_used != INVALID_REGNUM)
5518 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5520 pic_reg_used = true;
5526 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5528 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5530 /* Even with accurate pre-reload life analysis, we can wind up
5531 deleting all references to the pic register after reload.
5532 Consider if cross-jumping unifies two sides of a branch
5533 controlled by a comparison vs the only read from a global.
5534 In which case, allow the set_got to be deleted, though we're
5535 too late to do anything about the ebx save in the prologue. */
5536 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5539 /* Prevent function calls from be scheduled before the call to mcount.
5540 In the pic_reg_used case, make sure that the got load isn't deleted. */
5541 if (current_function_profile)
5542 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5545 /* Emit code to restore saved registers using MOV insns. First register
5546 is restored from POINTER + OFFSET. */
/* Walks all hard registers; each one that ix86_save_reg says was saved
   is reloaded from successive word-sized slots at POINTER + OFFSET.
   MAYBE_EH_RETURN widens the saved set for eh_return paths.
   NOTE(review): elided listing — the function's return type, braces
   and the r11 declaration line are missing.  */
5548 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5549 int maybe_eh_return)
5552 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5554 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5555 if (ix86_save_reg (regno, maybe_eh_return))
5557 /* Ensure that adjust_address won't be forced to produce pointer
5558 out of range allowed by x86-64 instruction set. */
5559 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset doesn't fit in a signed 32-bit displacement: compute the
   address into r11 and restore relative to it instead.  */
5563 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5564 emit_move_insn (r11, GEN_INT (offset));
5565 emit_insn (gen_adddi3 (r11, r11, pointer));
5566 base_address = gen_rtx_MEM (Pmode, r11);
5569 emit_move_insn (gen_rtx_REG (Pmode, regno),
5570 adjust_address (base_address, Pmode, offset));
5571 offset += UNITS_PER_WORD;
5575 /* Restore function stack, frame, and registers. */
/* STYLE appears to select the epilogue flavor (style == 2 is the
   eh_return path; sibcall epilogues skip the return insn — TODO
   confirm exact encoding against the full source).  Two strategies:
   (1) restore registers with movs and tear the frame down with
   leave/pop, or (2) deallocate the frame first and pop registers.
   Also handles eh_return stack adjustment via EH_RETURN_STACKADJ_RTX,
   un-realigning when force_align_arg_pointer was used, and callee-pop
   returns (ret $n, with an indirect-jump fallback for pops >= 64K).
   NOTE(review): elided listing — braces, `else` lines, regno
   declaration and several short lines are missing.  */
5578 ix86_expand_epilogue (int style)
5581 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5582 struct ix86_frame frame;
5583 HOST_WIDE_INT offset;
5585 ix86_compute_frame_layout (&frame);
5587 /* Calculate start of saved registers relative to ebp. Special care
5588 must be taken for the normal return case of a function using
5589 eh_return: the eax and edx registers are marked as saved, but not
5590 restored along this path. */
5591 offset = frame.nregs;
5592 if (current_function_calls_eh_return && style != 2)
5594 offset *= -UNITS_PER_WORD;
5596 /* If we're only restoring one register and sp is not valid then
5597 using a move instruction to restore the register since it's
5598 less work than reloading sp and popping the register.
5600 The default code result in stack adjustment using add/lea instruction,
5601 while this code results in LEAVE instruction (or discrete equivalent),
5602 so it is profitable in some other cases as well. Especially when there
5603 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5604 and there is exactly one register to pop. This heuristic may need some
5605 tuning in future. */
5606 if ((!sp_valid && frame.nregs <= 1)
5607 || (TARGET_EPILOGUE_USING_MOVE
5608 && cfun->machine->use_fast_prologue_epilogue
5609 && (frame.nregs > 1 || frame.to_allocate))
5610 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5611 || (frame_pointer_needed && TARGET_USE_LEAVE
5612 && cfun->machine->use_fast_prologue_epilogue
5613 && frame.nregs == 1)
5614 || current_function_calls_eh_return)
5616 /* Restore registers. We can use ebp or esp to address the memory
5617 locations. If both are available, default to ebp, since offsets
5618 are known to be small. Only exception is esp pointing directly to the
5619 end of block of saved registers, where we may simplify addressing
5622 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5623 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5624 frame.to_allocate, style == 2);
5626 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5627 offset, style == 2);
5629 /* eh_return epilogues need %ecx added to the stack pointer. */
5632 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5634 if (frame_pointer_needed)
5636 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5637 tmp = plus_constant (tmp, UNITS_PER_WORD);
5638 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5640 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5641 emit_move_insn (hard_frame_pointer_rtx, tmp);
5643 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5648 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5649 tmp = plus_constant (tmp, (frame.to_allocate
5650 + frame.nregs * UNITS_PER_WORD));
5651 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5654 else if (!frame_pointer_needed)
5655 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5656 GEN_INT (frame.to_allocate
5657 + frame.nregs * UNITS_PER_WORD),
5659 /* If not an i386, mov & pop is faster than "leave". */
5660 else if (TARGET_USE_LEAVE || optimize_size
5661 || !cfun->machine->use_fast_prologue_epilogue)
5662 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5665 pro_epilogue_adjust_stack (stack_pointer_rtx,
5666 hard_frame_pointer_rtx,
5669 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5671 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Strategy (2): deallocate first, then pop saved registers.  */
5676 /* First step is to deallocate the stack frame so that we can
5677 pop the registers. */
5680 gcc_assert (frame_pointer_needed);
5681 pro_epilogue_adjust_stack (stack_pointer_rtx,
5682 hard_frame_pointer_rtx,
5683 GEN_INT (offset), style);
5685 else if (frame.to_allocate)
5686 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5687 GEN_INT (frame.to_allocate), style);
5689 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5690 if (ix86_save_reg (regno, false))
5693 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5695 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5697 if (frame_pointer_needed)
5699 /* Leave results in shorter dependency chains on CPUs that are
5700 able to grok it fast. */
5701 if (TARGET_USE_LEAVE)
5702 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5703 else if (TARGET_64BIT)
5704 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5706 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Undo the prologue's stack realignment, if any.  */
5710 if (cfun->machine->force_align_arg_pointer)
5712 emit_insn (gen_addsi3 (stack_pointer_rtx,
5713 cfun->machine->force_align_arg_pointer,
5717 /* Sibcall epilogues don't want a return instruction. */
5721 if (current_function_pops_args && current_function_args_size)
5723 rtx popc = GEN_INT (current_function_pops_args);
5725 /* i386 can only pop 64K bytes. If asked to pop more, pop
5726 return address, do explicit add, and jump indirectly to the
5729 if (current_function_pops_args >= 65536)
5731 rtx ecx = gen_rtx_REG (SImode, 2);
5733 /* There is no "pascal" calling convention in 64bit ABI. */
5734 gcc_assert (!TARGET_64BIT);
5736 emit_insn (gen_popsi1 (ecx));
5737 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5738 emit_jump_insn (gen_return_indirect_internal (ecx));
5741 emit_jump_insn (gen_return_pop_internal (popc));
5744 emit_jump_insn (gen_return_internal ());
5747 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: restore the PIC register's real
   hard regno (the prologue may have retargeted it to an alternate
   register), and on Mach-O emit a trailing NOP when the function ends
   in something a label could attach to.  NOTE(review): elided listing —
   braces, the TARGET_MACHO conditional and parts of the insn-scanning
   conditions are missing.  */
5750 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5751 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5753 if (pic_offset_table_rtx)
5754 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5756 /* Mach-O doesn't support labels at the end of objects, so if
5757 it looks like we might want one, insert a NOP. */
5759 rtx insn = get_last_insn ();
5762 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5763 insn = PREV_INSN (insn);
5767 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5768 fputs ("\tnop\n", file);
5774 /* Extract the parts of an RTL expression that is a valid memory address
5775 for an instruction. Return 0 if the structure of the address is
5776 grossly off. Return -1 if the address contains ASHIFT, so it is not
5777 strictly valid, but still used for computing length of lea instruction. */
/* Splits ADDR into base + index*scale + disp (+ optional segment) and
   stores the parts in *OUT, canonicalizing several special cases on
   the way.  NOTE(review): heavily elided listing — the PLUS-flattening
   loop body, several switch cases, returns of 0, and the final store
   into *OUT are missing.  */
5780 ix86_decompose_address (rtx addr, struct ix86_address *out)
5782 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5783 rtx base_reg, index_reg;
5784 HOST_WIDE_INT scale = 1;
5785 rtx scale_rtx = NULL_RTX;
5787 enum ix86_address_seg seg = SEG_DEFAULT;
5789 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5791 else if (GET_CODE (addr) == PLUS)
/* Flatten a nested PLUS tree into the addends[] worklist, then
   classify each addend.  */
5801 addends[n++] = XEXP (op, 1);
5804 while (GET_CODE (op) == PLUS);
5809 for (i = n; i >= 0; --i)
5812 switch (GET_CODE (op))
5817 index = XEXP (op, 0);
5818 scale_rtx = XEXP (op, 1);
/* An UNSPEC_TP addend selects the thread-pointer segment register
   (%fs on 64-bit, %gs on 32-bit).  */
5822 if (XINT (op, 1) == UNSPEC_TP
5823 && TARGET_TLS_DIRECT_SEG_REFS
5824 && seg == SEG_DEFAULT)
5825 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5854 else if (GET_CODE (addr) == MULT)
5856 index = XEXP (addr, 0); /* index*scale */
5857 scale_rtx = XEXP (addr, 1);
5859 else if (GET_CODE (addr) == ASHIFT)
5863 /* We're called for lea too, which implements ashift on occasion. */
5864 index = XEXP (addr, 0);
5865 tmp = XEXP (addr, 1);
5866 if (GET_CODE (tmp) != CONST_INT)
5868 scale = INTVAL (tmp);
5869 if ((unsigned HOST_WIDE_INT) scale > 3)
5875 disp = addr; /* displacement */
5877 /* Extract the integral value of scale. */
5880 if (GET_CODE (scale_rtx) != CONST_INT)
5882 scale = INTVAL (scale_rtx);
5885 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5886 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5888 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5889 if (base_reg && index_reg && scale == 1
5890 && (index_reg == arg_pointer_rtx
5891 || index_reg == frame_pointer_rtx
5892 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5895 tmp = base, base = index, index = tmp;
5896 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5899 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5900 if ((base_reg == hard_frame_pointer_rtx
5901 || base_reg == frame_pointer_rtx
5902 || base_reg == arg_pointer_rtx) && !disp)
5905 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5906 Avoid this by transforming to [%esi+0]. */
5907 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5908 && base_reg && !index_reg && !disp
5910 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5913 /* Special case: encode reg+reg instead of reg*2. */
5914 if (!base && index && scale && scale == 2)
5915 base = index, base_reg = index_reg, scale = 1;
5917 /* Special case: scaling cannot be encoded without base or displacement. */
5918 if (!base && !disp && index && scale != 1)
5930 /* Return cost of the memory address x.
5931 For i386, it is better to use a complex address than let gcc copy
5932 the address into a reg and make a new pseudo. But not if the address
5933 requires to two regs - that would mean more pseudos with longer
/* Decomposes X via ix86_decompose_address and derives a small integer
   cost: complex forms (disp, segment override) lower it, extra hard
   registers raise it, and K6-hostile ModR/M patterns are penalized.
   NOTE(review): elided listing — the cost accumulator declaration,
   assert, increments/decrements and the final return are missing.  */
5936 ix86_address_cost (rtx x)
5938 struct ix86_address parts;
5940 int ok = ix86_decompose_address (x, &parts);
5944 if (parts.base && GET_CODE (parts.base) == SUBREG)
5945 parts.base = SUBREG_REG (parts.base);
5946 if (parts.index && GET_CODE (parts.index) == SUBREG)
5947 parts.index = SUBREG_REG (parts.index);
5949 /* More complex memory references are better. */
5950 if (parts.disp && parts.disp != const0_rtx)
5952 if (parts.seg != SEG_DEFAULT)
5955 /* Attempt to minimize number of registers in the address. */
5957 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5959 && (!REG_P (parts.index)
5960 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5964 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5966 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5967 && parts.base != parts.index)
5970 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5971 since it's predecode logic can't detect the length of instructions
5972 and it degenerates to vector decoded. Increase cost of such
5973 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5974 to split such addresses or even refuse such addresses at all.
5976 Following addressing modes are affected:
5981 The first and last case may be avoidable by explicitly coding the zero in
5982 memory address, but I don't have AMD-K6 machine handy to check this
5986 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5987 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5988 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5994 /* If X is a machine specific address (i.e. a symbol or label being
5995 referenced as a displacement from the GOT implemented using an
5996 UNSPEC), then return the base term. Otherwise return X. */
/* Drills into a CONST wrapping UNSPEC_GOTPCREL (skipping a constant
   addend) to reach the underlying SYMBOL_REF/LABEL_REF; falls back to
   ix86_delegitimize_address.  NOTE(review): elided listing — the
   64-bit guard, term initialization, braces and return statements are
   missing.  */
5999 ix86_find_base_term (rtx x)
6005 if (GET_CODE (x) != CONST)
6008 if (GET_CODE (term) == PLUS
6009 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6010 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6011 term = XEXP (term, 0);
6012 if (GET_CODE (term) != UNSPEC
6013 || XINT (term, 1) != UNSPEC_GOTPCREL)
6016 term = XVECEXP (term, 0, 0);
6018 if (GET_CODE (term) != SYMBOL_REF
6019 && GET_CODE (term) != LABEL_REF)
6025 term = ix86_delegitimize_address (x);
6027 if (GET_CODE (term) != SYMBOL_REF
6028 && GET_CODE (term) != LABEL_REF)
6034 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6035 this is used for to form addresses to local data when -fPIC is in
/* Recognizes the Darwin PIC idiom (label-or-symbol minus the literal
   "<pic base>" symbol).  NOTE(review): elided listing — the return
   statements and closing braces are missing.  */
6039 darwin_local_data_pic (rtx disp)
6041 if (GET_CODE (disp) == MINUS)
6043 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6044 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6045 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6047 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6048 if (! strcmp (sym_name, "<pic base>"))
6056 /* Determine if a given RTX is a valid constant. We already know this
6057 satisfies CONSTANT_P. */
/* Accepts plain symbols/labels (except TLS), selected UNSPECs
   (TLS offsets, and one case valid only on 64-bit), and numeric
   constants with mode-specific restrictions.  NOTE(review): elided
   listing — the switch case labels (CONST, SYMBOL_REF, CONST_DOUBLE,
   CONST_VECTOR), `break`s and returns are missing.  */
6060 legitimate_constant_p (rtx x)
6062 switch (GET_CODE (x))
6067 if (GET_CODE (x) == PLUS)
6069 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6074 if (TARGET_MACHO && darwin_local_data_pic (x))
6077 /* Only some unspecs are valid as "constants". */
6078 if (GET_CODE (x) == UNSPEC)
6079 switch (XINT (x, 1))
6082 return TARGET_64BIT;
6085 x = XVECEXP (x, 0, 0);
6086 return (GET_CODE (x) == SYMBOL_REF
6087 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6089 x = XVECEXP (x, 0, 0);
6090 return (GET_CODE (x) == SYMBOL_REF
6091 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6096 /* We must have drilled down to a symbol. */
6097 if (GET_CODE (x) == LABEL_REF)
6099 if (GET_CODE (x) != SYMBOL_REF)
6104 /* TLS symbols are never valid. */
6105 if (SYMBOL_REF_TLS_MODEL (x))
6110 if (GET_MODE (x) == TImode
6111 && x != CONST0_RTX (TImode)
6117 if (x == CONST0_RTX (GET_MODE (x)))
6125 /* Otherwise we handle everything else in the move patterns. */
6129 /* Determine if it's legal to put X into the constant pool. This
6130 is not possible for the address of thread-local symbols, which
6131 is checked above. */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: anything that is not a
   legitimate constant (notably TLS symbol addresses) must not be
   spilled to the constant pool.  NOTE(review): elided listing — the
   case labels for the always-allowed codes and their `return false`
   are missing.  */
6134 ix86_cannot_force_const_mem (rtx x)
6136 /* We can always put integral constants and vectors in memory. */
6137 switch (GET_CODE (x))
6147 return !legitimate_constant_p (x);
6150 /* Determine if a given RTX is a valid constant address. */
/* A constant address must also pass strict address legitimization.  */
6153 constant_address_p (rtx x)
6155 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6158 /* Nonzero if the constant value X is a legitimate general operand
6159 when generating PIC code. It is given that flag_pic is on and
6160 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* For CONST, peels off an integer addend and then vets any UNSPEC
   (TLS local-exec offsets; one unspec kind is valid only on 64-bit);
   symbols and labels defer to legitimate_pic_address_disp_p.
   NOTE(review): elided listing — case labels, the `inner` declaration
   and default returns are missing.  */
6163 legitimate_pic_operand_p (rtx x)
6167 switch (GET_CODE (x))
6170 inner = XEXP (x, 0);
6171 if (GET_CODE (inner) == PLUS
6172 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6173 inner = XEXP (inner, 0);
6175 /* Only some unspecs are valid as "constants". */
6176 if (GET_CODE (inner) == UNSPEC)
6177 switch (XINT (inner, 1))
6180 return TARGET_64BIT;
6182 x = XVECEXP (inner, 0, 0);
6183 return (GET_CODE (x) == SYMBOL_REF
6184 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6192 return legitimate_pic_address_disp_p (x);
6199 /* Determine if a given CONST RTX is a valid memory displacement
/* Validates DISP as a PIC-mode memory displacement.  On 64-bit,
   direct symbol/label references (with a small +/-16MB addend) are
   allowed when the symbol is local and not TLS; otherwise DISP must
   be a CONST wrapping one of the recognized GOT/TLS UNSPECs, whose
   operand is checked to be the matching kind of SYMBOL_REF.
   NOTE(review): elided listing — case labels, returns, braces and the
   `saw_plus` handling are missing.  */
6203 legitimate_pic_address_disp_p (rtx disp)
6207 /* In 64bit mode we can allow direct addresses of symbols and labels
6208 when they are not dynamic symbols. */
6211 rtx op0 = disp, op1;
6213 switch (GET_CODE (disp))
6219 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6221 op0 = XEXP (XEXP (disp, 0), 0);
6222 op1 = XEXP (XEXP (disp, 0), 1);
6223 if (GET_CODE (op1) != CONST_INT
6224 || INTVAL (op1) >= 16*1024*1024
6225 || INTVAL (op1) < -16*1024*1024)
6227 if (GET_CODE (op0) == LABEL_REF)
6229 if (GET_CODE (op0) != SYMBOL_REF)
6234 /* TLS references should always be enclosed in UNSPEC. */
6235 if (SYMBOL_REF_TLS_MODEL (op0))
6237 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6245 if (GET_CODE (disp) != CONST)
6247 disp = XEXP (disp, 0);
6251 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6252 of GOT tables. We should not need these anyway. */
6253 if (GET_CODE (disp) != UNSPEC
6254 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6255 && XINT (disp, 1) != UNSPEC_GOTOFF))
6258 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6259 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6265 if (GET_CODE (disp) == PLUS)
6267 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6269 disp = XEXP (disp, 0);
6273 if (TARGET_MACHO && darwin_local_data_pic (disp))
6276 if (GET_CODE (disp) != UNSPEC)
6279 switch (XINT (disp, 1))
6284 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6286 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6287 While ABI specify also 32bit relocation but we don't produce it in
6288 small PIC model at all. */
6289 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6290 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6292 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6294 case UNSPEC_GOTTPOFF:
6295 case UNSPEC_GOTNTPOFF:
6296 case UNSPEC_INDNTPOFF:
/* TLS unspecs: each must wrap a SYMBOL_REF whose TLS model matches
   the relocation kind.  */
6299 disp = XVECEXP (disp, 0, 0);
6300 return (GET_CODE (disp) == SYMBOL_REF
6301 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6303 disp = XVECEXP (disp, 0, 0);
6304 return (GET_CODE (disp) == SYMBOL_REF
6305 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6307 disp = XVECEXP (disp, 0, 0);
6308 return (GET_CODE (disp) == SYMBOL_REF
6309 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6315 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6316 memory address for an instruction. The MODE argument is the machine mode
6317 for the MEM expression that wants to use this address.
6319 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6320 convert common non-canonical forms to canonical form so that they will
/* Decomposes ADDR and validates each part in turn: base register,
   index register, scale factor, then the displacement (including the
   PIC/UNSPEC cases).  On failure, jumps to a reporting label with a
   human-readable `reason`.  NOTE(review): elided listing — labels
   (report_error etc.), braces, `return` lines and some condition
   fragments are missing.  */
6324 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6326 struct ix86_address parts;
6327 rtx base, index, disp;
6328 HOST_WIDE_INT scale;
6329 const char *reason = NULL;
6330 rtx reason_rtx = NULL_RTX;
6332 if (TARGET_DEBUG_ADDR)
6335 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6336 GET_MODE_NAME (mode), strict);
6340 if (ix86_decompose_address (addr, &parts) <= 0)
6342 reason = "decomposition failed";
6347 index = parts.index;
6349 scale = parts.scale;
6351 /* Validate base register.
6353 Don't allow SUBREG's that span more than a word here. It can lead to spill
6354 failures when the base is one word out of a two word structure, which is
6355 represented internally as a DImode int. */
6364 else if (GET_CODE (base) == SUBREG
6365 && REG_P (SUBREG_REG (base))
6366 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6368 reg = SUBREG_REG (base);
6371 reason = "base is not a register";
6375 if (GET_MODE (base) != Pmode)
6377 reason = "base is not in Pmode";
6381 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6382 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6384 reason = "base is not valid";
6389 /* Validate index register.
6391 Don't allow SUBREG's that span more than a word here -- same as above. */
6400 else if (GET_CODE (index) == SUBREG
6401 && REG_P (SUBREG_REG (index))
6402 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6404 reg = SUBREG_REG (index);
6407 reason = "index is not a register";
6411 if (GET_MODE (index) != Pmode)
6413 reason = "index is not in Pmode";
6417 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6418 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6420 reason = "index is not valid";
6425 /* Validate scale factor. */
6428 reason_rtx = GEN_INT (scale);
6431 reason = "scale without index";
6435 if (scale != 2 && scale != 4 && scale != 8)
6437 reason = "scale is not a valid multiplier";
6442 /* Validate displacement. */
6447 if (GET_CODE (disp) == CONST
6448 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6449 switch (XINT (XEXP (disp, 0), 1))
6451 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6452 used. While ABI specify also 32bit relocations, we don't produce
6453 them at all and use IP relative instead. */
6456 gcc_assert (flag_pic);
6458 goto is_legitimate_pic;
6459 reason = "64bit address unspec";
6462 case UNSPEC_GOTPCREL:
6463 gcc_assert (flag_pic);
6464 goto is_legitimate_pic;
6466 case UNSPEC_GOTTPOFF:
6467 case UNSPEC_GOTNTPOFF:
6468 case UNSPEC_INDNTPOFF:
6474 reason = "invalid address unspec";
6478 else if (SYMBOLIC_CONST (disp)
6482 && MACHOPIC_INDIRECT
6483 && !machopic_operand_p (disp)
6489 if (TARGET_64BIT && (index || base))
6491 /* foo@dtpoff(%rX) is ok. */
6492 if (GET_CODE (disp) != CONST
6493 || GET_CODE (XEXP (disp, 0)) != PLUS
6494 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6495 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6496 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6497 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6499 reason = "non-constant pic memory reference";
6503 else if (! legitimate_pic_address_disp_p (disp))
6505 reason = "displacement is an invalid pic construct";
6509 /* This code used to verify that a symbolic pic displacement
6510 includes the pic_offset_table_rtx register.
6512 While this is good idea, unfortunately these constructs may
6513 be created by "adds using lea" optimization for incorrect
6522 This code is nonsensical, but results in addressing
6523 GOT table with pic_offset_table_rtx base. We can't
6524 just refuse it easily, since it gets matched by
6525 "addsi3" pattern, that later gets split to lea in the
6526 case output register differs from input. While this
6527 can be handled by separate addsi pattern for this case
6528 that never results in lea, this seems to be easier and
6529 correct fix for crash to disable this test. */
6531 else if (GET_CODE (disp) != LABEL_REF
6532 && GET_CODE (disp) != CONST_INT
6533 && (GET_CODE (disp) != CONST
6534 || !legitimate_constant_p (disp))
6535 && (GET_CODE (disp) != SYMBOL_REF
6536 || !legitimate_constant_p (disp)))
6538 reason = "displacement is not constant";
6541 else if (TARGET_64BIT
6542 && !x86_64_immediate_operand (disp, VOIDmode))
6544 reason = "displacement is out of range";
6549 /* Everything looks valid. */
6550 if (TARGET_DEBUG_ADDR)
6551 fprintf (stderr, "Success.\n");
6555 if (TARGET_DEBUG_ADDR)
6557 fprintf (stderr, "Error: %s\n", reason);
6558 debug_rtx (reason_rtx);
6563 /* Return a unique alias set for the GOT. */
/* Lazily allocates one alias set on first call and caches it in a
   function-local static.  NOTE(review): elided listing — braces, the
   `if (set == -1)` guard line and the final `return set;` are missing
   from this excerpt.  */
6565 static HOST_WIDE_INT
6566 ix86_GOT_alias_set (void)
6568 static HOST_WIDE_INT set = -1;
6570 set = new_alias_set ();
6574 /* Return a legitimate reference for ORIG (an address) using the
6575 register REG. If REG is 0, a new pseudo is generated.
6577 There are two types of references that must be handled:
6579 1. Global data references must load the address from the GOT, via
6580 the PIC reg. An insn is emitted to do this load, and the reg is
6583 2. Static data references, constant pool addresses, and code labels
6584 compute the address as an offset from the GOT, whose base is in
6585 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6586 differentiate them from global data objects. The returned
6587 address is the PIC reg + an unspec constant.
6589 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6590 reg also appears in the address. */
/* NOTE(review): elided listing — the function header line, local
   declarations (new, addr, tmpreg, base), braces, `else` lines and
   several short statements are missing from this excerpt.  */
6593 legitimize_pic_address (rtx orig, rtx reg)
6600 if (TARGET_MACHO && !TARGET_64BIT)
6603 reg = gen_reg_rtx (Pmode);
6604 /* Use the generic Mach-O PIC machinery. */
6605 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6609 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6611 else if (TARGET_64BIT
6612 && ix86_cmodel != CM_SMALL_PIC
6613 && local_symbolic_operand (addr, Pmode))
6616 /* This symbol may be referenced via a displacement from the PIC
6617 base address (@GOTOFF). */
6619 if (reload_in_progress)
6620 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6621 if (GET_CODE (addr) == CONST)
6622 addr = XEXP (addr, 0);
6623 if (GET_CODE (addr) == PLUS)
6625 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6626 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6629 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6630 new = gen_rtx_CONST (Pmode, new);
6632 tmpreg = gen_reg_rtx (Pmode);
6635 emit_move_insn (tmpreg, new);
6639 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6640 tmpreg, 1, OPTAB_DIRECT);
6643 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6645 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6647 /* This symbol may be referenced via a displacement from the PIC
6648 base address (@GOTOFF). */
6650 if (reload_in_progress)
6651 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6652 if (GET_CODE (addr) == CONST)
6653 addr = XEXP (addr, 0);
6654 if (GET_CODE (addr) == PLUS)
6656 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6657 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6660 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6661 new = gen_rtx_CONST (Pmode, new);
6662 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6666 emit_move_insn (reg, new);
/* Non-TLS symbol on 64-bit: load through @GOTPCREL.  */
6670 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6674 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6675 new = gen_rtx_CONST (Pmode, new);
6676 new = gen_const_mem (Pmode, new);
6677 set_mem_alias_set (new, ix86_GOT_alias_set ());
6680 reg = gen_reg_rtx (Pmode);
6681 /* Use directly gen_movsi, otherwise the address is loaded
6682 into register for CSE. We don't want to CSE this addresses,
6683 instead we CSE addresses from the GOT table, so skip this. */
6684 emit_insn (gen_movsi (reg, new));
6689 /* This symbol must be referenced via a load from the
6690 Global Offset Table (@GOT). */
6692 if (reload_in_progress)
6693 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6694 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6695 new = gen_rtx_CONST (Pmode, new);
6696 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6697 new = gen_const_mem (Pmode, new);
6698 set_mem_alias_set (new, ix86_GOT_alias_set ());
6701 reg = gen_reg_rtx (Pmode);
6702 emit_move_insn (reg, new);
/* Non-symbolic operands: constants that don't fit, bare CONSTs with
   our own unspecs, and PLUS expressions handled recursively.  */
6708 if (GET_CODE (addr) == CONST_INT
6709 && !x86_64_immediate_operand (addr, VOIDmode))
6713 emit_move_insn (reg, addr);
6717 new = force_reg (Pmode, addr);
6719 else if (GET_CODE (addr) == CONST)
6721 addr = XEXP (addr, 0);
6723 /* We must match stuff we generate before. Assume the only
6724 unspecs that can get here are ours. Not that we could do
6725 anything with them anyway.... */
6726 if (GET_CODE (addr) == UNSPEC
6727 || (GET_CODE (addr) == PLUS
6728 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6730 gcc_assert (GET_CODE (addr) == PLUS);
6732 if (GET_CODE (addr) == PLUS)
6734 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6736 /* Check first to see if this is a constant offset from a @GOTOFF
6737 symbol reference. */
6738 if (local_symbolic_operand (op0, Pmode)
6739 && GET_CODE (op1) == CONST_INT)
6743 if (reload_in_progress)
6744 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6745 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6747 new = gen_rtx_PLUS (Pmode, new, op1);
6748 new = gen_rtx_CONST (Pmode, new);
6749 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6753 emit_move_insn (reg, new);
6759 if (INTVAL (op1) < -16*1024*1024
6760 || INTVAL (op1) >= 16*1024*1024)
6762 if (!x86_64_immediate_operand (op1, Pmode))
6763 op1 = force_reg (Pmode, op1);
6764 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves, re-associating any constant
   addend onto the outer PLUS.  */
6770 base = legitimize_pic_address (XEXP (addr, 0), reg);
6771 new = legitimize_pic_address (XEXP (addr, 1),
6772 base == reg ? NULL_RTX : reg);
6774 if (GET_CODE (new) == CONST_INT)
6775 new = plus_constant (base, INTVAL (new));
6778 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6780 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6781 new = XEXP (new, 1);
6783 new = gen_rtx_PLUS (Pmode, base, new);
6791 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* NOTE(review): sampled extract -- gaps in the embedded original line
   numbering show that declarations, the !to_reg early path, and the
   return/closing lines are elided here.  */
6794 get_thread_pointer (int to_reg)
/* The thread pointer is represented as (unspec [const0] UNSPEC_TP);
   the TLS patterns in i386.md recognize this form.  */
6798 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
/* Copy the thread pointer into a fresh pseudo via an explicit SET
   (presumably the TO_REG case -- the guarding condition is elided).  */
6802 reg = gen_reg_rtx (Pmode);
6803 insn = gen_rtx_SET (VOIDmode, reg, tp);
6804 insn = emit_insn (insn);
6809 /* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
6810 false if we expect this to be used for a memory address and true if
6811 we expect to load the address into a register. */
/* NOTE(review): sampled extract -- the switch head, block braces, some
   else-arms and the final return are elided (gaps in the embedded
   numbering).  Each TLS access model is handled by one case below.  */
6814 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6816 rtx dest, base, off, pic, tp;
/* General dynamic: call __tls_get_addr (or use GNU2 TLS descriptors)
   to compute the address of symbol X at runtime.  */
6821 case TLS_MODEL_GLOBAL_DYNAMIC:
6822 dest = gen_reg_rtx (Pmode);
6823 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6825 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
/* 64-bit classic GD: the call returns the address in %rax (hard reg 0);
   wrap the call sequence in a libcall block equivalent to X.  */
6827 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6830 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6831 insns = get_insns ();
6834 emit_libcall_block (insns, dest, rax, x);
6836 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6837 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6839 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6841 if (TARGET_GNU2_TLS)
/* GNU2 descriptors yield an offset; add the thread pointer and
   record a REG_EQUIV so the value can be rematerialized.  */
6843 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6845 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Local dynamic: one __tls_get_addr call for the module base, then a
   per-symbol @DTPOFF addend.  */
6849 case TLS_MODEL_LOCAL_DYNAMIC:
6850 base = gen_reg_rtx (Pmode);
6851 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6853 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6855 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6858 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6859 insns = get_insns ();
/* Build an EXPR_LIST note naming __tls_get_addr so the libcall block
   is not treated as equivalent to any particular symbol.  */
6862 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6863 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6864 emit_libcall_block (insns, base, rax, note);
6866 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6867 emit_insn (gen_tls_local_dynamic_base_64 (base));
6869 emit_insn (gen_tls_local_dynamic_base_32 (base));
6871 if (TARGET_GNU2_TLS)
/* Note the inner X here shadows the parameter: it is the TLS module
   base, used only for the REG_EQUIV note.  */
6873 rtx x = ix86_tls_module_base ();
6875 set_unique_reg_note (get_last_insn (), REG_EQUIV,
6876 gen_rtx_MINUS (Pmode, x, tp));
/* Add the symbol's DTP-relative offset to the module base.  */
6879 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6880 off = gen_rtx_CONST (Pmode, off);
6882 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6884 if (TARGET_GNU2_TLS)
6886 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6888 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Initial exec: load the TP-relative offset from the GOT; the unspec
   flavor depends on bitness, PIC state and GNU-TLS support.  */
6893 case TLS_MODEL_INITIAL_EXEC:
6897 type = UNSPEC_GOTNTPOFF;
6901 if (reload_in_progress)
6902 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6903 pic = pic_offset_table_rtx;
6904 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6906 else if (!TARGET_ANY_GNU_TLS)
/* Non-PIC, non-GNU TLS: materialize a GOT pointer just for this.  */
6908 pic = gen_reg_rtx (Pmode);
6909 emit_insn (gen_set_got (pic));
6910 type = UNSPEC_GOTTPOFF;
6915 type = UNSPEC_INDNTPOFF;
6918 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6919 off = gen_rtx_CONST (Pmode, off);
6921 off = gen_rtx_PLUS (Pmode, pic, off);
/* The GOT slot is constant at runtime; mark the load accordingly.  */
6922 off = gen_const_mem (Pmode, off);
6923 set_mem_alias_set (off, ix86_GOT_alias_set ());
6925 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
/* Negative offsets from TP: return tp + off directly; keep TP in a
   register only when a plain address can't use a segment override.  */
6927 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6928 off = force_reg (Pmode, off);
6929 return gen_rtx_PLUS (Pmode, base, off);
/* Sun-style positive offsets: subtract from the thread pointer.  */
6933 base = get_thread_pointer (true);
6934 dest = gen_reg_rtx (Pmode);
6935 emit_insn (gen_subsi3 (dest, base, off));
/* Local exec: offset is a link-time constant relative to TP.  */
6939 case TLS_MODEL_LOCAL_EXEC:
6940 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6941 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6942 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6943 off = gen_rtx_CONST (Pmode, off);
6945 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6947 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6948 return gen_rtx_PLUS (Pmode, base, off);
6952 base = get_thread_pointer (true);
6953 dest = gen_reg_rtx (Pmode);
6954 emit_insn (gen_subsi3 (dest, base, off));
6965 /* Try machine-dependent ways of modifying an illegitimate address
6966 to be legitimate. If we find one, return the new, valid address.
6967 This macro is used in only one place: `memory_address' in explow.c.
6969 OLDX is the address as it was before break_out_memory_refs was called.
6970 In some cases it is useful to look at this to decide what needs to be done.
6972 MODE and WIN are passed so that this macro can use
6973 GO_IF_LEGITIMATE_ADDRESS.
6975 It is always safe for this macro to do nothing. It exists to recognize
6976 opportunities to optimize the output.
6978 For the 80386, we handle X+REG by loading X into a register R and
6979 using R+REG. R will go in a general reg and indexing will be used.
6980 However, if REG is a broken-out memory address or multiplication,
6981 nothing needs to be done because REG can certainly go in a general reg.
6983 When -fpic is used, special handling is needed for symbolic references.
6984 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): sampled extract -- local declarations ('log',
   'changed'), braces and several return/fall-through lines are elided
   (gaps in the embedded numbering).  */
6987 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6992 if (TARGET_DEBUG_ADDR)
6994 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6995 GET_MODE_NAME (mode));
/* TLS symbols (and CONST PLUS wrappers around them) are handled by the
   dedicated TLS legitimizer.  */
6999 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7001 return legitimize_tls_address (x, log, false);
7002 if (GET_CODE (x) == CONST
7003 && GET_CODE (XEXP (x, 0)) == PLUS
7004 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7005 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7007 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7008 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7011 if (flag_pic && SYMBOLIC_CONST (x))
7012 return legitimize_pic_address (x, 0);
7014 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7015 if (GET_CODE (x) == ASHIFT
7016 && GET_CODE (XEXP (x, 1)) == CONST_INT
7017 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7020 log = INTVAL (XEXP (x, 1));
7021 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7022 GEN_INT (1 << log))
7025 if (GET_CODE (x) == PLUS)
7027 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7029 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7030 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7031 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7034 log = INTVAL (XEXP (XEXP (x, 0), 1));
7035 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7036 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7037 GEN_INT (1 << log));
7040 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7041 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7042 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7045 log = INTVAL (XEXP (XEXP (x, 1), 1));
7046 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7047 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7048 GEN_INT (1 << log));
7051 /* Put multiply first if it isn't already. */
7052 if (GET_CODE (XEXP (x, 1)) == MULT)
7054 rtx tmp = XEXP (x, 0);
7055 XEXP (x, 0) = XEXP (x, 1);
7060 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7061 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7062 created by virtual register instantiation, register elimination, and
7063 similar optimizations. */
7064 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7067 x = gen_rtx_PLUS (Pmode,
7068 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7069 XEXP (XEXP (x, 1), 0)),
7070 XEXP (XEXP (x, 1), 1));
7074 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7075 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7076 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7077 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7078 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7079 && CONSTANT_P (XEXP (x, 1)))
7082 rtx other = NULL_RTX;
/* Fold whichever of the two constants is a CONST_INT into the other
   addend so only one constant term remains.  */
7084 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7086 constant = XEXP (x, 1);
7087 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7089 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7091 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7092 other = XEXP (x, 1);
7100 x = gen_rtx_PLUS (Pmode,
7101 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7102 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7103 plus_constant (other, INTVAL (constant)));
/* After each canonicalization round, stop as soon as the address is
   acceptable to GO_IF_LEGITIMATE_ADDRESS.  */
7107 if (changed && legitimate_address_p (mode, x, FALSE))
7110 if (GET_CODE (XEXP (x, 0)) == MULT)
7113 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7116 if (GET_CODE (XEXP (x, 1)) == MULT)
7119 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7123 && GET_CODE (XEXP (x, 1)) == REG
7124 && GET_CODE (XEXP (x, 0)) == REG)
7127 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7130 x = legitimize_pic_address (x, 0);
7133 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one operand into a register so the result is a
   simple base+other form.  */
7136 if (GET_CODE (XEXP (x, 0)) == REG)
7138 rtx temp = gen_reg_rtx (Pmode);
7139 rtx val = force_operand (XEXP (x, 1), temp);
7141 emit_move_insn (temp, val);
7147 else if (GET_CODE (XEXP (x, 1)) == REG)
7149 rtx temp = gen_reg_rtx (Pmode);
7150 rtx val = force_operand (XEXP (x, 0), temp);
7152 emit_move_insn (temp, val);
7162 /* Print an integer constant expression in assembler syntax. Addition
7163 and subtraction are the only arithmetic that may appear in these
7164 expressions. FILE is the stdio stream to write to, X is the rtx, and
7165 CODE is the operand print code from the output string. */
/* NOTE(review): sampled extract -- the case labels (PC, SYMBOL_REF,
   CODE_LABEL, CONST_INT, CONST, CONST_DOUBLE, PLUS, MINUS, UNSPEC)
   and several break/putc lines are elided.  */
7168 output_pic_addr_const (FILE *file, rtx x, int code)
7172 switch (GET_CODE (x))
7175 gcc_assert (flag_pic);
/* Symbols: emit the name, with @PLT for non-local calls ('P' code).  */
7180 output_addr_const (file, x);
7181 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7182 fputs ("@PLT", file);
7189 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7190 assemble_name (asm_out_file, buf);
7194 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7198 /* This used to output parentheses around the expression,
7199 but that does not work on the 386 (either ATT or BSD assembler). */
7200 output_pic_addr_const (file, XEXP (x, 0), code);
7204 if (GET_MODE (x) == VOIDmode)
7206 /* We can use %d if the number is <32 bits and positive. */
7207 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7208 fprintf (file, "0x%lx%08lx",
7209 (unsigned long) CONST_DOUBLE_HIGH (x),
7210 (unsigned long) CONST_DOUBLE_LOW (x));
7212 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7215 /* We can't handle floating point constants;
7216 PRINT_OPERAND must handle them. */
7217 output_operand_lossage ("floating constant misused");
7221 /* Some assemblers need integer constants to appear first. */
7222 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7224 output_pic_addr_const (file, XEXP (x, 0), code);
7226 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: subtrahend must be a CONST_INT; emitted as op1 - op0 with
   dialect-dependent grouping characters below.  */
7230 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7231 output_pic_addr_const (file, XEXP (x, 1), code);
7233 output_pic_addr_const (file, XEXP (x, 0), code);
7239 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7240 output_pic_addr_const (file, XEXP (x, 0), code);
7242 output_pic_addr_const (file, XEXP (x, 1), code);
7244 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: one operand, followed by the relocation suffix selected by
   XINT (x, 1).  */
7248 gcc_assert (XVECLEN (x, 0) == 1);
7249 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7250 switch (XINT (x, 1))
7253 fputs ("@GOT", file);
7256 fputs ("@GOTOFF", file);
7258 case UNSPEC_GOTPCREL:
7259 fputs ("@GOTPCREL(%rip)", file);
7261 case UNSPEC_GOTTPOFF:
7262 /* FIXME: This might be @TPOFF in Sun ld too. */
7263 fputs ("@GOTTPOFF", file);
7266 fputs ("@TPOFF", file);
7270 fputs ("@TPOFF", file);
7272 fputs ("@NTPOFF", file);
7275 fputs ("@DTPOFF", file);
7277 case UNSPEC_GOTNTPOFF:
7279 fputs ("@GOTTPOFF(%rip)", file);
7281 fputs ("@GOTNTPOFF", file);
7283 case UNSPEC_INDNTPOFF:
7284 fputs ("@INDNTPOFF", file);
7287 output_operand_lossage ("invalid UNSPEC as operand");
7293 output_operand_lossage ("invalid expression as operand");
7297 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7298 We need to emit DTP-relative relocations. */
/* NOTE(review): elided extract -- the size dispatch (switch on SIZE,
   with ASM_QUAD for 8 on 64-bit) is not visible here.  */
7301 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7303 fputs (ASM_LONG, file);
7304 output_addr_const (file, x);
7305 fputs ("@DTPOFF", file);
/* Pad the second half of an 8-byte entry with zero when emitted as
   two longs.  */
7311 fputs (", 0", file);
7318 /* In the name of slightly smaller debug output, and to cater to
7319 general assembler lossage, recognize PIC+GOTOFF and turn it back
7320 into a direct symbol reference.
7322 On Darwin, this is necessary to avoid a crash, because Darwin
7323 has a different PIC label for each routine but the DWARF debugging
7324 information is not associated with any particular routine, so it's
7325 necessary to remove references to the PIC label from RTL stored by
7326 the DWARF output code. */
/* NOTE(review): sampled extract -- the declaration of 'x' (copied from
   orig_x) and several early-return lines are elided.  */
7329 ix86_delegitimize_address (rtx orig_x)
7332 /* reg_addend is NULL or a multiple of some register. */
7333 rtx reg_addend = NULL_RTX;
7334 /* const_addend is NULL or a const_int. */
7335 rtx const_addend = NULL_RTX;
7336 /* This is the result, or NULL. */
7337 rtx result = NULL_RTX;
/* Strip a MEM wrapper so we analyze the address itself.  */
7339 if (GET_CODE (x) == MEM)
/* 64-bit: a GOTPCREL load delegitimizes directly to its symbol.  */
7344 if (GET_CODE (x) != CONST
7345 || GET_CODE (XEXP (x, 0)) != UNSPEC
7346 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7347 || GET_CODE (orig_x) != MEM)
7349 return XVECEXP (XEXP (x, 0), 0, 0);
7352 if (GET_CODE (x) != PLUS
7353 || GET_CODE (XEXP (x, 1)) != CONST)
7356 if (GET_CODE (XEXP (x, 0)) == REG
7357 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7358 /* %ebx + GOT/GOTOFF */
7360 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7362 /* %ebx + %reg * scale + GOT/GOTOFF */
7363 reg_addend = XEXP (x, 0);
7364 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7365 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7366 reg_addend = XEXP (reg_addend, 1);
7367 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7368 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7369 reg_addend = XEXP (reg_addend, 0);
/* Whatever remains beside the PIC register must itself be a simple
   index term (reg, scaled reg), otherwise give up.  */
7372 if (GET_CODE (reg_addend) != REG
7373 && GET_CODE (reg_addend) != MULT
7374 && GET_CODE (reg_addend) != ASHIFT)
/* Peel a trailing integer addend off the CONST body.  */
7380 x = XEXP (XEXP (x, 1), 0);
7381 if (GET_CODE (x) == PLUS
7382 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7384 const_addend = XEXP (x, 1);
/* @GOT references are only valid delegitimized from a MEM;
   @GOTOFF only from a non-MEM.  */
7388 if (GET_CODE (x) == UNSPEC
7389 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7390 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7391 result = XVECEXP (x, 0, 0);
7393 if (TARGET_MACHO && darwin_local_data_pic (x)
7394 && GET_CODE (orig_x) != MEM)
7395 result = XEXP (x, 0);
/* Re-attach the stripped constant and register addends.  */
7401 result = gen_rtx_PLUS (Pmode, result, const_addend);
7403 result = gen_rtx_PLUS (Pmode, reg_addend, result);
/* Emit the condition-code suffix (e.g. "e", "a", "np") for CODE in
   mode MODE to FILE; REVERSE inverts the condition, FP selects the
   fcmov-style spelling.  NOTE(review): elided extract -- the function
   head comment, the switch statement and most case labels are missing
   (gaps in the embedded numbering).  */
7408 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
/* FP compares are first mapped onto integer condition codes; the
   mapping must need no bypass/second jump here.  */
7413 if (mode == CCFPmode || mode == CCFPUmode)
7415 enum rtx_code second_code, bypass_code;
7416 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7417 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7418 code = ix86_fp_compare_code_to_integer (code);
7422 code = reverse_condition (code);
7433 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7437 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7438 Those same assemblers have the same but opposite lossage on cmov. */
7439 gcc_assert (mode == CCmode);
7440 suffix = fp ? "nbe" : "a";
7460 gcc_assert (mode == CCmode);
7482 gcc_assert (mode == CCmode);
7483 suffix = fp ? "nb" : "ae";
7486 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7490 gcc_assert (mode == CCmode);
7494 suffix = fp ? "u" : "p";
7497 suffix = fp ? "nu" : "np";
7502 fputs (suffix, file);
7505 /* Print the name of register X to FILE based on its machine mode and number.
7506 If CODE is 'w', pretend the mode is HImode.
7507 If CODE is 'b', pretend the mode is QImode.
7508 If CODE is 'k', pretend the mode is SImode.
7509 If CODE is 'q', pretend the mode is DImode.
7510 If CODE is 'h', pretend the reg is the 'high' byte register.
7511 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
/* NOTE(review): sampled extract -- the '%' prefix emission, the size
   assignments for each code letter, and the switch over 'code' are
   partly elided.  */
7514 print_reg (rtx x, int code, FILE *file)
7516 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7517 && REGNO (x) != FRAME_POINTER_REGNUM
7518 && REGNO (x) != FLAGS_REG
7519 && REGNO (x) != FPSR_REG
7520 && REGNO (x) != FPCR_REG);
7522 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Map the override letter (or the operand's own mode size) into
   'code' as a byte count used by the switch below.  */
7525 if (code == 'w' || MMX_REG_P (x))
7527 else if (code == 'b')
7529 else if (code == 'k')
7531 else if (code == 'q')
7533 else if (code == 'y')
7535 else if (code == 'h')
7538 code = GET_MODE_SIZE (GET_MODE (x));
7540 /* Irritatingly, AMD extended registers use different naming convention
7541 from the normal registers. */
7542 if (REX_INT_REG_P (x))
7544 gcc_assert (TARGET_64BIT);
7548 error ("extended registers have no high halves");
7551 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7554 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7557 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7560 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7563 error ("unsupported operand size for extended register");
7571 if (STACK_TOP_P (x))
7573 fputs ("st(0)", file);
/* 4/8-byte integer regs get an 'e'/'r' prefix before the HImode
   name from hi_reg_name; FP/SSE regs do not.  */
7580 if (! ANY_FP_REG_P (x))
7581 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7586 fputs (hi_reg_name[REGNO (x)], file);
7589 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7591 fputs (qi_reg_name[REGNO (x)], file);
7594 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7596 fputs (qi_high_reg_name[REGNO (x)], file);
7603 /* Locate some local-dynamic symbol still in use by this function
7604 so that we can print its name in some tls_local_dynamic_base
/* NOTE(review): the rest of this comment ("pattern") and the
   gcc_unreachable tail of the function are elided.  */
7608 get_some_local_dynamic_name (void)
/* Cache hit: a previous scan already recorded a name.  */
7612 if (cfun->machine->some_ld_name)
7613 return cfun->machine->some_ld_name;
/* Scan every insn; the for_each_rtx callback stores the first
   local-dynamic SYMBOL_REF it finds into cfun->machine.  */
7615 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7617 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7618 return cfun->machine->some_ld_name;
/* for_each_rtx callback: record the name of the first SYMBOL_REF with
   local-dynamic TLS model and stop the walk (the 'return 1' line is
   elided from this extract).  */
7624 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7628 if (GET_CODE (x) == SYMBOL_REF
7629 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7631 cfun->machine->some_ld_name = XSTR (x, 0);
7639 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7640 C -- print opcode suffix for set/cmov insn.
7641 c -- like C, but print reversed condition
7642 F,f -- likewise, but for floating-point.
7643 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7645 R -- print the prefix for register names.
7646 z -- print the opcode suffix for the size of the current operand.
7647 * -- print a star (in certain assembler syntax)
7648 A -- print an absolute memory reference.
7649 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7650 s -- print a shift double count, followed by the assemblers argument
7652 b -- print the QImode name of the register for the indicated operand.
7653 %b0 would print %al if operands[0] is reg 0.
7654 w -- likewise, print the HImode name of the register.
7655 k -- likewise, print the SImode name of the register.
7656 q -- likewise, print the DImode name of the register.
7657 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7658 y -- print "st(0)" instead of "st" as a register.
7659 D -- print condition for SSE cmp instruction.
7660 P -- if PIC, print an @PLT suffix.
7661 X -- don't print any sort of PIC '@' suffix for a symbol.
7662 & -- print some in-use local-dynamic symbol name.
7663 H -- print a memory address offset by 8; used for sse high-parts
/* NOTE(review): sampled extract -- the opening "Meaning of CODE:"
   comment line, the enclosing switch on 'code', most case labels,
   break statements and closing braces are elided throughout.  */
7667 print_operand (FILE *file, rtx x, int code)
/* '*' -- AT&T star prefix for indirect calls/jumps.  */
7674 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&' -- emit a live local-dynamic TLS symbol name.  */
7679 assemble_name (file, get_some_local_dynamic_name ())
/* 'A' -- absolute memory reference; spelling depends on dialect.  */
7683 switch (ASSEMBLER_DIALECT)
7690 /* Intel syntax. For absolute addresses, registers should not
7691 be surrounded by braces. */
7692 if (GET_CODE (x) != REG)
7695 PRINT_OPERAND (file, x, 0);
7705 PRINT_OPERAND (file, x, 0);
/* Explicit size-suffix letters ('L','W','B','Q','S','T'): only AT&T
   syntax wants the lowercase suffix character.  */
7710 if (ASSEMBLER_DIALECT == ASM_ATT)
7715 if (ASSEMBLER_DIALECT == ASM_ATT)
7720 if (ASSEMBLER_DIALECT == ASM_ATT)
7725 if (ASSEMBLER_DIALECT == ASM_ATT)
7730 if (ASSEMBLER_DIALECT == ASM_ATT)
7735 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z' -- derive the suffix from the operand's own mode.  */
7740 /* 387 opcodes don't get size suffixes if the operands are
7742 if (STACK_REG_P (x))
7745 /* Likewise if using Intel opcodes. */
7746 if (ASSEMBLER_DIALECT == ASM_INTEL)
7749 /* This is the size of op from size of operand. */
7750 switch (GET_MODE_SIZE (GET_MODE (x)))
7753 #ifdef HAVE_GAS_FILDS_FISTS
7759 if (GET_MODE (x) == SFmode)
7774 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7776 #ifdef GAS_MNEMONICS
/* 's' -- shift-double count: constants are printed inline, otherwise
   the register operand follows.  */
7802 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7804 PRINT_OPERAND (file, x, 0);
7810 /* Little bit of braindamage here. The SSE compare instructions
7811 does use completely different names for the comparisons that the
7812 fp conditional moves. */
7813 switch (GET_CODE (x))
7828 fputs ("unord", file);
7832 fputs ("neq", file);
7836 fputs ("nlt", file);
7840 fputs ("nle", file);
7843 fputs ("ord", file);
/* 'O' -- Sun-assembler cmov size suffix ("w.", "l.", "q.").  */
7850 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7851 if (ASSEMBLER_DIALECT == ASM_ATT)
7853 switch (GET_MODE (x))
7855 case HImode: putc ('w', file); break;
7857 case SFmode: putc ('l', file); break;
7859 case DFmode: putc ('q', file); break;
7860 default: gcc_unreachable ();
/* 'C' -- set/cmov condition suffix; 'F' the fcmov flavor.  */
7867 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7870 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7871 if (ASSEMBLER_DIALECT == ASM_ATT)
7874 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7877 /* Like above, but reverse condition */
7879 /* Check to see if argument to %c is really a constant
7880 and not a condition code which needs to be reversed. */
7881 if (!COMPARISON_P (x))
7883 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7886 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7889 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7890 if (ASSEMBLER_DIALECT == ASM_ATT)
7893 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'H' -- address of the high 8 bytes (SSE high-parts).  */
7897 /* It doesn't actually matter what mode we use here, as we're
7898 only going to use this for printing. */
7899 x = adjust_address_nv (x, DImode, 8);
/* '+' (presumably) -- branch prediction prefixes, only when hints
   are profitable and the prediction disagrees with the CPU default.  */
7906 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7909 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7912 int pred_val = INTVAL (XEXP (x, 0));
7914 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7915 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7917 int taken = pred_val > REG_BR_PROB_BASE / 2;
7918 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7920 /* Emit hints only in the case default branch prediction
7921 heuristics would fail. */
7922 if (taken != cputaken)
7924 /* We use 3e (DS) prefix for taken branches and
7925 2e (CS) prefix for not taken branches. */
7927 fputs ("ds ; ", file);
7929 fputs ("cs ; ", file);
7936 output_operand_lossage ("invalid operand code '%c'", code);
/* After the code-letter switch: print the operand itself.  */
7940 if (GET_CODE (x) == REG)
7941 print_reg (x, code, file);
7943 else if (GET_CODE (x) == MEM)
7945 /* No `byte ptr' prefix for call instructions. */
7946 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7949 switch (GET_MODE_SIZE (GET_MODE (x)))
7951 case 1: size = "BYTE"; break;
7952 case 2: size = "WORD"; break;
7953 case 4: size = "DWORD"; break;
7954 case 8: size = "QWORD"; break;
7955 case 12: size = "XWORD"; break;
7956 case 16: size = "XMMWORD"; break;
7961 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7964 else if (code == 'w')
7966 else if (code == 'k')
7970 fputs (" PTR ", file);
7974 /* Avoid (%rip) for call operands. */
7975 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7976 && GET_CODE (x) != CONST_INT)
7977 output_addr_const (file, x);
7978 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7979 output_operand_lossage ("invalid constraints for operand");
/* SFmode immediates are emitted as their 32-bit bit pattern.  */
7984 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7989 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7990 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7992 if (ASSEMBLER_DIALECT == ASM_ATT)
7994 fprintf (file, "0x%08lx", l);
7997 /* These float cases don't actually occur as immediate operands. */
7998 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8002 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8003 fprintf (file, "%s", dstr);
8006 else if (GET_CODE (x) == CONST_DOUBLE
8007 && GET_MODE (x) == XFmode)
8011 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8012 fprintf (file, "%s", dstr);
8017 /* We have patterns that allow zero sets of memory, for instance.
8018 In 64-bit mode, we should probably support all 8-byte vectors,
8019 since we can in fact encode that into an immediate. */
8020 if (GET_CODE (x) == CONST_VECTOR)
8022 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediates: '$' in AT&T syntax; symbolic constants may need an
   OFFSET FLAT: prefix in Intel syntax and PIC-aware printing.  */
8028 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8030 if (ASSEMBLER_DIALECT == ASM_ATT)
8033 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8034 || GET_CODE (x) == LABEL_REF)
8036 if (ASSEMBLER_DIALECT == ASM_ATT)
8039 fputs ("OFFSET FLAT:", file);
8042 if (GET_CODE (x) == CONST_INT)
8043 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8045 output_pic_addr_const (file, x, code);
8047 output_addr_const (file, x);
8051 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): sampled extract -- the gcc_assert on 'ok', base/disp
   assignments, segment-override branch heads, and the AT&T/Intel
   dialect if/else skeleton are partly elided.  */
8054 print_operand_address (FILE *file, rtx addr)
8056 struct ix86_address parts;
8057 rtx base, index, disp;
/* Decompose ADDR into base + index*scale + disp (+ segment).  */
8059 int ok = ix86_decompose_address (addr, &parts);
8064 index = parts.index;
8066 scale = parts.scale;
/* Emit an explicit fs:/gs: segment override when present.  */
8074 if (USER_LABEL_PREFIX[0] == 0)
8076 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8082 if (!base && !index)
8084 /* Displacement only requires special attention. */
8086 if (GET_CODE (disp) == CONST_INT)
8088 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8090 if (USER_LABEL_PREFIX[0] == 0)
8092 fputs ("ds:", file);
8094 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8097 output_pic_addr_const (file, disp, 0);
8099 output_addr_const (file, disp);
8101 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8104 if (GET_CODE (disp) == CONST
8105 && GET_CODE (XEXP (disp, 0)) == PLUS
8106 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8107 disp = XEXP (XEXP (disp, 0), 0);
8108 if (GET_CODE (disp) == LABEL_REF
8109 || (GET_CODE (disp) == SYMBOL_REF
8110 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8111 fputs ("(%rip)", file);
/* AT&T syntax: disp(base,index,scale).  */
8116 if (ASSEMBLER_DIALECT == ASM_ATT)
8121 output_pic_addr_const (file, disp, 0);
8122 else if (GET_CODE (disp) == LABEL_REF)
8123 output_asm_label (disp);
8125 output_addr_const (file, disp);
8130 print_reg (base, 0, file);
8134 print_reg (index, 0, file);
8136 fprintf (file, ",%d", scale);
/* Intel syntax: [sym + base + index*scale + offset].  */
8142 rtx offset = NULL_RTX;
8146 /* Pull out the offset of a symbol; print any symbol itself. */
8147 if (GET_CODE (disp) == CONST
8148 && GET_CODE (XEXP (disp, 0)) == PLUS
8149 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8151 offset = XEXP (XEXP (disp, 0), 1);
8152 disp = gen_rtx_CONST (VOIDmode,
8153 XEXP (XEXP (disp, 0), 0));
8157 output_pic_addr_const (file, disp, 0);
8158 else if (GET_CODE (disp) == LABEL_REF)
8159 output_asm_label (disp);
8160 else if (GET_CODE (disp) == CONST_INT)
8163 output_addr_const (file, disp);
8169 print_reg (base, 0, file);
/* Sign handling for the integer offset (the '+'/'-' putc lines are
   elided here).  */
8172 if (INTVAL (offset) >= 0)
8174 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8178 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8185 print_reg (index, 0, file);
8187 fprintf (file, "*%d", scale);
/* Target hook: print the relocation suffix for TLS-related UNSPECs
   that output_addr_const cannot handle itself.  Returns false for
   non-UNSPEC rtx (the return statements are elided in this extract).  */
8195 output_addr_const_extra (FILE *file, rtx x)
8199 if (GET_CODE (x) != UNSPEC)
8202 op = XVECEXP (x, 0, 0);
/* Dispatch on the unspec number; each case prints the operand then
   the matching @-suffix.  */
8203 switch (XINT (x, 1))
8205 case UNSPEC_GOTTPOFF:
8206 output_addr_const (file, op);
8207 /* FIXME: This might be @TPOFF in Sun ld. */
8208 fputs ("@GOTTPOFF", file);
8211 output_addr_const (file, op);
8212 fputs ("@TPOFF", file);
8215 output_addr_const (file, op);
/* TARGET_64BIT presumably selects between the two spellings below
   (the #if/else lines are elided) -- same pattern as in
   output_pic_addr_const.  */
8217 fputs ("@TPOFF", file);
8219 fputs ("@NTPOFF", file);
8222 output_addr_const (file, op);
8223 fputs ("@DTPOFF", file);
8225 case UNSPEC_GOTNTPOFF:
8226 output_addr_const (file, op);
8228 fputs ("@GOTTPOFF(%rip)", file);
8230 fputs ("@GOTNTPOFF", file);
8232 case UNSPEC_INDNTPOFF:
8233 output_addr_const (file, op);
8234 fputs ("@INDNTPOFF", file);
8244 /* Split one or more DImode RTL references into pairs of SImode
8245 references. The RTL can be REG, offsettable MEM, integer constant, or
8246 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8247 split and "num" is its length. lo_half and hi_half are output arrays
8248 that parallel "operands". */
/* NOTE(review): the loop over 'num' and the else-branch brace lines
   are elided in this extract.  */
8251 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8255 rtx op = operands[num];
8257 /* simplify_subreg refuse to split volatile memory addresses,
8258 but we still have to handle it. */
8259 if (GET_CODE (op) == MEM)
/* MEM: low word at offset 0, high word at offset 4.  */
8261 lo_half[num] = adjust_address (op, SImode, 0);
8262 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM: take SImode subregs; VOIDmode constants are treated as
   DImode.  */
8266 lo_half[num] = simplify_gen_subreg (SImode, op,
8267 GET_MODE (op) == VOIDmode
8268 ? DImode : GET_MODE (op), 0);
8269 hi_half[num] = simplify_gen_subreg (SImode, op,
8270 GET_MODE (op) == VOIDmode
8271 ? DImode : GET_MODE (op), 4);
8275 /* Split one or more TImode RTL references into pairs of DImode
8276 references. The RTL can be REG, offsettable MEM, integer constant, or
8277 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8278 split and "num" is its length. lo_half and hi_half are output arrays
8279 that parallel "operands". */
/* NOTE(review): the loop over 'num' and brace lines are elided, as in
   split_di above; the structure mirrors that function with 8-byte
   halves instead of 4-byte.  */
8282 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8286 rtx op = operands[num];
8288 /* simplify_subreg refuse to split volatile memory addresses, but we
8289 still have to handle it. */
8290 if (GET_CODE (op) == MEM)
/* MEM: low double-word at offset 0, high at offset 8.  */
8292 lo_half[num] = adjust_address (op, DImode, 0);
8293 hi_half[num] = adjust_address (op, DImode, 8);
8297 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8298 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
/* NOTE(review): elided listing -- the embedded line numbers jump, so
   case labels, else keywords, braces and several statements are missing
   here; do not infer control flow from adjacency alone.  Recover the
   full body from gcc's config/i386/i386.c before editing.  */
8303 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8304 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8305 is the expression of the binary operation. The output may either be
8306 emitted here, or returned to the caller, like all output_* functions.
8308 There is no guarantee that the operands are the same mode, as they
8309 might be within FLOAT or FLOAT_EXTEND expressions. */
8311 #ifndef SYSV386_COMPAT
8312 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8313 wants to fix the assemblers because that causes incompatibility
8314 with gcc. No-one wants to fix gcc because that causes
8315 incompatibility with assemblers... You can use the option of
8316 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8317 #define SYSV386_COMPAT 1
8321 output_387_binary_op (rtx insn, rtx *operands)
/* Instruction text is built into this static buffer and returned.  */
8323 static char buf[30];
8326 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8328 #ifdef ENABLE_CHECKING
8329 /* Even if we do not want to check the inputs, this documents input
8330 constraints. Which helps in understanding the following code. */
8331 if (STACK_REG_P (operands[0])
8332 && ((REG_P (operands[1])
8333 && REGNO (operands[0]) == REGNO (operands[1])
8334 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8335 || (REG_P (operands[2])
8336 && REGNO (operands[0]) == REGNO (operands[2])
8337 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8338 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8341 gcc_assert (is_sse);
/* First switch on the rtx code of operands[3]; the per-case bodies
   between the visible MODE_INT tests are elided.  */
8344 switch (GET_CODE (operands[3]))
8347 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8348 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8356 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8357 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8365 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8366 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8374 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8375 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: append the scalar-single / scalar-double suffix.  */
8389 if (GET_MODE (operands[0]) == SFmode)
8390 strcat (buf, "ss\t{%2, %0|%0, %2}");
8392 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: pick the operand/popping form depending on which operand
   is at st(0) and which operands die.  */
8397 switch (GET_CODE (operands[3]))
8401 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8403 rtx temp = operands[2];
8404 operands[2] = operands[1];
8408 /* know operands[0] == operands[1]. */
8410 if (GET_CODE (operands[2]) == MEM)
8416 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8418 if (STACK_TOP_P (operands[0]))
8419 /* How is it that we are storing to a dead operand[2]?
8420 Well, presumably operands[1] is dead too. We can't
8421 store the result to st(0) as st(0) gets popped on this
8422 instruction. Instead store to operands[2] (which I
8423 think has to be st(1)). st(1) will be popped later.
8424 gcc <= 2.8.1 didn't have this check and generated
8425 assembly code that the Unixware assembler rejected. */
8426 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8428 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8432 if (STACK_TOP_P (operands[0]))
8433 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8435 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8440 if (GET_CODE (operands[1]) == MEM)
8446 if (GET_CODE (operands[2]) == MEM)
8452 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8455 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8456 derived assemblers, confusingly reverse the direction of
8457 the operation for fsub{r} and fdiv{r} when the
8458 destination register is not st(0). The Intel assembler
8459 doesn't have this brain damage. Read !SYSV386_COMPAT to
8460 figure out what the hardware really does. */
8461 if (STACK_TOP_P (operands[0]))
8462 p = "{p\t%0, %2|rp\t%2, %0}";
8464 p = "{rp\t%2, %0|p\t%0, %2}";
8466 if (STACK_TOP_P (operands[0]))
8467 /* As above for fmul/fadd, we can't store to st(0). */
8468 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8470 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8475 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8478 if (STACK_TOP_P (operands[0]))
8479 p = "{rp\t%0, %1|p\t%1, %0}";
8481 p = "{p\t%1, %0|rp\t%0, %1}";
8483 if (STACK_TOP_P (operands[0]))
8484 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8486 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8491 if (STACK_TOP_P (operands[0]))
8493 if (STACK_TOP_P (operands[1]))
8494 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8496 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8499 else if (STACK_TOP_P (operands[1]))
8502 p = "{\t%1, %0|r\t%0, %1}";
8504 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8510 p = "{r\t%2, %0|\t%0, %2}";
8512 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* NOTE(review): elided listing -- the opening condition of the first
   `if' (line 8537) and the return statements after each mode test are
   missing from this view; confirm against the full source.  */
8525 /* Return needed mode for entity in optimize_mode_switching pass. */
8528 ix86_mode_needed (int entity, rtx insn)
8530 enum attr_i387_cw mode;
8532 /* The mode UNINITIALIZED is used to store control word after a
8533 function call or ASM pattern. The mode ANY specify that function
8534 has no requirements on the control word and make no changes in the
8535 bits we are interested in. */
8538 || (NONJUMP_INSN_P (insn)
8539 && (asm_noperands (PATTERN (insn)) >= 0
8540 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8541 return I387_CW_UNINITIALIZED;
/* Unrecognizable insns impose no control-word requirement.  */
8543 if (recog_memoized (insn) < 0)
8546 mode = get_attr_i387_cw (insn);
/* Map the insn's i387_cw attribute to the entity's needed mode
   (bodies of these tests elided).  */
8551 if (mode == I387_CW_TRUNC)
8556 if (mode == I387_CW_FLOOR)
8561 if (mode == I387_CW_CEIL)
8566 if (mode == I387_CW_MASK_PM)
/* NOTE(review): elided listing -- switch headers, case labels for
   TRUNC/FLOOR/CEIL, `break's and the else branch separating the two
   strategies are missing from this view; confirm against the full
   source before editing.  */
8577 /* Output code to initialize control word copies used by trunc?f?i and
8578 rounding patterns. CURRENT_MODE is set to current control word,
8579 while NEW_MODE is set to new control word. */
8582 emit_i387_cw_initialization (int mode)
8584 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8589 rtx reg = gen_reg_rtx (HImode);
/* Read the current x87 control word into a pseudo.  */
8591 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8592 emit_move_insn (reg, stored_mode);
/* First strategy: full-width HImode and/or to set the rounding-control
   field (bits in mask 0x0c00 per the constants below).  */
8594 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8599 /* round toward zero (truncate) */
8600 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8601 slot = SLOT_CW_TRUNC;
8605 /* round down toward -oo */
8606 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8607 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8608 slot = SLOT_CW_FLOOR;
8612 /* round up toward +oo */
8613 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8614 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8615 slot = SLOT_CW_CEIL;
8618 case I387_CW_MASK_PM:
8619 /* mask precision exception for nearbyint() */
8620 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8621 slot = SLOT_CW_MASK_PM;
/* Second strategy (presumably the else branch -- framing elided):
   use an insv pattern to write the rounding field directly.  */
8633 /* round toward zero (truncate) */
8634 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8635 slot = SLOT_CW_TRUNC;
8639 /* round down toward -oo */
8640 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8641 slot = SLOT_CW_FLOOR;
8645 /* round up toward +oo */
8646 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8647 slot = SLOT_CW_CEIL;
8650 case I387_CW_MASK_PM:
8651 /* mask precision exception for nearbyint() */
8652 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8653 slot = SLOT_CW_MASK_PM;
/* Store the adjusted control word into its dedicated stack slot.  */
8661 gcc_assert (slot < MAX_386_STACK_LOCALS);
8663 new_mode = assign_386_stack_local (HImode, slot);
8664 emit_move_insn (new_mode, reg);
/* NOTE(review): elided listing -- the `if (fisttp)'/`else' framing
   around lines 8688-8698 is missing from this view; confirm against
   the full source.  */
8667 /* Output code for INSN to convert a float to a signed int. OPERANDS
8668 are the insn operands. The output may be [HSD]Imode and the input
8669 operand may be [SDX]Fmode. */
8672 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8674 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8675 int dimode_p = GET_MODE (operands[0]) == DImode;
8676 int round_mode = get_attr_i387_cw (insn);
8678 /* Jump through a hoop or two for DImode, since the hardware has no
8679 non-popping instruction. We used to do this a different way, but
8680 that was somewhat fragile and broke with post-reload splitters. */
8681 if ((dimode_p || fisttp) && !stack_top_dies)
8682 output_asm_insn ("fld\t%y1", operands);
8684 gcc_assert (STACK_TOP_P (operands[1]));
8685 gcc_assert (GET_CODE (operands[0]) == MEM);
/* SSE3 fisttp truncates regardless of the control word.  */
8688 output_asm_insn ("fisttp%z0\t%0", operands);
/* Classic path: temporarily load the rounding control word (%3),
   store, then restore the saved control word (%2).  */
8691 if (round_mode != I387_CW_ANY)
8692 output_asm_insn ("fldcw\t%3", operands);
8693 if (stack_top_dies || dimode_p)
8694 output_asm_insn ("fistp%z0\t%0", operands);
8696 output_asm_insn ("fist%z0\t%0", operands);
8697 if (round_mode != I387_CW_ANY)
8698 output_asm_insn ("fldcw\t%2", operands);
/* NOTE(review): elided listing -- the #else branch framing and the
   return of `retval' are missing from this view; confirm against the
   full source.  */
8704 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8705 have the values zero or one, indicates the ffreep insn's operand
8706 from the OPERANDS array. */
8709 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8711 if (TARGET_USE_FFREEP)
8712 #if HAVE_AS_IX86_FFREEP
8713 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit the raw opcode, patching the stack
   register number into the ".word" template below.  */
8716 static char retval[] = ".word\t0xc_df";
8717 int regno = REGNO (operands[opno]);
8719 gcc_assert (FP_REGNO_P (regno));
8721 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not used at all: plain popping store.  */
8726 return opno ? "fstp\t%y1" : "fstp\t%y0";
/* NOTE(review): elided listing -- if/else framing, the full 16-entry
   template table, and several returns are missing from this view; do
   not infer control flow from adjacency.  Confirm against the full
   source before editing.  */
8730 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8731 should be used. UNORDERED_P is true when fucom should be used. */
8734 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8737 rtx cmp_op0, cmp_op1;
8738 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Pick which operand pair holds the comparison (framing elided;
   presumably selected by eflags_p).  */
8742 cmp_op0 = operands[0];
8743 cmp_op1 = operands[1];
8747 cmp_op0 = operands[1];
8748 cmp_op1 = operands[2];
/* SSE comparisons: [u]comiss / [u]comisd.  */
8753 if (GET_MODE (operands[0]) == SFmode)
8755 return "ucomiss\t{%1, %0|%0, %1}";
8757 return "comiss\t{%1, %0|%0, %1}";
8760 return "ucomisd\t{%1, %0|%0, %1}";
8762 return "comisd\t{%1, %0|%0, %1}";
/* x87 comparisons require the first operand at st(0).  */
8765 gcc_assert (STACK_TOP_P (cmp_op0));
8767 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst.  */
8769 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8773 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8774 return output_387_ffreep (operands, 1);
8777 return "ftst\n\tfnstsw\t%0";
8780 if (STACK_REG_P (cmp_op1)
8782 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8783 && REGNO (cmp_op1) != FIRST_STACK_REG)
8785 /* If both the top of the 387 stack dies, and the other operand
8786 is also a stack register that dies, then this must be a
8787 `fcompp' float compare */
8791 /* There is no double popping fcomi variant. Fortunately,
8792 eflags is immune from the fstp's cc clobbering. */
8794 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8796 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8797 return output_387_ffreep (operands, 0);
8802 return "fucompp\n\tfnstsw\t%0";
8804 return "fcompp\n\tfnstsw\t%0";
8809 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8811 static const char * const alt[16] =
8813 "fcom%z2\t%y2\n\tfnstsw\t%0",
8814 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8815 "fucom%z2\t%y2\n\tfnstsw\t%0",
8816 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8818 "ficom%z2\t%y2\n\tfnstsw\t%0",
8819 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8823 "fcomi\t{%y1, %0|%0, %y1}",
8824 "fcomip\t{%y1, %0|%0, %y1}",
8825 "fucomi\t{%y1, %0|%0, %y1}",
8826 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index matching the encoding documented above.  */
8837 mask = eflags_p << 3;
8838 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8839 mask |= unordered_p << 1;
8840 mask |= stack_top_dies;
8842 gcc_assert (mask < 16);
/* Emit one element of an address vector (jump table): a local label
   reference using ASM_LONG, or ASM_QUAD in the (asserted-unreachable
   when !TARGET_64BIT) quad case.  NOTE(review): elided listing --
   the condition guarding the ASM_QUAD assignment is missing here.  */
8851 ix86_output_addr_vec_elt (FILE *file, int value)
8853 const char *directive = ASM_LONG;
8857 directive = ASM_QUAD;
8859 gcc_assert (!TARGET_64BIT);
8862 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of an address-difference vector (PIC jump table).
   NOTE(review): elided listing -- the initial `if' condition selecting
   the label-difference form is missing from this view.  */
8866 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8869 fprintf (file, "%s%s%d-%s%d\n",
8870 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8871 else if (HAVE_AS_GOTOFF_IN_DATA)
8872 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8874 else if (TARGET_MACHO)
8876 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8877 machopic_output_function_base_name (file);
8878 fprintf(file, "\n");
/* Fallback: offset relative to the GOT symbol.  */
8882 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8883 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
/* NOTE(review): elided listing -- the end of the leading comment and
   the final emit of `tmp' are missing from this view.  */
8886 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8890 ix86_expand_clear (rtx dest)
8894 /* We play register width games, which are only valid after reload. */
8895 gcc_assert (reload_completed);
8897 /* Avoid HImode and its attendant prefix byte. */
8898 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8899 dest = gen_rtx_REG (SImode, REGNO (dest));
8901 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8903 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8904 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* The xor form clobbers the flags register (hard reg 17 here).  */
8906 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8907 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8913 /* X is an unchanging MEM. If it is a constant pool reference, return
8914 the constant pool rtx, else NULL. */
8917 maybe_get_pool_constant (rtx x)
/* Strip any PIC/GOT wrapping from the address first.  */
8919 x = ix86_delegitimize_address (XEXP (x, 0));
8921 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8922 return get_pool_constant (x);
/* NOTE(review): the NULL-returning tail is elided from this view.  */
/* Expand a scalar move of OPERANDS[1] into OPERANDS[0] in MODE,
   legitimizing TLS and PIC references and forcing awkward operands
   into registers/memory as needed.  NOTE(review): elided listing --
   operand initializations, several braces/else branches and early
   returns are missing from this view; confirm against the full
   source before editing.  */
8928 ix86_expand_move (enum machine_mode mode, rtx operands[])
8930 int strict = (reload_in_progress || reload_completed);
8932 enum tls_model model;
/* TLS symbol: legitimize the address for its access model.  */
8937 if (GET_CODE (op1) == SYMBOL_REF)
8939 model = SYMBOL_REF_TLS_MODEL (op1);
8942 op1 = legitimize_tls_address (op1, model, true);
8943 op1 = force_operand (op1, op0);
/* TLS symbol + constant offset: legitimize the symbol, then re-add
   the offset.  */
8948 else if (GET_CODE (op1) == CONST
8949 && GET_CODE (XEXP (op1, 0)) == PLUS
8950 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF
8952 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8955 rtx addend = XEXP (XEXP (op1, 0), 1);
8956 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8957 op1 = force_operand (op1, NULL);
8958 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8959 op0, 1, OPTAB_DIRECT);
/* PIC symbolic operands need legitimizing (Darwin handled apart).  */
8965 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8967 if (TARGET_MACHO && !TARGET_64BIT)
8972 rtx temp = ((reload_in_progress
8973 || ((op0 && GET_CODE (op0) == REG)
8975 ? op0 : gen_reg_rtx (Pmode));
8976 op1 = machopic_indirect_data_reference (op1, temp);
8977 op1 = machopic_legitimize_pic_address (op1, mode,
8978 temp == op1 ? 0 : temp);
8980 else if (MACHOPIC_INDIRECT)
8981 op1 = machopic_indirect_data_reference (op1, 0);
8988 if (GET_CODE (op0) == MEM)
8989 op1 = force_reg (Pmode, op1);
8991 op1 = legitimize_address (op1, op1, Pmode);
/* Disallow mem->mem moves except via push.  */
8996 if (GET_CODE (op0) == MEM
8997 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8998 || !push_operand (op0, mode))
8999 && GET_CODE (op1) == MEM)
9000 op1 = force_reg (mode, op1);
9002 if (push_operand (op0, mode)
9003 && ! general_no_elim_operand (op1, mode))
9004 op1 = copy_to_mode_reg (mode, op1);
9006 /* Force large constants in 64bit compilation into register
9007 to get them CSEed. */
9008 if (TARGET_64BIT && mode == DImode
9009 && immediate_operand (op1, mode)
9010 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9011 && !register_operand (op0, mode)
9012 && optimize && !reload_completed && !reload_in_progress)
9013 op1 = copy_to_mode_reg (mode, op1);
9015 if (FLOAT_MODE_P (mode))
9017 /* If we are loading a floating point constant to a register,
9018 force the value to memory now, since we'll get better code
9019 out the back end. */
9023 else if (GET_CODE (op1) == CONST_DOUBLE)
9025 op1 = validize_mem (force_const_mem (mode, op1));
9026 if (!register_operand (op0, mode))
9028 rtx temp = gen_reg_rtx (mode);
9029 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9030 emit_move_insn (op0, temp);
/* Finally emit the move itself.  */
9037 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move, forcing non-trivial constants to the
   constant pool and avoiding mem->mem moves.  NOTE(review): elided
   listing -- the condition opening the "make operand1 a register"
   branch (line 9056) is partially missing.  */
9041 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9043 rtx op0 = operands[0], op1 = operands[1];
9045 /* Force constants other than zero into memory. We do not know how
9046 the instructions used to build constants modify the upper 64 bits
9047 of the register, once we have that information we may be able
9048 to handle some of them more efficiently. */
9049 if ((reload_in_progress | reload_completed) == 0
9050 && register_operand (op0, mode)
9052 && standard_sse_constant_p (op1) <= 0)
9053 op1 = validize_mem (force_const_mem (mode, op1));
9055 /* Make operand1 a register if it isn't already. */
9057 && !register_operand (op0, mode)
9058 && !register_operand (op1, mode)
9060 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9064 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* NOTE(review): elided listing -- the load/store dispatch (MEM_P on
   op1 vs op0), `if (optimize_size)' guards and several returns are
   missing from this view; confirm against the full source.  */
9067 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9068 straight to ix86_expand_vector_move. */
9071 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* ---- Load path: op1 is the misaligned MEM (framing elided). ---- */
9080 /* If we're optimizing for size, movups is the smallest. */
9083 op0 = gen_lowpart (V4SFmode, op0);
9084 op1 = gen_lowpart (V4SFmode, op1);
9085 emit_insn (gen_sse_movups (op0, op1));
9089 /* ??? If we have typed data, then it would appear that using
9090 movdqu is the only way to get unaligned data loaded with
9092 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9094 op0 = gen_lowpart (V16QImode, op0);
9095 op1 = gen_lowpart (V16QImode, op1);
9096 emit_insn (gen_sse2_movdqu (op0, op1));
9100 if (TARGET_SSE2 && mode == V2DFmode)
9104 /* When SSE registers are split into halves, we can avoid
9105 writing to the top half twice. */
9106 if (TARGET_SSE_SPLIT_REGS)
9108 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9113 /* ??? Not sure about the best option for the Intel chips.
9114 The following would seem to satisfy; the register is
9115 entirely cleared, breaking the dependency chain. We
9116 then store to the upper half, with a dependency depth
9117 of one. A rumor has it that Intel recommends two movsd
9118 followed by an unpacklpd, but this is unconfirmed. And
9119 given that the dependency depth of the unpacklpd would
9120 still be one, I'm not sure why this would be better. */
9121 zero = CONST0_RTX (V2DFmode);
/* Load the two halves with loadlpd/loadhpd.  */
9124 m = adjust_address (op1, DFmode, 0);
9125 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9126 m = adjust_address (op1, DFmode, 8);
9127 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9131 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9132 emit_move_insn (op0, CONST0_RTX (mode));
9134 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9136 if (mode != V4SFmode)
9137 op0 = gen_lowpart (V4SFmode, op0);
9138 m = adjust_address (op1, V2SFmode, 0);
9139 emit_insn (gen_sse_loadlps (op0, op0, m));
9140 m = adjust_address (op1, V2SFmode, 8);
9141 emit_insn (gen_sse_loadhps (op0, op0, m));
/* ---- Store path: op0 is the misaligned MEM. ---- */
9144 else if (MEM_P (op0))
9146 /* If we're optimizing for size, movups is the smallest. */
9149 op0 = gen_lowpart (V4SFmode, op0);
9150 op1 = gen_lowpart (V4SFmode, op1);
9151 emit_insn (gen_sse_movups (op0, op1));
9155 /* ??? Similar to above, only less clear because of quote
9156 typeless stores unquote. */
9157 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9158 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9160 op0 = gen_lowpart (V16QImode, op0);
9161 op1 = gen_lowpart (V16QImode, op1);
9162 emit_insn (gen_sse2_movdqu (op0, op1));
9166 if (TARGET_SSE2 && mode == V2DFmode)
9168 m = adjust_address (op0, DFmode, 0);
9169 emit_insn (gen_sse2_storelpd (m, op1));
9170 m = adjust_address (op0, DFmode, 8);
9171 emit_insn (gen_sse2_storehpd (m, op1));
9175 if (mode != V4SFmode)
9176 op1 = gen_lowpart (V4SFmode, op1);
9177 m = adjust_address (op0, V2SFmode, 0);
9178 emit_insn (gen_sse_storelps (m, op1));
9179 m = adjust_address (op0, V2SFmode, 8);
9180 emit_insn (gen_sse_storehps (m, op1));
9187 /* Expand a push in MODE. This is some mode for which we do not support
9188 proper push instructions, at least from the registers that we expect
9189 the value to live in. */
9192 ix86_expand_push (enum machine_mode mode, rtx x)
/* Manually decrement the stack pointer by the mode's size...  */
9196 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9197 GEN_INT (-GET_MODE_SIZE (mode)),
9198 stack_pointer_rtx, 1, OPTAB_DIRECT);
9199 if (tmp != stack_pointer_rtx)
9200 emit_move_insn (stack_pointer_rtx, tmp);
/* ...then store X at the new top of stack.  */
9202 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9203 emit_move_insn (tmp, x);
/* NOTE(review): elided listing -- the operand initializations from
   operands[], the operand-swap body of the commutative case and the
   `return dst' tail are missing from this view.  */
9206 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9207 destination to use for the operation. If different from the true
9208 destination in operands[0], a copy operation will be required. */
9211 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9214 int matching_memory;
9215 rtx src1, src2, dst;
9221 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9222 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9223 && (rtx_equal_p (dst, src2)
9224 || immediate_operand (src1, mode)))
9231 /* If the destination is memory, and we do not have matching source
9232 operands, do things in registers. */
9233 matching_memory = 0;
9234 if (GET_CODE (dst) == MEM)
9236 if (rtx_equal_p (dst, src1))
9237 matching_memory = 1;
9238 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9239 && rtx_equal_p (dst, src2))
9240 matching_memory = 2;
9242 dst = gen_reg_rtx (mode);
9245 /* Both source operands cannot be in memory. */
9246 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9248 if (matching_memory != 2)
9249 src2 = force_reg (mode, src2);
9251 src1 = force_reg (mode, src1);
9254 /* If the operation is not commutable, source 1 cannot be a constant
9255 or non-matching memory. */
9256 if ((CONSTANT_P (src1)
9257 || (!matching_memory && GET_CODE (src1) == MEM))
9258 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9259 src1 = force_reg (mode, src1);
/* Write the possibly-replaced sources back for the caller.  */
9261 src1 = operands[1] = src1;
9262 src2 = operands[2] = src2;
/* Like ix86_fixup_binary_operands, but assert that no final copy into
   operands[0] is needed (i.e. the chosen destination IS operands[0]).  */
9266 /* Similarly, but assume that the destination has already been
9270 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9271 enum machine_mode mode, rtx operands[])
9273 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9274 gcc_assert (dst == operands[0]);
/* NOTE(review): elided listing -- the assignments of src1/src2 from
   operands[] and the emit in the reload_in_progress branch are missing
   from this view.  */
9277 /* Attempt to expand a binary operator. Make the expansion closer to the
9278 actual machine, then just general_operand, which will allow 3 separate
9279 memory references (one output, two input) in a single insn. */
9282 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9285 rtx src1, src2, dst, op, clob;
9287 dst = ix86_fixup_binary_operands (code, mode, operands);
9291 /* Emit the instruction. */
9293 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9294 if (reload_in_progress)
9296 /* Reload doesn't know about the flags register, and doesn't know that
9297 it doesn't want to clobber it. We can only do this with PLUS. */
9298 gcc_assert (code == PLUS);
/* Normal case: pair the SET with an explicit flags clobber.  */
9303 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9304 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9307 /* Fix up the destination if needed. */
9308 if (dst != operands[0])
9309 emit_move_insn (operands[0], dst);
/* NOTE(review): elided listing -- the FALSE/TRUE return statements
   after each test are missing from this view.  */
9312 /* Return TRUE or FALSE depending on whether the binary operator meets the
9313 appropriate constraints. */
9316 ix86_binary_operator_ok (enum rtx_code code,
9317 enum machine_mode mode ATTRIBUTE_UNUSED,
9320 /* Both source operands cannot be in memory. */
9321 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9323 /* If the operation is not commutable, source 1 cannot be a constant. */
9324 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9326 /* If the destination is memory, we must have a matching source operand. */
9327 if (GET_CODE (operands[0]) == MEM
9328 && ! (rtx_equal_p (operands[0], operands[1])
9329 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9330 && rtx_equal_p (operands[0], operands[2]))))
9332 /* If the operation is not commutable and the source 1 is memory, we must
9333 have a matching destination. */
9334 if (GET_CODE (operands[1]) == MEM
9335 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9336 && ! rtx_equal_p (operands[0], operands[1]))
/* NOTE(review): elided listing -- the assignments of dst/src from
   operands[], the MEM test opening the matching_memory check, and the
   emit in the reload/NOT branch are missing from this view.  */
9341 /* Attempt to expand a unary operator. Make the expansion closer to the
9342 actual machine, then just general_operand, which will allow 2 separate
9343 memory references (one output, one input) in a single insn. */
9346 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9349 int matching_memory;
9350 rtx src, dst, op, clob;
9355 /* If the destination is memory, and we do not have matching source
9356 operands, do things in registers. */
9357 matching_memory = 0;
9360 if (rtx_equal_p (dst, src))
9361 matching_memory = 1;
9363 dst = gen_reg_rtx (mode);
9366 /* When source operand is memory, destination must match. */
9367 if (MEM_P (src) && !matching_memory)
9368 src = force_reg (mode, src);
9370 /* Emit the instruction. */
9372 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9373 if (reload_in_progress || code == NOT)
9375 /* Reload doesn't know about the flags register, and doesn't know that
9376 it doesn't want to clobber it. */
9377 gcc_assert (code == NOT);
/* Normal case: pair the SET with an explicit flags clobber.  */
9382 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9383 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9386 /* Fix up the destination if needed. */
9387 if (dst != operands[0])
9388 emit_move_insn (operands[0], dst);
/* NOTE(review): elided listing -- the FALSE/TRUE returns are missing
   from this view.  */
9391 /* Return TRUE or FALSE depending on whether the unary operator meets the
9392 appropriate constraints. */
9395 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9396 enum machine_mode mode ATTRIBUTE_UNUSED,
9397 rtx operands[2] ATTRIBUTE_UNUSED)
9399 /* If one of operands is memory, source and destination must match. */
9400 if ((GET_CODE (operands[0]) == MEM
9401 || GET_CODE (operands[1]) == MEM)
9402 && ! rtx_equal_p (operands[0], operands[1]))
/* NOTE(review): elided listing -- the mode dispatch (SFmode vs DFmode
   branch framing), the INVERT handling and the shift setup are missing
   from this view.  */
9407 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9408 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9409 true, then replicate the mask for all elements of the vector register.
9410 If INVERT is true, then create a mask excluding the sign bit. */
9413 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9415 enum machine_mode vec_mode;
9416 HOST_WIDE_INT hi, lo;
9421 /* Find the sign bit, sign extended to 2*HWI. */
9423 lo = 0x80000000, hi = lo < 0;
9424 else if (HOST_BITS_PER_WIDE_INT >= 64)
9425 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9427 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9432 /* Force this value into the low part of a fp vector constant. */
9433 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9434 mask = gen_lowpart (mode, mask);
/* SFmode: 4-element vector, replicated or mask-in-low-lane only.  */
9439 v = gen_rtvec (4, mask, mask, mask, mask);
9441 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9442 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9443 vec_mode = V4SFmode;
/* DFmode: 2-element vector, same idea.  */
9448 v = gen_rtvec (2, mask, mask);
9450 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9451 vec_mode = V2DFmode;
9454 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
/* NOTE(review): elided listing -- the dst/src assignments, the
   vector-mode use_sse setting and several branch framings are missing
   from this view.  */
9457 /* Generate code for floating point ABS or NEG. */
9460 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9463 rtx mask, set, use, clob, dst, src;
9464 bool matching_memory;
9465 bool use_sse = false;
9466 bool vector_mode = VECTOR_MODE_P (mode);
9467 enum machine_mode elt_mode = mode;
9471 elt_mode = GET_MODE_INNER (mode);
9474 else if (TARGET_SSE_MATH)
9475 use_sse = SSE_FLOAT_MODE_P (mode);
9477 /* NEG and ABS performed with SSE use bitwise mask operations.
9478 Create the appropriate mask now. */
9480 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9487 /* If the destination is memory, and we don't have matching source
9488 operands or we're using the x87, do things in registers. */
9489 matching_memory = false;
9492 if (use_sse && rtx_equal_p (dst, src))
9493 matching_memory = true;
9495 dst = gen_reg_rtx (mode);
9497 if (MEM_P (src) && !matching_memory)
9498 src = force_reg (mode, src);
/* SSE form: NEG is XOR with the sign mask, ABS is AND with its
   complement (mask built with invert above).  */
9502 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9503 set = gen_rtx_SET (VOIDmode, dst, set);
/* x87 form: plain NEG/ABS rtx, with a USE of the mask and a flags
   clobber in a PARALLEL.  */
9508 set = gen_rtx_fmt_e (code, mode, src);
9509 set = gen_rtx_SET (VOIDmode, dst, set);
9512 use = gen_rtx_USE (VOIDmode, mask);
9513 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9514 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9515 gen_rtvec (3, set, use, clob)));
9521 if (dst != operands[0])
9522 emit_move_insn (operands[0], dst);
/* NOTE(review): elided listing -- the dest/op0/op1 assignments and
   the branch framing between the constant and variable paths are
   missing from this view.  */
9525 /* Expand a copysign operation. Special case operand 0 being a constant. */
9528 ix86_expand_copysign (rtx operands[])
9530 enum machine_mode mode, vmode;
9531 rtx dest, op0, op1, mask, nmask;
9537 mode = GET_MODE (dest);
9538 vmode = mode == SFmode ? V4SFmode : V2DFmode;
/* Constant magnitude: strip its sign, build a vector constant, and
   use the *_const pattern with a single sign-bit mask.  */
9540 if (GET_CODE (op0) == CONST_DOUBLE)
9544 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9545 op0 = simplify_unary_operation (ABS, mode, op0, mode)
9547 if (op0 == CONST0_RTX (mode))
9548 op0 = CONST0_RTX (vmode);
9552 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9553 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9555 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9556 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9559 mask = ix86_build_signbit_mask (mode, 0, 0);
9562 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9564 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
/* Variable magnitude: need both the mask and its complement.  */
9568 nmask = ix86_build_signbit_mask (mode, 0, 1);
9569 mask = ix86_build_signbit_mask (mode, 0, 0);
9572 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9574 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
/* NOTE(review): elided listing -- the operand unpacking lines are
   missing from this view.  */
9578 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9579 be a constant, and so has already been expanded into a vector constant. */
9582 ix86_split_copysign_const (rtx operands[])
9584 enum machine_mode mode, vmode;
9585 rtx dest, op0, op1, mask, x;
9592 mode = GET_MODE (dest);
9593 vmode = GET_MODE (mask);
/* dest = (dest & sign-mask) | magnitude-constant.  */
9595 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9596 x = gen_rtx_AND (vmode, dest, mask);
9597 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9599 if (op0 != CONST0_RTX (vmode))
9601 x = gen_rtx_IOR (vmode, dest, op0);
9602 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* NOTE(review): elided listing -- some operand unpacking and branch
   framing (the `else' between constraint alternatives) is missing from
   this view; the alternative numbers in the comments refer to the insn
   pattern's constraint alternatives.  */
9606 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9607 so we have to do two masks. */
9610 ix86_split_copysign_var (rtx operands[])
9612 enum machine_mode mode, vmode;
9613 rtx dest, scratch, op0, op1, mask, nmask, x;
9616 scratch = operands[1];
9619 nmask = operands[4];
9622 mode = GET_MODE (dest);
9623 vmode = GET_MODE (mask);
9625 if (rtx_equal_p (op0, op1))
9627 /* Shouldn't happen often (it's useless, obviously), but when it does
9628 we'd generate incorrect code if we continue below. */
9629 emit_move_insn (dest, op0);
9633 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9635 gcc_assert (REGNO (op1) == REGNO (scratch));
9637 x = gen_rtx_AND (vmode, scratch, mask);
9638 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9641 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9642 x = gen_rtx_NOT (vmode, dest);
9643 x = gen_rtx_AND (vmode, x, op0);
9644 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Extract the sign bits of op1 into scratch.  */
9648 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9650 x = gen_rtx_AND (vmode, scratch, mask);
9652 else /* alternative 2,4 */
9654 gcc_assert (REGNO (mask) == REGNO (scratch));
9655 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9656 x = gen_rtx_AND (vmode, scratch, op1);
9658 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* Mask off the sign bit of op0's magnitude into dest.  */
9660 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9662 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9663 x = gen_rtx_AND (vmode, dest, nmask);
9665 else /* alternative 3,4 */
9667 gcc_assert (REGNO (nmask) == REGNO (dest));
9669 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9670 x = gen_rtx_AND (vmode, dest, op0);
9672 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine magnitude and sign.  */
9675 x = gen_rtx_IOR (vmode, dest, scratch);
9676 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* NOTE(review): elided listing -- the switch on set_mode and its case
   labels/returns between lines 9695 and 9723 are missing from this
   view.  */
9679 /* Return TRUE or FALSE depending on whether the first SET in INSN
9680 has source and destination with matching CC modes, and that the
9681 CC mode is at least as constrained as REQ_MODE. */
9684 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9687 enum machine_mode set_mode;
9689 set = PATTERN (insn);
9690 if (GET_CODE (set) == PARALLEL)
9691 set = XVECEXP (set, 0, 0);
9692 gcc_assert (GET_CODE (set) == SET);
9693 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9695 set_mode = GET_MODE (SET_DEST (set));
9699 if (req_mode != CCNOmode
9700 && (req_mode != CCmode
9701 || XEXP (SET_SRC (set), 1) != const0_rtx))
9705 if (req_mode == CCGCmode)
9709 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9713 if (req_mode == CCZmode)
9723 return (GET_MODE (SET_SRC (set)) == set_mode);
9726 /* Generate insn patterns to do an integer compare of OPERANDS. */
9729 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9731 enum machine_mode cmpmode;
/* Pick the CC mode for this comparison and emit flags = COMPARE.  */
9734 cmpmode = SELECT_CC_MODE (code, op0, op1);
9735 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9737 /* This is very simple, but making the interface the same as in the
9738 FP case makes the rest of the code easier. */
9739 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9740 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9742 /* Return the test that should be put into the flags user, i.e.
9743 the bcc, scc, or cmov instruction. */
9744 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9747 /* Figure out whether to use ordered or unordered fp comparisons.
9748 Return the appropriate mode to use. */
9751 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9753 /* ??? In order to make all comparisons reversible, we do all comparisons
9754 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9755 all forms trapping and nontrapping comparisons, we can make inequality
9756 comparisons trapping again, since it results in better code when using
9757 FCOM based compares. */
9758 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode for a comparison CODE of OP0 with OP1.
   NOTE(review): elided excerpt -- the leading comment/return type, the
   switch statement opening, the `return' statements inside each case
   group, and the function tail (including the default case, original
   lines ~9797-9804) are missing from this listing.  */
9762 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
/* Floating-point operands defer to the ordered/unordered FP mode choice.  */
9764 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9765 return ix86_fp_compare_mode (code);
9768 /* Only zero flag is needed. */
9770 case NE: /* ZF!=0 */
9772 /* Codes needing carry flag. */
9773 case GEU: /* CF=0 */
9774 case GTU: /* CF=0 & ZF=0 */
9775 case LTU: /* CF=1 */
9776 case LEU: /* CF=1 | ZF=1 */
9778 /* Codes possibly doable only with sign flag when
9779 comparing against zero. */
9780 case GE: /* SF=OF or SF=0 */
9781 case LT: /* SF<>OF or SF=1 */
/* Against literal zero the sign flag alone suffices (elided return).  */
9782 if (op1 == const0_rtx)
9785 /* For other cases Carry flag is not required. */
9787 /* Codes doable only with sign flag when comparing
9788 against zero, but we miss jump instruction for it
9789 so we need to use relational tests against overflow
9790 that thus needs to be zero. */
9791 case GT: /* ZF=0 & SF=OF */
9792 case LE: /* ZF=1 | SF<>OF */
9793 if (op1 == const0_rtx)
9797 /* strcmp pattern do (use flags) and combine may ask us for proper
9806 /* Return the fixed registers used for condition codes. */
/* NOTE(review): the entire body (original lines ~9810-9814, presumably
   storing the flag register numbers through P1/P2 and returning true)
   is elided from this excerpt -- consult the complete file.  */
9809 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9816 /* If two condition code modes are compatible, return a condition code
9817 mode which is compatible with both. Otherwise, return
/* NOTE(review): elided excerpt -- the sentinel returned on incompatibility,
   the early equal-modes check, the return values of the branches below,
   and the switch over remaining modes (original lines ~9831-9862) are
   missing from this listing.  */
9820 static enum machine_mode
9821 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Non-CC modes can never be merged into a common CC mode.  */
9826 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGCmode and CCGOCmode are mutually compatible in either order.  */
9829 if ((m1 == CCGCmode && m2 == CCGOCmode)
9830 || (m1 == CCGOCmode && m2 == CCGCmode))
9858 /* These are only compatible with themselves, which we already
9864 /* Return true if we should use an FCOMI instruction for this fp comparison. */
/* NOTE(review): return type line and braces are elided in this excerpt.
   FCOMI is chosen when its cost equals the best overall cost for either
   the comparison or its operand-swapped form.  */
9867 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9869 enum rtx_code swapped_code = swap_condition (code);
9870 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9871 || (ix86_fp_comparison_cost (swapped_code)
9872 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9875 /* Swap, force into registers, or otherwise massage the two operands
9876 to a fp comparison. The operands are updated in place; the new
9877 comparison code is returned. */
/* NOTE(review): elided excerpt -- scattered lines are missing (e.g. the
   condition guarding the first force_reg branch at ~9891-9893, the
   else-branch structure around 9903-9935, and the writeback of
   *pop0/*pop1 plus final return near 9949-9953).  Read alongside the
   complete file.  */
9879 static enum rtx_code
9880 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9882 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9883 rtx op0 = *pop0, op1 = *pop1;
9884 enum machine_mode op_mode = GET_MODE (op0);
9885 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9887 /* All of the unordered compare instructions only work on registers.
9888 The same is true of the fcomi compare instructions. The XFmode
9889 compare instructions require registers except when comparing
9890 against zero or when converting operand 1 from fixed point to
/* Case 1 (guard partially elided): comparisons that demand both operands
   in registers -- unordered compares, XFmode without a loadable 387
   constant, or when FCOMI is the chosen strategy.  */
9894 && (fpcmp_mode == CCFPUmode
9895 || (op_mode == XFmode
9896 && ! (standard_80387_constant_p (op0) == 1
9897 || standard_80387_constant_p (op1) == 1)
9898 && GET_CODE (op1) != FLOAT)
9899 || ix86_use_fcomi_compare (code)))
9901 op0 = force_reg (op_mode, op0);
9902 op1 = force_reg (op_mode, op1);
9906 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9907 things around if they appear profitable, otherwise force op0
/* Swap operands (and the condition) when op0 is a non-loadable constant
   or a MEM while op1 is neither, so the memory/constant lands in op1.  */
9910 if (standard_80387_constant_p (op0) == 0
9911 || (GET_CODE (op0) == MEM
9912 && ! (standard_80387_constant_p (op1) == 0
9913 || GET_CODE (op1) == MEM)))
9916 tmp = op0, op0 = op1, op1 = tmp;
9917 code = swap_condition (code);
9920 if (GET_CODE (op0) != REG)
9921 op0 = force_reg (op_mode, op0);
/* Constants in op1: push non-special constants to the constant pool,
   force everything else (elided branches) into a register.  */
9923 if (CONSTANT_P (op1))
9925 int tmp = standard_80387_constant_p (op1);
9927 op1 = validize_mem (force_const_mem (op_mode, op1));
9931 op1 = force_reg (op_mode, op1);
9934 op1 = force_reg (op_mode, op1);
9938 /* Try to rearrange the comparison to make it cheaper. */
9939 if (ix86_fp_comparison_cost (code)
9940 > ix86_fp_comparison_cost (swap_condition (code))
9941 && (GET_CODE (op1) == REG || !no_new_pseudos))
9944 tmp = op0, op0 = op1, op1 = tmp;
9945 code = swap_condition (code);
9946 if (GET_CODE (op0) != REG)
9947 op0 = force_reg (op_mode, op0);
9955 /* Convert comparison codes we use to represent FP comparison to integer
9956 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the body (original lines ~9961-9987, presumably a switch
   mapping FP rtx codes to integer condition codes) is entirely elided
   from this excerpt.  */
9960 ix86_fp_compare_code_to_integer (enum rtx_code code)
9989 /* Split comparison code CODE into comparisons we can do using branch
9990 instructions. BYPASS_CODE is comparison code for branch that will
9991 branch around FIRST_CODE and SECOND_CODE. If some of branches
9992 is not required, set value to UNKNOWN.
9993 We never require more than two branches. */
/* NOTE(review): elided excerpt -- the switch opening, the `break's after
   each case group, the flag-meaning comment block (original 10005-10013),
   and the *first_code assignments in the UNGE/UNGT cases are missing.  */
9996 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9997 enum rtx_code *first_code,
9998 enum rtx_code *second_code)
10000 *first_code = code;
10001 *bypass_code = UNKNOWN;
10002 *second_code = UNKNOWN;
10004 /* The fcomi comparison sets flags as follows:
/* These codes map directly onto a single flags test; no extra branch.  */
10014 case GT: /* GTU - CF=0 & ZF=0 */
10015 case GE: /* GEU - CF=0 */
10016 case ORDERED: /* PF=0 */
10017 case UNORDERED: /* PF=1 */
10018 case UNEQ: /* EQ - ZF=1 */
10019 case UNLT: /* LTU - CF=1 */
10020 case UNLE: /* LEU - CF=1 | ZF=1 */
10021 case LTGT: /* EQ - ZF=0 */
/* Ordered codes that misbehave on unordered inputs get either a
   bypass branch (jump around on UNORDERED) or a second branch.  */
10023 case LT: /* LTU - CF=1 - fails on unordered */
10024 *first_code = UNLT;
10025 *bypass_code = UNORDERED;
10027 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10028 *first_code = UNLE;
10029 *bypass_code = UNORDERED;
10031 case EQ: /* EQ - ZF=1 - fails on unordered */
10032 *first_code = UNEQ;
10033 *bypass_code = UNORDERED;
10035 case NE: /* NE - ZF=0 - fails on unordered */
10036 *first_code = LTGT;
10037 *second_code = UNORDERED;
10039 case UNGE: /* GEU - CF=0 - fails on unordered */
10041 *second_code = UNORDERED;
10043 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10045 *second_code = UNORDERED;
10048 gcc_unreachable ();
/* Without IEEE conformance the unordered fixups are unnecessary.  */
10050 if (!TARGET_IEEE_FP)
10052 *second_code = UNKNOWN;
10053 *bypass_code = UNKNOWN;
10057 /* Return cost of comparison done fcom + arithmetics operations on AX.
10058 All following functions do use number of instructions as a cost metrics.
10059 In future this should be tweaked to compute bytes for optimize_size and
10060 take into account performance of various instructions on various CPUs. */
/* NOTE(review): elided excerpt -- the return type line, the non-IEEE
   early-return value, and the per-code switch returning instruction
   counts (original lines ~10067-10089) are missing here.  */
10062 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10064 if (!TARGET_IEEE_FP)
10066 /* The cost of code output by ix86_expand_fp_compare. */
10090 gcc_unreachable ();
10094 /* Return cost of comparison done using fcomi operation.
10095 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* NOTE(review): the return type line and the TARGET_CMOVE guard returning
   a prohibitive cost (original ~10102-10103) are elided.  */
10097 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10099 enum rtx_code bypass_code, first_code, second_code;
10100 /* Return arbitrarily high cost when instruction is not supported - this
10101 prevents gcc from using it. */
/* Base cost 2 (fcomi + jump), plus 1 if a bypass or second branch is
   needed for unordered handling.  */
10104 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10105 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10108 /* Return cost of comparison done using sahf operation.
10109 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* NOTE(review): return type line and the prohibitive-cost return body of
   the guard below are elided from this excerpt.  */
10111 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10113 enum rtx_code bypass_code, first_code, second_code;
10114 /* Return arbitrarily high cost when instruction is not preferred - this
10115 avoids gcc from using it. */
10116 if (!TARGET_USE_SAHF && !optimize_size)
/* Base cost 3 (fcom + fnstsw + sahf), plus 1 for any extra branch.  */
10118 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10119 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10122 /* Compute cost of the comparison done using any method.
10123 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* NOTE(review): elided excerpt -- the declaration of `min', the
   assignments taking the smaller of sahf/fcomi costs (bodies of the two
   ifs below), and the final `return min;' are missing.  */
10125 ix86_fp_comparison_cost (enum rtx_code code)
10127 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10130 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10131 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10133 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10134 if (min > sahf_cost)
10136 if (min > fcomi_cost)
10141 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* NOTE(review): heavily elided excerpt -- declarations of tmp/tmp2, the
   switch opening over `code', the `break's between the per-code branches,
   and several emit lines are missing (embedded line numbers jump
   throughout).  The surviving structure: prepare args, choose between the
   fcomi/sahf flag-based path and the fnstsw+AH bit-twiddling path, and
   return the flags-user test rtx.  */
10144 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10145 rtx *second_test, rtx *bypass_test)
10147 enum machine_mode fpcmp_mode, intcmp_mode;
10149 int cost = ix86_fp_comparison_cost (code);
10150 enum rtx_code bypass_code, first_code, second_code;
10152 fpcmp_mode = ix86_fp_compare_mode (code);
10153 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10156 *second_test = NULL_RTX;
10158 *bypass_test = NULL_RTX;
10160 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10162 /* Do fcomi/sahf based test when profitable. */
10163 if ((bypass_code == UNKNOWN || bypass_test)
10164 && (second_code == UNKNOWN || second_test)
10165 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi variant: compare straight into the flags register.  */
10169 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10170 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf variant: fnstsw into a scratch HImode reg, then sahf.  */
10176 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10177 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10179 scratch = gen_reg_rtx (HImode);
10180 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10181 emit_insn (gen_x86_sahf_1 (scratch));
10184 /* The FP codes work out to act like unsigned. */
10185 intcmp_mode = fpcmp_mode;
10187 if (bypass_code != UNKNOWN)
10188 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10189 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10191 if (second_code != UNKNOWN)
10192 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10193 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10198 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10199 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10200 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10202 scratch = gen_reg_rtx (HImode);
10203 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10205 /* In the unordered case, we have to check C2 for NaN's, which
10206 doesn't happen to work out to anything nice combination-wise.
10207 So do some bit twiddling on the value we've got in AH to come
10208 up with an appropriate set of condition codes. */
/* The masks below test 387 status-word bits via AH: 0x45 = C0|C2|C3,
   0x40 = C3 (equal), 0x01 = C0 (less), 0x04 = C2 (unordered).  */
10210 intcmp_mode = CCNOmode;
10215 if (code == GT || !TARGET_IEEE_FP)
10217 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10222 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10223 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10224 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10225 intcmp_mode = CCmode;
10231 if (code == LT && TARGET_IEEE_FP)
10233 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10234 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10235 intcmp_mode = CCmode;
10240 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10246 if (code == GE || !TARGET_IEEE_FP)
10248 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10253 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10254 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10261 if (code == LE && TARGET_IEEE_FP)
10263 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10264 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10265 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10266 intcmp_mode = CCmode;
10271 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10277 if (code == EQ && TARGET_IEEE_FP)
10279 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10280 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10281 intcmp_mode = CCmode;
10286 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10293 if (code == NE && TARGET_IEEE_FP)
10295 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10296 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10302 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10308 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10312 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10317 gcc_unreachable ();
10321 /* Return the test that should be put into the flags user, i.e.
10322 the bcc, scc, or cmov instruction. */
10323 return gen_rtx_fmt_ee (code, VOIDmode,
10324 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Dispatch a comparison to the int or FP expander, honoring a compare
   that was already emitted (ix86_compare_emitted).
   NOTE(review): elided excerpt -- the return type, the declarations of
   op0/op1/ret, the braces, and the trailing `return ret;' are missing
   from this listing.  */
10329 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10332 op0 = ix86_compare_op0;
10333 op1 = ix86_compare_op1;
10336 *second_test = NULL_RTX;
10338 *bypass_test = NULL_RTX;
/* A previously emitted flags-setting insn short-circuits expansion:
   just build the flags-user test and clear the marker.  */
10340 if (ix86_compare_emitted)
10342 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10343 ix86_compare_emitted = NULL_RTX;
10345 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10346 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10347 second_test, bypass_test);
10349 ret = ix86_expand_int_compare (code, op0, op1);
10354 /* Return true if the CODE will result in nontrivial jump sequence. */
/* NOTE(review): return type line and braces elided.  "Nontrivial" means
   more than one branch is required (a bypass or second branch exists).  */
10356 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10358 enum rtx_code bypass_code, first_code, second_code;
10361 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10362 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Expand a conditional branch on CODE to LABEL, switching on the mode of
   ix86_compare_op0: simple flag-based branch for integer/QI-SI modes,
   compound FP branch for float modes, and a multi-compare split for
   DImode (TImode on 64-bit).
   NOTE(review): elided excerpt -- the switch/case labels, `goto simple'
   targets, several `break's and closing braces are missing (embedded
   line numbers jump); read alongside the complete file.  */
10366 ix86_expand_branch (enum rtx_code code, rtx label)
10370 /* If we have emitted a compare insn, go straight to simple.
10371 ix86_expand_compare won't emit anything if ix86_compare_emitted
10373 if (ix86_compare_emitted)
10376 switch (GET_MODE (ix86_compare_op0))
/* Simple path: one flags test -> (set pc (if_then_else ...)).  */
10382 tmp = ix86_expand_compare (code, NULL, NULL);
10383 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10384 gen_rtx_LABEL_REF (VOIDmode, label),
10386 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10395 enum rtx_code bypass_code, first_code, second_code;
10397 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10398 &ix86_compare_op1);
10400 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10402 /* Check whether we will use the natural sequence with one jump. If
10403 so, we can expand jump early. Otherwise delay expansion by
10404 creating compound insn to not confuse optimizers. */
10405 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10408 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10409 gen_rtx_LABEL_REF (VOIDmode, label),
10410 pc_rtx, NULL_RTX, NULL_RTX);
/* Otherwise emit a PARALLEL of the jump plus clobbers of the FP flag
   registers (and a scratch for the sahf path) to be split later.  */
10414 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10415 ix86_compare_op0, ix86_compare_op1);
10416 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10417 gen_rtx_LABEL_REF (VOIDmode, label),
10419 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10421 use_fcomi = ix86_use_fcomi_compare (code);
10422 vec = rtvec_alloc (3 + !use_fcomi);
10423 RTVEC_ELT (vec, 0) = tmp;
10425 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10427 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10430 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10432 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10441 /* Expand DImode branch into multiple compare+branch. */
10443 rtx lo[2], hi[2], label2;
10444 enum rtx_code code1, code2, code3;
10445 enum machine_mode submode;
/* Canonicalize: constant operand goes second.  */
10447 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10449 tmp = ix86_compare_op0;
10450 ix86_compare_op0 = ix86_compare_op1;
10451 ix86_compare_op1 = tmp;
10452 code = swap_condition (code);
10454 if (GET_MODE (ix86_compare_op0) == DImode)
10456 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10457 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10462 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10463 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10467 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10468 avoid two branches. This costs one extra insn, so disable when
10469 optimizing for size. */
10471 if ((code == EQ || code == NE)
10473 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10478 if (hi[1] != const0_rtx)
10479 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10480 NULL_RTX, 0, OPTAB_WIDEN)
10483 if (lo[1] != const0_rtx)
10484 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10485 NULL_RTX, 0, OPTAB_WIDEN);
10487 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10488 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the ORed XORs against zero in the narrower mode.  */
10490 ix86_compare_op0 = tmp;
10491 ix86_compare_op1 = const0_rtx;
10492 ix86_expand_branch (code, label);
10496 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10497 op1 is a constant and the low word is zero, then we can just
10498 examine the high word. */
10500 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10503 case LT: case LTU: case GE: case GEU:
10504 ix86_compare_op0 = hi[0];
10505 ix86_compare_op1 = hi[1];
10506 ix86_expand_branch (code, label);
10512 /* Otherwise, we need two or three jumps. */
10514 label2 = gen_label_rtx ();
10517 code2 = swap_condition (code);
10518 code3 = unsigned_condition (code);
10522 case LT: case GT: case LTU: case GTU:
10525 case LE: code1 = LT; code2 = GT; break;
10526 case GE: code1 = GT; code2 = LT; break;
10527 case LEU: code1 = LTU; code2 = GTU; break;
10528 case GEU: code1 = GTU; code2 = LTU; break;
10530 case EQ: code1 = UNKNOWN; code2 = NE; break;
10531 case NE: code2 = UNKNOWN; break;
10534 gcc_unreachable ();
10539 * if (hi(a) < hi(b)) goto true;
10540 * if (hi(a) > hi(b)) goto false;
10541 * if (lo(a) < lo(b)) goto true;
10545 ix86_compare_op0 = hi[0];
10546 ix86_compare_op1 = hi[1];
10548 if (code1 != UNKNOWN)
10549 ix86_expand_branch (code1, label);
10550 if (code2 != UNKNOWN)
10551 ix86_expand_branch (code2, label2);
10553 ix86_compare_op0 = lo[0];
10554 ix86_compare_op1 = lo[1];
10555 ix86_expand_branch (code3, label);
10557 if (code2 != UNKNOWN)
10558 emit_label (label2);
10563 gcc_unreachable ();
10567 /* Split branch based on floating point condition. */
/* NOTE(review): elided excerpt -- declarations of condition/i, the
   target1/target2 swap inside the reversal branch, REG_NOTES targets of
   the probability attachments, and several closing parens/braces are
   missing from this listing.  */
10569 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10570 rtx target1, rtx target2, rtx tmp, rtx pushed)
10572 rtx second, bypass;
10573 rtx label = NULL_RTX;
10575 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the fall-through target is target2 (pc_rtx), reversing
   the condition if needed.  */
10578 if (target2 != pc_rtx)
10581 code = reverse_condition_maybe_unordered (code);
10586 condition = ix86_expand_fp_compare (code, op1, op2,
10587 tmp, &second, &bypass);
10589 /* Remove pushed operand from stack. */
10591 ix86_free_from_memory (GET_MODE (pushed));
10593 if (split_branch_probability >= 0)
10595 /* Distribute the probabilities across the jumps.
10596 Assume the BYPASS and SECOND to be always test
10598 probability = split_branch_probability;
10600 /* Value of 1 is low enough to make no need for probability
10601 to be updated. Later we may run some experiments and see
10602 if unordered values are more frequent in practice. */
10604 bypass_probability = 1;
10606 second_probability = 1;
/* Bypass branch: jump over the main test when the bypass condition
   (typically UNORDERED) holds.  */
10608 if (bypass != NULL_RTX)
10610 label = gen_label_rtx ();
10611 i = emit_jump_insn (gen_rtx_SET
10613 gen_rtx_IF_THEN_ELSE (VOIDmode,
10615 gen_rtx_LABEL_REF (VOIDmode,
10618 if (bypass_probability >= 0)
10620 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10621 GEN_INT (bypass_probability),
/* Main conditional jump.  */
10624 i = emit_jump_insn (gen_rtx_SET
10626 gen_rtx_IF_THEN_ELSE (VOIDmode,
10627 condition, target1, target2)));
10628 if (probability >= 0)
10630 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10631 GEN_INT (probability),
/* Second branch for codes that need two tests (e.g. NE under IEEE).  */
10633 if (second != NULL_RTX)
10635 i = emit_jump_insn (gen_rtx_SET
10637 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10639 if (second_probability >= 0)
10641 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10642 GEN_INT (second_probability),
10645 if (label != NULL_RTX)
10646 emit_label (label);
/* Expand a setcc of comparison CODE into QImode DEST; returns 1 on
   success, 0 to make the caller FAIL the expansion.
   NOTE(review): elided excerpt -- the return type, the tmp/tmpreg setup
   (original ~10662-10665), and parts of the bypass/second-test merge
   logic are missing from this listing.  */
10650 ix86_expand_setcc (enum rtx_code code, rtx dest)
10652 rtx ret, tmp, tmpreg, equiv;
10653 rtx second_test, bypass_test;
/* Double-word compares are handled elsewhere; refuse them here.  */
10655 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10656 return 0; /* FAIL */
10658 gcc_assert (GET_MODE (dest) == QImode);
10660 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10661 PUT_MODE (ret, QImode);
10666 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* FP compares may require a second setcc ANDed/ORed into the result.  */
10667 if (bypass_test || second_test)
10669 rtx test = second_test;
10671 rtx tmp2 = gen_reg_rtx (QImode);
10674 gcc_assert (!second_test);
10675 test = bypass_test;
/* The bypass condition is reversed so it can be combined with AND.  */
10677 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10679 PUT_MODE (test, QImode);
10680 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10683 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10685 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10688 /* Attach a REG_EQUAL note describing the comparison result. */
10689 if (ix86_compare_op0 && ix86_compare_op1)
10691 equiv = simplify_gen_relational (code, QImode,
10692 GET_MODE (ix86_compare_op0),
10693 ix86_compare_op0, ix86_compare_op1);
10694 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10697 return 1; /* DONE */
10700 /* Expand comparison setting or clearing carry flag. Return true when
10701 successful and set pop for the operation. */
/* NOTE(review): elided excerpt -- the return type, `return false'
   statements of several guards, the switch over `code' in the integer
   half, operand swaps, and the final `return true' are missing here.  */
10703 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10705 enum machine_mode mode =
10706 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10708 /* Do not handle DImode compares that go through special path. Also we can't
10709 deal with FP compares yet. This is possible to add. */
10710 if (mode == (TARGET_64BIT ? TImode : DImode))
/* ---- Floating-point half ---- */
10712 if (FLOAT_MODE_P (mode))
10714 rtx second_test = NULL, bypass_test = NULL;
10715 rtx compare_op, compare_seq;
10717 /* Shortcut: following common codes never translate into carry flag compares. */
10718 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10719 || code == ORDERED || code == UNORDERED)
10722 /* These comparisons require zero flag; swap operands so they won't. */
10723 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10724 && !TARGET_IEEE_FP)
10729 code = swap_condition (code);
10732 /* Try to expand the comparison and verify that we end up with carry flag
10733 based comparison. This is fails to be true only when we decide to expand
10734 comparison using arithmetic that is not too common scenario. */
10736 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10737 &second_test, &bypass_test);
10738 compare_seq = get_insns ();
/* Reject expansions needing extra tests; translate FP CC codes back to
   integer codes and accept only LTU/GEU (pure carry-flag tests).  */
10741 if (second_test || bypass_test)
10743 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10744 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10745 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10747 code = GET_CODE (compare_op);
10748 if (code != LTU && code != GEU)
10750 emit_insn (compare_seq);
/* ---- Integer half: rewrite CODE into an unsigned carry test ---- */
10754 if (!INTEGRAL_MODE_P (mode))
10762 /* Convert a==0 into (unsigned)a<1. */
10765 if (op1 != const0_rtx)
10768 code = (code == EQ ? LTU : GEU);
10771 /* Convert a>b into b<a or a>=b-1. */
10774 if (GET_CODE (op1) == CONST_INT)
10776 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10777 /* Bail out on overflow. We still can swap operands but that
10778 would force loading of the constant into register. */
10779 if (op1 == const0_rtx
10780 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10782 code = (code == GTU ? GEU : LTU);
10789 code = (code == GTU ? LTU : GEU);
10793 /* Convert a>=0 into (unsigned)a<0x80000000. */
10796 if (mode == DImode || op1 != const0_rtx)
10798 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10799 code = (code == LT ? GEU : LTU);
10803 if (mode == DImode || op1 != constm1_rtx)
10805 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10806 code = (code == LE ? GEU : LTU);
10812 /* Swapping operands may cause constant to appear as first operand. */
10813 if (!nonimmediate_operand (op0, VOIDmode))
10815 if (no_new_pseudos)
10817 op0 = force_reg (mode, op0);
/* Emit through the standard compare path and sanity-check the result
   really is a carry-flag test.  */
10819 ix86_compare_op0 = op0;
10820 ix86_compare_op1 = op1;
10821 *pop = ix86_expand_compare (code, NULL, NULL);
10822 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10827 ix86_expand_int_movcc (rtx operands[])
10829 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10830 rtx compare_seq, compare_op;
10831 rtx second_test, bypass_test;
10832 enum machine_mode mode = GET_MODE (operands[0]);
10833 bool sign_bit_compare_p = false;;
10836 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10837 compare_seq = get_insns ();
10840 compare_code = GET_CODE (compare_op);
10842 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10843 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10844 sign_bit_compare_p = true;
10846 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10847 HImode insns, we'd be swallowed in word prefix ops. */
10849 if ((mode != HImode || TARGET_FAST_PREFIX)
10850 && (mode != (TARGET_64BIT ? TImode : DImode))
10851 && GET_CODE (operands[2]) == CONST_INT
10852 && GET_CODE (operands[3]) == CONST_INT)
10854 rtx out = operands[0];
10855 HOST_WIDE_INT ct = INTVAL (operands[2]);
10856 HOST_WIDE_INT cf = INTVAL (operands[3]);
10857 HOST_WIDE_INT diff;
10860 /* Sign bit compares are better done using shifts than we do by using
10862 if (sign_bit_compare_p
10863 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10864 ix86_compare_op1, &compare_op))
10866 /* Detect overlap between destination and compare sources. */
10869 if (!sign_bit_compare_p)
10871 bool fpcmp = false;
10873 compare_code = GET_CODE (compare_op);
10875 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10876 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10879 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10882 /* To simplify rest of code, restrict to the GEU case. */
10883 if (compare_code == LTU)
10885 HOST_WIDE_INT tmp = ct;
10888 compare_code = reverse_condition (compare_code);
10889 code = reverse_condition (code);
10894 PUT_CODE (compare_op,
10895 reverse_condition_maybe_unordered
10896 (GET_CODE (compare_op)));
10898 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10902 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10903 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10904 tmp = gen_reg_rtx (mode);
10906 if (mode == DImode)
10907 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10909 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10913 if (code == GT || code == GE)
10914 code = reverse_condition (code);
10917 HOST_WIDE_INT tmp = ct;
10922 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10923 ix86_compare_op1, VOIDmode, 0, -1);
10936 tmp = expand_simple_binop (mode, PLUS,
10938 copy_rtx (tmp), 1, OPTAB_DIRECT);
10949 tmp = expand_simple_binop (mode, IOR,
10951 copy_rtx (tmp), 1, OPTAB_DIRECT);
10953 else if (diff == -1 && ct)
10963 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10965 tmp = expand_simple_binop (mode, PLUS,
10966 copy_rtx (tmp), GEN_INT (cf),
10967 copy_rtx (tmp), 1, OPTAB_DIRECT);
10975 * andl cf - ct, dest
10985 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10988 tmp = expand_simple_binop (mode, AND,
10990 gen_int_mode (cf - ct, mode),
10991 copy_rtx (tmp), 1, OPTAB_DIRECT);
10993 tmp = expand_simple_binop (mode, PLUS,
10994 copy_rtx (tmp), GEN_INT (ct),
10995 copy_rtx (tmp), 1, OPTAB_DIRECT);
10998 if (!rtx_equal_p (tmp, out))
10999 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11001 return 1; /* DONE */
11007 tmp = ct, ct = cf, cf = tmp;
11009 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11011 /* We may be reversing unordered compare to normal compare, that
11012 is not valid in general (we may convert non-trapping condition
11013 to trapping one), however on i386 we currently emit all
11014 comparisons unordered. */
11015 compare_code = reverse_condition_maybe_unordered (compare_code);
11016 code = reverse_condition_maybe_unordered (code);
11020 compare_code = reverse_condition (compare_code);
11021 code = reverse_condition (code);
11025 compare_code = UNKNOWN;
11026 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11027 && GET_CODE (ix86_compare_op1) == CONST_INT)
11029 if (ix86_compare_op1 == const0_rtx
11030 && (code == LT || code == GE))
11031 compare_code = code;
11032 else if (ix86_compare_op1 == constm1_rtx)
11036 else if (code == GT)
11041 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11042 if (compare_code != UNKNOWN
11043 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11044 && (cf == -1 || ct == -1))
11046 /* If lea code below could be used, only optimize
11047 if it results in a 2 insn sequence. */
11049 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11050 || diff == 3 || diff == 5 || diff == 9)
11051 || (compare_code == LT && ct == -1)
11052 || (compare_code == GE && cf == -1))
11055 * notl op1 (if necessary)
11063 code = reverse_condition (code);
11066 out = emit_store_flag (out, code, ix86_compare_op0,
11067 ix86_compare_op1, VOIDmode, 0, -1);
11069 out = expand_simple_binop (mode, IOR,
11071 out, 1, OPTAB_DIRECT);
11072 if (out != operands[0])
11073 emit_move_insn (operands[0], out);
11075 return 1; /* DONE */
11080 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11081 || diff == 3 || diff == 5 || diff == 9)
11082 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11084 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11090 * lea cf(dest*(ct-cf)),dest
11094 * This also catches the degenerate setcc-only case.
11100 out = emit_store_flag (out, code, ix86_compare_op0,
11101 ix86_compare_op1, VOIDmode, 0, 1);
11104 /* On x86_64 the lea instruction operates on Pmode, so we need
11105 to get arithmetics done in proper mode to match. */
11107 tmp = copy_rtx (out);
11111 out1 = copy_rtx (out);
11112 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11116 tmp = gen_rtx_PLUS (mode, tmp, out1);
11122 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11125 if (!rtx_equal_p (tmp, out))
11128 out = force_operand (tmp, copy_rtx (out));
11130 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11132 if (!rtx_equal_p (out, operands[0]))
11133 emit_move_insn (operands[0], copy_rtx (out));
11135 return 1; /* DONE */
11139 * General case: Jumpful:
11140 * xorl dest,dest cmpl op1, op2
11141 * cmpl op1, op2 movl ct, dest
11142 * setcc dest jcc 1f
11143 * decl dest movl cf, dest
11144 * andl (cf-ct),dest 1:
11147 * Size 20. Size 14.
11149 * This is reasonably steep, but branch mispredict costs are
11150 * high on modern cpus, so consider failing only if optimizing
11154 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11155 && BRANCH_COST >= 2)
11161 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11162 /* We may be reversing unordered compare to normal compare,
11163 that is not valid in general (we may convert non-trapping
11164 condition to trapping one), however on i386 we currently
11165 emit all comparisons unordered. */
11166 code = reverse_condition_maybe_unordered (code);
11169 code = reverse_condition (code);
11170 if (compare_code != UNKNOWN)
11171 compare_code = reverse_condition (compare_code);
11175 if (compare_code != UNKNOWN)
11177 /* notl op1 (if needed)
11182 For x < 0 (resp. x <= -1) there will be no notl,
11183 so if possible swap the constants to get rid of the
11185 True/false will be -1/0 while code below (store flag
11186 followed by decrement) is 0/-1, so the constants need
11187 to be exchanged once more. */
11189 if (compare_code == GE || !cf)
11191 code = reverse_condition (code);
11196 HOST_WIDE_INT tmp = cf;
11201 out = emit_store_flag (out, code, ix86_compare_op0,
11202 ix86_compare_op1, VOIDmode, 0, -1);
11206 out = emit_store_flag (out, code, ix86_compare_op0,
11207 ix86_compare_op1, VOIDmode, 0, 1);
11209 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11210 copy_rtx (out), 1, OPTAB_DIRECT);
11213 out = expand_simple_binop (mode, AND, copy_rtx (out),
11214 gen_int_mode (cf - ct, mode),
11215 copy_rtx (out), 1, OPTAB_DIRECT);
11217 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11218 copy_rtx (out), 1, OPTAB_DIRECT);
11219 if (!rtx_equal_p (out, operands[0]))
11220 emit_move_insn (operands[0], copy_rtx (out));
11222 return 1; /* DONE */
11226 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11228 /* Try a few things more with specific constants and a variable. */
11231 rtx var, orig_out, out, tmp;
11233 if (BRANCH_COST <= 2)
11234 return 0; /* FAIL */
11236 /* If one of the two operands is an interesting constant, load a
11237 constant with the above and mask it in with a logical operation. */
11239 if (GET_CODE (operands[2]) == CONST_INT)
11242 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11243 operands[3] = constm1_rtx, op = and_optab;
11244 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11245 operands[3] = const0_rtx, op = ior_optab;
11247 return 0; /* FAIL */
11249 else if (GET_CODE (operands[3]) == CONST_INT)
11252 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11253 operands[2] = constm1_rtx, op = and_optab;
11254 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11255 operands[2] = const0_rtx, op = ior_optab;
11257 return 0; /* FAIL */
11260 return 0; /* FAIL */
11262 orig_out = operands[0];
11263 tmp = gen_reg_rtx (mode);
11266 /* Recurse to get the constant loaded. */
11267 if (ix86_expand_int_movcc (operands) == 0)
11268 return 0; /* FAIL */
11270 /* Mask in the interesting variable. */
11271 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11273 if (!rtx_equal_p (out, orig_out))
11274 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11276 return 1; /* DONE */
11280 * For comparison with above,
11290 if (! nonimmediate_operand (operands[2], mode))
11291 operands[2] = force_reg (mode, operands[2]);
11292 if (! nonimmediate_operand (operands[3], mode))
11293 operands[3] = force_reg (mode, operands[3]);
11295 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11297 rtx tmp = gen_reg_rtx (mode);
11298 emit_move_insn (tmp, operands[3]);
11301 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11303 rtx tmp = gen_reg_rtx (mode);
11304 emit_move_insn (tmp, operands[2]);
11308 if (! register_operand (operands[2], VOIDmode)
11310 || ! register_operand (operands[3], VOIDmode)))
11311 operands[2] = force_reg (mode, operands[2]);
11314 && ! register_operand (operands[3], VOIDmode))
11315 operands[3] = force_reg (mode, operands[3]);
11317 emit_insn (compare_seq);
11318 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11319 gen_rtx_IF_THEN_ELSE (mode,
11320 compare_op, operands[2],
11323 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11324 gen_rtx_IF_THEN_ELSE (mode,
11326 copy_rtx (operands[3]),
11327 copy_rtx (operands[0]))));
11329 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11330 gen_rtx_IF_THEN_ELSE (mode,
11332 copy_rtx (operands[2]),
11333 copy_rtx (operands[0]))));
11335 return 1; /* DONE */
11338 /* Swap, force into registers, or otherwise massage the two operands
11339 to an sse comparison with a mask result. Thus we differ a bit from
11340 ix86_prepare_fp_compare_args which expects to produce a flags result.
11342 The DEST operand exists to help determine whether to commute commutative
11343 operators. The POP0/POP1 operands are updated in place. The new
11344 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): this chunk shows only a subset of the function's original
   lines; the switch over CODE that classifies comparison operators is
   largely elided here, so comments cover only the visible statements.  */
11346 static enum rtx_code
11347 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11348 rtx *pop0, rtx *pop1)
11356 /* We have no LTGT as an operator. We could implement it with
11357 NE & ORDERED, but this requires an extra temporary. It's
11358 not clear that it's worth it. */
11365 /* These are supported directly. */
11372 /* For commutative operators, try to canonicalize the destination
11373 operand to be first in the comparison - this helps reload to
11374 avoid extra moves. */
/* Only swap when DEST is absent or does not already match *pop1.  */
11375 if (!dest || !rtx_equal_p (dest, *pop1))
11383 /* These are not supported directly. Swap the comparison operands
11384 to transform into something that is supported. */
11388 code = swap_condition (code);
/* Any comparison code not handled above is a caller bug.  */
11392 gcc_unreachable ();
11398 /* Detect conditional moves that exactly match min/max operational
11399 semantics. Note that this is IEEE safe, as long as we don't
11400 interchange the operands.
11402 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11403 and TRUE if the operation is successful and instructions are emitted. */
/* NOTE(review): interior lines (the IF/LT handling, the is_min declaration,
   early-return paths) are not visible in this chunk; comments below apply
   to the statements shown.  */
11406 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11407 rtx cmp_op1, rtx if_true, rtx if_false)
11409 enum machine_mode mode;
11415 else if (code == UNGE)
/* Presumably the UNGE case swaps the arms so the test reads as a min/max
   in canonical orientation -- TODO confirm against the elided lines.  */
11418 if_true = if_false;
/* Recognize the two operand orientations that match min/max semantics.  */
11424 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11426 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11431 mode = GET_MODE (dest);
11433 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11434 but MODE may be a vector mode and thus not appropriate. */
11435 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-careful path: wrap the operands in an UNSPEC so the optimizers
   cannot commute them (minss/maxss are not commutative wrt NaN/-0.0).  */
11437 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11440 if_true = force_reg (mode, if_true);
11441 v = gen_rtvec (2, if_true, if_false);
11442 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: plain SMIN/SMAX rtx is sufficient.  */
11446 code = is_min ? SMIN : SMAX;
11447 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11450 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11454 /* Expand an sse vector comparison. Return the register with the result. */
/* Emit DEST = (CMP_OP0 code CMP_OP1) as an SSE mask-producing compare and
   return the register holding the mask.  NOTE(review): a few interior
   lines (declaration of X, the force_reg of cmp_op0 condition, final
   return) are elided in this chunk.  */
11457 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11458 rtx op_true, rtx op_false)
11460 enum machine_mode mode = GET_MODE (dest);
11463 cmp_op0 = force_reg (mode, cmp_op0);
/* Second operand may be a memory operand; only load it if it isn't.  */
11464 if (!nonimmediate_operand (cmp_op1, mode))
11465 cmp_op1 = force_reg (mode, cmp_op1);
/* Don't clobber DEST if the subsequent select still needs to read the
   true/false arms that mention it -- compute the mask in a fresh reg.  */
11468 || reg_overlap_mentioned_p (dest, op_true)
11469 || reg_overlap_mentioned_p (dest, op_false))
11470 dest = gen_reg_rtx (mode);
11472 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11473 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11478 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11479 operations. This is used for both scalar and vector conditional moves. */
/* Lower DEST = CMP ? OP_TRUE : OP_FALSE into AND/ANDN/OR mask logic
   (pre-blend SSE select idiom).  CMP is an all-ones/all-zeros mask.  */
11482 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11484 enum machine_mode mode = GET_MODE (dest);
/* Special case: false arm is zero -- a single AND suffices.  */
11487 if (op_false == CONST0_RTX (mode))
11489 op_true = force_reg (mode, op_true);
11490 x = gen_rtx_AND (mode, cmp, op_true);
11491 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Special case: true arm is zero -- ANDN (NOT cmp, then AND) suffices.  */
11493 else if (op_true == CONST0_RTX (mode))
11495 op_false = force_reg (mode, op_false);
11496 x = gen_rtx_NOT (mode, cmp);
11497 x = gen_rtx_AND (mode, x, op_false);
11498 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* General case: (op_true & cmp) | (op_false & ~cmp) via temporaries
   t2 and t3, then OR into DEST.  */
11502 op_true = force_reg (mode, op_true);
11503 op_false = force_reg (mode, op_false);
11505 t2 = gen_reg_rtx (mode);
11507 t3 = gen_reg_rtx (mode);
11511 x = gen_rtx_AND (mode, op_true, cmp);
11512 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11514 x = gen_rtx_NOT (mode, cmp);
11515 x = gen_rtx_AND (mode, x, op_false);
11516 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11518 x = gen_rtx_IOR (mode, t3, t2);
11519 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11523 /* Expand a floating-point conditional move. Return true if successful. */
/* Expand a scalar floating-point conditional move (movcc pattern).
   operands[0] = dest, operands[1] = comparison, operands[2]/[3] = arms.
   Returns nonzero on success per the surrounding expander convention.
   NOTE(review): several interior lines (returns, braces, the cmode
   mismatch check) are elided in this chunk.  */
11526 ix86_expand_fp_movcc (rtx operands[])
11528 enum machine_mode mode = GET_MODE (operands[0]);
11529 enum rtx_code code = GET_CODE (operands[1]);
11530 rtx tmp, compare_op, second_test, bypass_test;
/* SSE path: use compare-to-mask plus logical select, since SSE has no
   cmove.  Only valid when the comparison mode matches the move mode.  */
11532 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11534 enum machine_mode cmode;
11536 /* Since we've no cmove for sse registers, don't force bad register
11537 allocation just to gain access to it. Deny movcc when the
11538 comparison mode doesn't match the move mode. */
11539 cmode = GET_MODE (ix86_compare_op0);
11540 if (cmode == VOIDmode)
11541 cmode = GET_MODE (ix86_compare_op1);
11545 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11547 &ix86_compare_op1);
11548 if (code == UNKNOWN)
/* Prefer a direct minss/maxss when the cmove matches min/max shape.  */
11551 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11552 ix86_compare_op1, operands[2],
11556 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11557 ix86_compare_op1, operands[2], operands[3]);
11558 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11562 /* The floating point conditional move instructions don't directly
11563 support conditions resulting from a signed integer comparison. */
11565 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11567 /* The floating point conditional move instructions don't directly
11568 support signed integer comparisons. */
11570 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Materialize the condition into a QImode 0/1 via setcc, then redo the
   compare against zero so fcmov can consume it.  */
11572 gcc_assert (!second_test && !bypass_test);
11573 tmp = gen_reg_rtx (QImode);
11574 ix86_expand_setcc (code, tmp);
11576 ix86_compare_op0 = tmp;
11577 ix86_compare_op1 = const0_rtx;
11578 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* If a secondary (bypass/second) test exists and an arm overlaps DEST,
   copy the arm to a fresh register first so the later conditional move
   reads the original value.  */
11580 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11582 tmp = gen_reg_rtx (mode);
11583 emit_move_insn (tmp, operands[3]);
11586 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11588 tmp = gen_reg_rtx (mode);
11589 emit_move_insn (tmp, operands[2]);
/* Primary fcmov, then optional fixups for the bypass/second tests that
   unordered comparisons require.  */
11593 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11594 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11595 operands[2], operands[3])));
11597 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11598 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11599 operands[3], operands[0])));
11601 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11602 gen_rtx_IF_THEN_ELSE (mode, second_test,
11603 operands[2], operands[0])));
11608 /* Expand a floating-point vector conditional move; a vcond operation
11609 rather than a movcc operation. */
/* Expand a floating-point vector vcond: operands[0] = dest,
   operands[1]/[2] = arms, operands[3] = comparison over
   operands[4]/[5].  Mirrors the scalar SSE path of
   ix86_expand_fp_movcc: prepare args, try min/max, else
   compare-to-mask plus logical select.  */
11612 ix86_expand_fp_vcond (rtx operands[])
11614 enum rtx_code code = GET_CODE (operands[3]);
11617 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11618 &operands[4], &operands[5]);
/* Comparison not implementable as a single SSE compare -- give up.  */
11619 if (code == UNKNOWN)
11622 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11623 operands[5], operands[1], operands[2]))
11626 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11627 operands[1], operands[2]);
11628 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11632 /* Expand a signed integral vector conditional move. */
/* Expand a signed/unsigned integral vector vcond.  Canonicalizes the
   comparison to EQ/GT/GTU, simulates unsigned compares (which the
   hardware lacks) with sign-bit tricks, then selects with mask logic.
   NOTE(review): the switch labels and several branches are elided in
   this chunk; comments apply to the visible statements.  */
11635 ix86_expand_int_vcond (rtx operands[])
11637 enum machine_mode mode = GET_MODE (operands[0]);
11638 enum rtx_code code = GET_CODE (operands[3]);
/* NEGATE records that we inverted the condition, so the true/false arms
   must be exchanged when emitting the select (see 1+negate below).  */
11639 bool negate = false;
11642 cop0 = operands[4];
11643 cop1 = operands[5];
11645 /* Canonicalize the comparison to EQ, GT, GTU. */
11656 code = reverse_condition (code);
11662 code = reverse_condition (code);
11668 code = swap_condition (code);
11669 x = cop0, cop0 = cop1, cop1 = x;
11673 gcc_unreachable ();
11676 /* Unsigned parallel compare is not supported by the hardware. Play some
11677 tricks to turn this into a signed comparison against 0. */
11680 cop0 = force_reg (mode, cop0);
11688 /* Perform a parallel modulo subtraction. */
11689 t1 = gen_reg_rtx (mode);
11690 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11692 /* Extract the original sign bit of op0. */
11693 mask = GEN_INT (-0x80000000);
11694 mask = gen_rtx_CONST_VECTOR (mode,
11695 gen_rtvec (4, mask, mask, mask, mask));
11696 mask = force_reg (mode, mask);
11697 t2 = gen_reg_rtx (mode);
11698 emit_insn (gen_andv4si3 (t2, cop0, mask));
11700 /* XOR it back into the result of the subtraction. This results
11701 in the sign bit set iff we saw unsigned underflow. */
11702 x = gen_reg_rtx (mode);
11703 emit_insn (gen_xorv4si3 (x, t1, t2));
11711 /* Perform a parallel unsigned saturating subtraction. */
11712 x = gen_reg_rtx (mode);
11713 emit_insn (gen_rtx_SET (VOIDmode, x,
11714 gen_rtx_US_MINUS (mode, cop0, cop1)));
11721 gcc_unreachable ();
/* After the unsigned trick, the comparison is against zero.  */
11725 cop1 = CONST0_RTX (mode);
/* 1+negate / 2-negate swaps the arms when the condition was inverted.  */
11728 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11729 operands[1+negate], operands[2-negate]);
11731 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11732 operands[2-negate]);
11736 /* Expand conditional increment or decrement using adb/sbb instructions.
11737 The default case using setcc followed by the conditional move can be
11738 done by generic code. */
/* Expand conditional +/-1 (operands[3] is const1_rtx or constm1_rtx)
   using adc/sbb against the carry flag.  Returns 1 on success; 0
   (FAIL, via elided returns) when the pattern doesn't apply.  */
11740 ix86_expand_int_addcc (rtx operands[])
11742 enum rtx_code code = GET_CODE (operands[1]);
11744 rtx val = const0_rtx;
11745 bool fpcmp = false;
11746 enum machine_mode mode = GET_MODE (operands[0]);
/* Only increments/decrements by exactly 1 are handled here.  */
11748 if (operands[3] != const1_rtx
11749 && operands[3] != constm1_rtx)
/* The comparison must be expressible as a carry-flag test.  */
11751 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11752 ix86_compare_op1, &compare_op))
11754 code = GET_CODE (compare_op);
11756 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11757 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11760 code = ix86_fp_compare_code_to_integer (code);
/* FP compares may be unordered, so use the maybe_unordered reversal.  */
11767 PUT_CODE (compare_op,
11768 reverse_condition_maybe_unordered
11769 (GET_CODE (compare_op)));
11771 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11773 PUT_MODE (compare_op, mode);
11775 /* Construct either adc or sbb insn. */
/* sbb subtracts the carry; adc adds it -- the choice depends on whether
   the condition is LTU combined with the +/-1 direction.  */
11776 if ((code == LTU) == (operands[3] == constm1_rtx))
11778 switch (GET_MODE (operands[0]))
11781 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11784 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11787 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11790 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11793 gcc_unreachable ();
11798 switch (GET_MODE (operands[0]))
11801 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11804 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11807 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11810 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11813 gcc_unreachable ();
11816 return 1; /* DONE */
11820 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11821 works for floating pointer parameters and nonoffsetable memories.
11822 For pushes, it returns just stack offsets; the values will be saved
11823 in the right order. Maximally three parts are generated. */
/* Split OPERAND into 2-3 word-sized PARTS for MODE.  Returns the part
   count (via an elided return).  Handles registers, offsettable memory,
   pushes (non-offsettable), CONST_VECTOR, and CONST_DOUBLE sources.
   NOTE(review): TARGET_64BIT branching lines and returns are elided in
   this chunk.  */
11826 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* 32-bit: XFmode needs 3 SImode parts; otherwise size/4 parts.
   64-bit (next line): round up to DImode-sized parts.  */
11831 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11833 size = (GET_MODE_SIZE (mode) + 4) / 8;
11835 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11836 gcc_assert (size >= 2 && size <= 3);
11838 /* Optimize constant pool reference to immediates. This is used by fp
11839 moves, that force all constants to memory to allow combining. */
11840 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11842 rtx tmp = maybe_get_pool_constant (operand);
11847 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11849 /* The only non-offsetable memories we handle are pushes. */
11850 int ok = push_operand (operand, VOIDmode);
/* For a push, all parts alias the same (pre-dec) address; the caller
   emits them in the right order.  */
11854 operand = copy_rtx (operand);
11855 PUT_MODE (operand, Pmode);
11856 parts[0] = parts[1] = parts[2] = operand;
11860 if (GET_CODE (operand) == CONST_VECTOR)
11862 enum machine_mode imode = int_mode_for_mode (mode);
11863 /* Caution: if we looked through a constant pool memory above,
11864 the operand may actually have a different mode now. That's
11865 ok, since we want to pun this all the way back to an integer. */
11866 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11867 gcc_assert (operand != NULL);
/* --- 32-bit target: split into SImode parts. --- */
11873 if (mode == DImode)
11874 split_di (&operand, 1, &parts[0], &parts[1]);
11877 if (REG_P (operand))
/* Hard-reg adjacency (REGNO+1/+2) is only valid after reload.  */
11879 gcc_assert (reload_completed);
11880 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11881 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11883 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11885 else if (offsettable_memref_p (operand))
11887 operand = adjust_address (operand, SImode, 0);
11888 parts[0] = operand;
11889 parts[1] = adjust_address (operand, SImode, 4);
11891 parts[2] = adjust_address (operand, SImode, 8);
11893 else if (GET_CODE (operand) == CONST_DOUBLE)
11898 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11902 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11903 parts[2] = gen_int_mode (l[2], SImode);
11906 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11909 gcc_unreachable ();
11911 parts[1] = gen_int_mode (l[1], SImode);
11912 parts[0] = gen_int_mode (l[0], SImode);
11915 gcc_unreachable ();
/* --- 64-bit target: split into DImode parts (SImode upper for XF). --- */
11920 if (mode == TImode)
11921 split_ti (&operand, 1, &parts[0], &parts[1]);
11922 if (mode == XFmode || mode == TFmode)
11924 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11925 if (REG_P (operand))
11927 gcc_assert (reload_completed);
11928 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11929 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11931 else if (offsettable_memref_p (operand))
11933 operand = adjust_address (operand, DImode, 0);
11934 parts[0] = operand;
11935 parts[1] = adjust_address (operand, upper_mode, 8);
11937 else if (GET_CODE (operand) == CONST_DOUBLE)
11942 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11943 real_to_target (l, &r, mode);
11945 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11946 if (HOST_BITS_PER_WIDE_INT >= 64)
11949 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11950 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11953 parts[0] = immed_double_const (l[0], l[1], DImode);
11955 if (upper_mode == SImode)
11956 parts[1] = gen_int_mode (l[2], SImode);
11957 else if (HOST_BITS_PER_WIDE_INT >= 64)
11960 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11961 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11964 parts[1] = immed_double_const (l[2], l[3], DImode);
11967 gcc_unreachable ();
11974 /* Emit insns to perform a move or push of DI, DF, and XF values.
11975 Return false when normal moves are needed; true when all required
11976 insns have been emitted. Operands 2-4 contain the input values
11977 int the correct order; operands 5-7 contain the output values. */
/* Split a multi-word move (DI/DF/XF/TF) into word-sized moves, choosing
   an emission order that never clobbers a source part before it is read.
   Operands 2-4 receive destination parts, 5-7 source parts, per the
   header comment above.  NOTE(review): many interior lines (push/nparts
   setup, braces, TARGET_64BIT guards) are elided in this chunk; comments
   cover only the visible statements.  */
11980 ix86_split_long_move (rtx operands[])
11985 int collisions = 0;
11986 enum machine_mode mode = GET_MODE (operands[0]);
11988 /* The DFmode expanders may ask us to move double.
11989 For 64bit target this is single move. By hiding the fact
11990 here we simplify i386.md splitters. */
11991 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11993 /* Optimize constant pool reference to immediates. This is used by
11994 fp moves, that force all constants to memory to allow combining. */
11996 if (GET_CODE (operands[1]) == MEM
11997 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11998 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11999 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12000 if (push_operand (operands[0], VOIDmode))
12002 operands[0] = copy_rtx (operands[0]);
12003 PUT_MODE (operands[0], Pmode);
/* 64-bit 8-byte case: a single DImode move suffices; done.  */
12006 operands[0] = gen_lowpart (DImode, operands[0]);
12007 operands[1] = gen_lowpart (DImode, operands[1]);
12008 emit_move_insn (operands[0], operands[1]);
12012 /* The only non-offsettable memory we handle is push. */
12013 if (push_operand (operands[0], VOIDmode))
12016 gcc_assert (GET_CODE (operands[0]) != MEM
12017 || offsettable_memref_p (operands[0]));
12019 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12020 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12022 /* When emitting push, take care for source operands on the stack. */
12023 if (push && GET_CODE (operands[1]) == MEM
12024 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
/* Each earlier part must be re-addressed relative to the later one,
   because pushes move the stack pointer between part moves.  */
12027 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12028 XEXP (part[1][2], 0));
12029 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12030 XEXP (part[1][1], 0));
12033 /* We need to do copy in the right order in case an address register
12034 of the source overlaps the destination. */
12035 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12037 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12039 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12042 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12045 /* Collision in the middle part can be handled by reordering. */
12046 if (collisions == 1 && nparts == 3
12047 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12050 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12051 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12054 /* If there are more collisions, we can't handle it by reordering.
12055 Do an lea to the last part and use only one colliding move. */
12056 else if (collisions > 1)
12062 base = part[0][nparts - 1];
12064 /* Handle the case when the last part isn't valid for lea.
12065 Happens in 64-bit mode storing the 12-byte XFmode. */
12066 if (GET_MODE (base) != Pmode)
12067 base = gen_rtx_REG (Pmode, REGNO (base));
/* Re-express all source parts relative to BASE after the lea.  */
12069 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12070 part[1][0] = replace_equiv_address (part[1][0], base);
12071 part[1][1] = replace_equiv_address (part[1][1],
12072 plus_constant (base, UNITS_PER_WORD));
12074 part[1][2] = replace_equiv_address (part[1][2],
12075 plus_constant (base, 8));
/* Push path: XFmode on 128-bit-long-double targets needs 4 bytes of
   extra stack padding before the top part is pushed.  */
12085 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12086 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12087 emit_move_insn (part[0][2], part[1][2]);
12092 /* In 64bit mode we don't have 32bit push available. In case this is
12093 register, it is OK - we will just use larger counterpart. We also
12094 retype memory - these comes from attempt to avoid REX prefix on
12095 moving of second half of TFmode value. */
12096 if (GET_MODE (part[1][1]) == SImode)
12098 switch (GET_CODE (part[1][1]))
12101 part[1][1] = adjust_address (part[1][1], DImode, 0);
12105 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12109 gcc_unreachable ();
12112 if (GET_MODE (part[1][0]) == SImode)
12113 part[1][0] = part[1][1];
12116 emit_move_insn (part[0][1], part[1][1]);
12117 emit_move_insn (part[0][0], part[1][0]);
12121 /* Choose correct order to not overwrite the source before it is copied. */
12122 if ((REG_P (part[0][0])
12123 && REG_P (part[1][1])
12124 && (REGNO (part[0][0]) == REGNO (part[1][1])
12126 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12128 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* High-to-low order: dest part 0 overlaps a later source part.  */
12132 operands[2] = part[0][2];
12133 operands[3] = part[0][1];
12134 operands[4] = part[0][0];
12135 operands[5] = part[1][2];
12136 operands[6] = part[1][1];
12137 operands[7] = part[1][0];
12141 operands[2] = part[0][1];
12142 operands[3] = part[0][0];
12143 operands[5] = part[1][1];
12144 operands[6] = part[1][0];
/* Low-to-high order: the default, no overlap detected.  */
12151 operands[2] = part[0][0];
12152 operands[3] = part[0][1];
12153 operands[4] = part[0][2];
12154 operands[5] = part[1][0];
12155 operands[6] = part[1][1];
12156 operands[7] = part[1][2];
12160 operands[2] = part[0][0];
12161 operands[3] = part[0][1];
12162 operands[5] = part[1][0];
12163 operands[6] = part[1][1];
12167 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12170 if (GET_CODE (operands[5]) == CONST_INT
12171 && operands[5] != const0_rtx
12172 && REG_P (operands[2]))
/* Reuse the register just loaded with the constant instead of
   re-materializing the same immediate.  */
12174 if (GET_CODE (operands[6]) == CONST_INT
12175 && INTVAL (operands[6]) == INTVAL (operands[5]))
12176 operands[6] = operands[2];
12179 && GET_CODE (operands[7]) == CONST_INT
12180 && INTVAL (operands[7]) == INTVAL (operands[5]))
12181 operands[7] = operands[2];
12185 && GET_CODE (operands[6]) == CONST_INT
12186 && operands[6] != const0_rtx
12187 && REG_P (operands[3])
12188 && GET_CODE (operands[7]) == CONST_INT
12189 && INTVAL (operands[7]) == INTVAL (operands[6]))
12190 operands[7] = operands[3];
12193 emit_move_insn (operands[2], operands[5]);
12194 emit_move_insn (operands[3], operands[6]);
12196 emit_move_insn (operands[4], operands[7]);
12201 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12202 left shift by a constant, either using a single shift or
12203 a sequence of add instructions. */
/* Emit OPERAND <<= COUNT in MODE, as a single shift or -- when cheaper
   and not optimizing for size -- a short run of self-adds.
   NOTE(review): the count==1 guard and braces are elided in this chunk;
   also note the visible gen_* selections pair mode==DImode with the
   *di* generator via an elided middle line -- TODO confirm.  */
12206 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* COUNT == 1 (per the elided condition, presumably): one self-add.  */
12210 emit_insn ((mode == DImode
12212 : gen_adddi3) (operand, operand, operand));
/* Small COUNT: adds are cheaper than a constant shift on this CPU.  */
12214 else if (!optimize_size
12215 && count * ix86_cost->add <= ix86_cost->shift_const)
12218 for (i=0; i<count; i++)
12220 emit_insn ((mode == DImode
12222 : gen_adddi3) (operand, operand, operand));
/* General case: a single immediate shift.  */
12226 emit_insn ((mode == DImode
12228 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into single-word operations.  SCRATCH, if non-null and cmove is
   available, is used for the variable-count adjustment.
   NOTE(review): braces, returns, and some else-lines are elided in this
   chunk.  */
12232 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12234 rtx low[2], high[2];
/* Bits per half: 32 for DImode pairs, 64 for TImode pairs.  */
12236 const int single_width = mode == DImode ? 32 : 64;
/* --- Constant shift count. --- */
12238 if (GET_CODE (operands[2]) == CONST_INT)
12240 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
/* Count is masked to 2*width-1, matching hardware shift semantics.  */
12241 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12243 if (count >= single_width)
/* Shift >= width: low word moves wholly into high, low becomes 0.  */
12245 emit_move_insn (high[0], low[1]);
12246 emit_move_insn (low[0], const0_rtx);
12248 if (count > single_width)
12249 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Shift < width: shld to carry bits into high, plain shift on low.  */
12253 if (!rtx_equal_p (operands[0], operands[1]))
12254 emit_move_insn (operands[0], operands[1]);
12255 emit_insn ((mode == DImode
12257 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12258 ix86_expand_ashl_const (low[0], count, mode);
/* --- Variable shift count. --- */
12263 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
/* Special-case 1 << N.  */
12265 if (operands[1] == const1_rtx)
12267 /* Assuming we've chosen a QImode capable registers, then 1 << N
12268 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12269 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12271 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Test bit log2(width) of the count: it selects which half gets the 1.  */
12273 ix86_expand_clear (low[0]);
12274 ix86_expand_clear (high[0]);
12275 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12277 d = gen_lowpart (QImode, low[0]);
12278 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12279 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12280 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12282 d = gen_lowpart (QImode, high[0]);
12283 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12284 s = gen_rtx_NE (QImode, flags, const0_rtx);
12285 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12288 /* Otherwise, we can get the same results by manually performing
12289 a bit extract operation on bit 5/6, and then performing the two
12290 shifts. The two methods of getting 0/1 into low/high are exactly
12291 the same size. Avoiding the shift in the bit extract case helps
12292 pentium4 a bit; no one else seems to care much either way. */
12297 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12298 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12300 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12301 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) / 6 (TImode) of the count into high[0].  */
12303 emit_insn ((mode == DImode
12305 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12306 emit_insn ((mode == DImode
12308 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12309 emit_move_insn (low[0], high[0]);
12310 emit_insn ((mode == DImode
12312 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
/* Shift the 0/1 pair left by the (masked) count.  */
12315 emit_insn ((mode == DImode
12317 : gen_ashldi3) (low[0], low[0], operands[2]));
12318 emit_insn ((mode == DImode
12320 : gen_ashldi3) (high[0], high[0], operands[2]));
/* Special-case -1 << N: shld is unnecessary, high is all-ones.  */
12324 if (operands[1] == constm1_rtx)
12326 /* For -1 << N, we can avoid the shld instruction, because we
12327 know that we're shifting 0...31/63 ones into a -1. */
12328 emit_move_insn (low[0], constm1_rtx)
12330 emit_move_insn (high[0], low[0]);
12332 emit_move_insn (high[0], constm1_rtx);
/* General variable shift: shld + shl, then fix up counts >= width.  */
12336 if (!rtx_equal_p (operands[0], operands[1]))
12337 emit_move_insn (operands[0], operands[1]);
12339 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12340 emit_insn ((mode == DImode
12342 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12345 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* Count >= width adjustment: cmove-based when SCRATCH is available,
   otherwise a branching adjust pattern.  */
12347 if (TARGET_CMOVE && scratch)
12349 ix86_expand_clear (scratch);
12350 emit_insn ((mode == DImode
12351 ? gen_x86_shift_adj_1
12352 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12355 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word ops.
   Mirrors ix86_split_ashl; the sign must be replicated into the high
   word for counts >= width.  NOTE(review): braces/returns elided.  */
12359 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12361 rtx low[2], high[2];
12363 const int single_width = mode == DImode ? 32 : 64;
/* --- Constant shift count. --- */
12365 if (GET_CODE (operands[2]) == CONST_INT)
12367 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12368 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximum count: both words become the sign-extension of high.  */
12370 if (count == single_width * 2 - 1)
12372 emit_move_insn (high[0], high[1]);
12373 emit_insn ((mode == DImode
12375 : gen_ashrdi3) (high[0], high[0],
12376 GEN_INT (single_width - 1)));
12377 emit_move_insn (low[0], high[0]);
/* Count >= width: low gets the (shifted) old high; high gets sign.  */
12380 else if (count >= single_width)
12382 emit_move_insn (low[0], high[1]);
12383 emit_move_insn (high[0], low[0]);
12384 emit_insn ((mode == DImode
12386 : gen_ashrdi3) (high[0], high[0],
12387 GEN_INT (single_width - 1)));
12388 if (count > single_width)
12389 emit_insn ((mode == DImode
12391 : gen_ashrdi3) (low[0], low[0],
12392 GEN_INT (count - single_width)));
/* Count < width: shrd carries high bits into low, sar on high.  */
12396 if (!rtx_equal_p (operands[0], operands[1]))
12397 emit_move_insn (operands[0], operands[1]);
12398 emit_insn ((mode == DImode
12400 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12401 emit_insn ((mode == DImode
12403 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* --- Variable shift count: shrd + sar, then adjust. --- */
12408 if (!rtx_equal_p (operands[0], operands[1]))
12409 emit_move_insn (operands[0], operands[1]);
12411 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12413 emit_insn ((mode == DImode
12415 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12416 emit_insn ((mode == DImode
12418 : gen_ashrdi3) (high[0], high[0], operands[2]));
/* Cmove adjustment needs SCRATCH preloaded with the sign word.  */
12420 if (TARGET_CMOVE && scratch)
12422 emit_move_insn (scratch, high[0]);
12423 emit_insn ((mode == DImode
12425 : gen_ashrdi3) (scratch, scratch,
12426 GEN_INT (single_width - 1)));
12427 emit_insn ((mode == DImode
12428 ? gen_x86_shift_adj_1
12429 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
/* No cmove/scratch: branching adjust pattern.  */
12433 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word ops.  Like
   ix86_split_ashr but zero-fills the high word instead of replicating
   the sign.  NOTE(review): braces/returns elided in this chunk.  */
12438 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12440 rtx low[2], high[2];
12442 const int single_width = mode == DImode ? 32 : 64;
/* --- Constant shift count. --- */
12444 if (GET_CODE (operands[2]) == CONST_INT)
12446 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12447 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= width: low gets old high (shifted further), high gets 0.  */
12449 if (count >= single_width)
12451 emit_move_insn (low[0], high[1]);
12452 ix86_expand_clear (high[0]);
12454 if (count > single_width)
12455 emit_insn ((mode == DImode
12457 : gen_lshrdi3) (low[0], low[0],
12458 GEN_INT (count - single_width)));
/* Count < width: shrd into low, logical shift on high.  */
12462 if (!rtx_equal_p (operands[0], operands[1]))
12463 emit_move_insn (operands[0], operands[1]);
12464 emit_insn ((mode == DImode
12466 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12467 emit_insn ((mode == DImode
12469 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* --- Variable shift count. --- */
12474 if (!rtx_equal_p (operands[0], operands[1]))
12475 emit_move_insn (operands[0], operands[1]);
12477 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12479 emit_insn ((mode == DImode
12481 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12482 emit_insn ((mode == DImode
12484 : gen_lshrdi3) (high[0], high[0], operands[2]));
12486 /* Heh. By reversing the arguments, we can reuse this pattern. */
12487 if (TARGET_CMOVE && scratch)
12489 ix86_expand_clear (scratch);
12490 emit_insn ((mode == DImode
12491 ? gen_x86_shift_adj_1
12492 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12496 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12500 /* Helper function for the string operations below. Dest VARIABLE whether
12501 it is aligned to VALUE bytes. If true, jump to the label. */
/* Emit a test of (VARIABLE & VALUE) and a conditional jump taken when
   the masked bits are zero; returns the label rtx (via an elided
   return) so the caller can emit the aligned path after it.  */
12503 ix86_expand_aligntest (rtx variable, int value)
12505 rtx label = gen_label_rtx ();
/* Use a temp so VARIABLE itself is not clobbered by the AND.  */
12506 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12507 if (GET_MODE (variable) == DImode)
12508 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12510 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12511 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12516 /* Adjust COUNTER by the VALUE. */
/* Decrements COUNTREG by VALUE -- note the negated constant below --
   using the add pattern matching the counter's mode.  */
12518 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12520 if (GET_MODE (countreg) == DImode)
12521 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12523 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12526 /* Zero extend possibly SImode EXP to Pmode register. */
12528 ix86_zero_extend_to_Pmode (rtx exp)
/* VOIDmode (e.g. a constant): just force into a Pmode register.  */
12531 if (GET_MODE (exp) == VOIDmode)
12532 return force_reg (Pmode, exp);
/* Already Pmode: copy into a fresh register.  */
12533 if (GET_MODE (exp) == Pmode)
12534 return copy_to_mode_reg (Pmode, exp);
/* Otherwise zero-extend SImode into a new Pmode (DImode) register.  */
12535 r = gen_reg_rtx (Pmode);
12536 emit_insn (gen_zero_extendsidi2 (r, exp));
12540 /* Expand string move (memcpy) operation. Use i386 string operations when
12541 profitable. expand_clrmem contains similar code. */
/* Strategy (as visible below): (1) rep movsb for size-optimized odd
   counts, (2) rep movs{l,q} plus tail copies for known counts, (3) a
   generic glibc-style sequence that aligns the destination first.
   NOTE(review): this extract is fragmentary -- many original lines
   (returns, else-arms, braces) are missing between the lines kept.  */
12543 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12545 rtx srcreg, destreg, countreg, srcexp, destexp;
12546 enum machine_mode counter_mode;
12547 HOST_WIDE_INT align = 0;
12548 unsigned HOST_WIDE_INT count = 0;
12550 if (GET_CODE (align_exp) == CONST_INT)
12551 align = INTVAL (align_exp);
12553 /* Can't use any of this if the user has appropriated esi or edi. */
12554 if (global_regs[4] || global_regs[5])
12557 /* This simple hack avoids all inlining code and simplifies code below. */
12558 if (!TARGET_ALIGN_STRINGOPS)
12561 if (GET_CODE (count_exp) == CONST_INT)
12563 count = INTVAL (count_exp);
12564 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12568 /* Figure out proper mode for counter. For 32bits it is always SImode,
12569 for 64bits use SImode when possible, otherwise DImode.
12570 Set count to number of bytes copied when known at compile time. */
12572 || GET_MODE (count_exp) == SImode
12573 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12574 counter_mode = SImode;
12576 counter_mode = DImode;
12578 gcc_assert (counter_mode == SImode || counter_mode == DImode);
/* Put both addresses into registers and rewrite the MEMs to use them.  */
12580 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12581 if (destreg != XEXP (dst, 0))
12582 dst = replace_equiv_address_nv (dst, destreg);
12583 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12584 if (srcreg != XEXP (src, 0))
12585 src = replace_equiv_address_nv (src, srcreg);
12587 /* When optimizing for size emit simple rep ; movsb instruction for
12588 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12589 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12590 Size of (movsl;)*(movsw;)?(movsb;)? sequence is
12591 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12592 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12593 known to be zero or not. The rep; movsb sequence causes higher
12594 register pressure though, so take that into account. */
12596 if ((!optimize || optimize_size)
12601 || (count & 3) + count / 4 > 6))))
12603 emit_insn (gen_cld ());
12604 countreg = ix86_zero_extend_to_Pmode (count_exp);
12605 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12606 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12607 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12611 /* For constant aligned (or small unaligned) copies use rep movsl
12612 followed by code copying the rest. For PentiumPro ensure 8 byte
12613 alignment to allow rep movsl acceleration. */
12615 else if (count != 0
12617 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12618 || optimize_size || count < (unsigned int) 64))
12620 unsigned HOST_WIDE_INT offset = 0;
12621 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12622 rtx srcmem, dstmem;
12624 emit_insn (gen_cld ());
12625 if (count & ~(size - 1))
/* Small known counts: emit individual movs instructions instead of
   a rep-prefixed loop.  */
12627 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12629 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12631 while (offset < (count & ~(size - 1)))
12633 srcmem = adjust_automodify_address_nv (src, movs_mode,
12635 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12637 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
/* Otherwise use rep movs with the word count in countreg.  */
12643 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12644 & (TARGET_64BIT ? -1 : 0x3fffffff));
12645 countreg = copy_to_mode_reg (counter_mode, countreg);
12646 countreg = ix86_zero_extend_to_Pmode (countreg);
12648 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12649 GEN_INT (size == 4 ? 2 : 3));
12650 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12651 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12653 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12654 countreg, destexp, srcexp));
12655 offset = count & ~(size - 1);
/* Tail: copy the remaining 4/2/1-byte pieces determined by COUNT bits.  */
12658 if (size == 8 && (count & 0x04))
12660 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12662 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12664 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12669 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12671 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12673 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12678 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12680 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12682 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12685 /* The generic code based on the glibc implementation:
12686 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12687 allowing accelerated copying there)
12688 - copy the data using rep movsl
12689 - copy the rest. */
12694 rtx srcmem, dstmem;
12695 int desired_alignment = (TARGET_PENTIUMPRO
12696 && (count == 0 || count >= (unsigned int) 260)
12697 ? 8 : UNITS_PER_WORD);
12698 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12699 dst = change_address (dst, BLKmode, destreg);
12700 src = change_address (src, BLKmode, srcreg);
12702 /* In case we don't know anything about the alignment, default to
12703 library version, since it is usually equally fast and result in
12706 Also emit call when we know that the count is large and call overhead
12707 will not be important. */
12708 if (!TARGET_INLINE_ALL_STRINGOPS
12709 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12712 if (TARGET_SINGLE_STRINGOP)
12713 emit_insn (gen_cld ());
12715 countreg2 = gen_reg_rtx (Pmode);
12716 countreg = copy_to_mode_reg (counter_mode, count_exp);
12718 /* We don't use loops to align destination and to copy parts smaller
12719 than 4 bytes, because gcc is able to optimize such code better (in
12720 the case the destination or the count really is aligned, gcc is often
12721 able to predict the branches) and also it is friendlier to the
12722 hardware branch prediction.
12724 Using loops is beneficial for generic case, because we can
12725 handle small counts using the loops. Many CPUs (such as Athlon)
12726 have large REP prefix setup costs.
12728 This is quite costly. Maybe we can revisit this decision later or
12729 add some customizability to this code. */
12731 if (count == 0 && align < desired_alignment)
12733 label = gen_label_rtx ();
12734 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12735 LEU, 0, counter_mode, 1, label);
/* Align the destination 1, 2, then 4 bytes at a time, decrementing the
   counter for each byte copied.  */
12739 rtx label = ix86_expand_aligntest (destreg, 1);
12740 srcmem = change_address (src, QImode, srcreg);
12741 dstmem = change_address (dst, QImode, destreg);
12742 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12743 ix86_adjust_counter (countreg, 1);
12744 emit_label (label);
12745 LABEL_NUSES (label) = 1;
12749 rtx label = ix86_expand_aligntest (destreg, 2);
12750 srcmem = change_address (src, HImode, srcreg);
12751 dstmem = change_address (dst, HImode, destreg);
12752 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12753 ix86_adjust_counter (countreg, 2);
12754 emit_label (label);
12755 LABEL_NUSES (label) = 1;
12757 if (align <= 4 && desired_alignment > 4)
12759 rtx label = ix86_expand_aligntest (destreg, 4);
12760 srcmem = change_address (src, SImode, srcreg);
12761 dstmem = change_address (dst, SImode, destreg);
12762 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12763 ix86_adjust_counter (countreg, 4);
12764 emit_label (label);
12765 LABEL_NUSES (label) = 1;
12768 if (label && desired_alignment > 4 && !TARGET_64BIT)
12770 emit_label (label);
12771 LABEL_NUSES (label) = 1;
12774 if (!TARGET_SINGLE_STRINGOP)
12775 emit_insn (gen_cld ());
/* Main copy: shift the byte count down to a word count for rep movs.  */
12778 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12780 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12784 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12785 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12787 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12788 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12789 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12790 countreg2, destexp, srcexp));
12794 emit_label (label);
12795 LABEL_NUSES (label) = 1;
/* Tail: copy remaining 4-, 2- and 1-byte pieces, using either the
   known COUNT bits or runtime aligntests on the counter.  */
12797 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12799 srcmem = change_address (src, SImode, srcreg);
12800 dstmem = change_address (dst, SImode, destreg);
12801 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12803 if ((align <= 4 || count == 0) && TARGET_64BIT)
12805 rtx label = ix86_expand_aligntest (countreg, 4);
12806 srcmem = change_address (src, SImode, srcreg);
12807 dstmem = change_address (dst, SImode, destreg);
12808 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12809 emit_label (label);
12810 LABEL_NUSES (label) = 1;
12812 if (align > 2 && count != 0 && (count & 2))
12814 srcmem = change_address (src, HImode, srcreg);
12815 dstmem = change_address (dst, HImode, destreg);
12816 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12818 if (align <= 2 || count == 0)
12820 rtx label = ix86_expand_aligntest (countreg, 2);
12821 srcmem = change_address (src, HImode, srcreg);
12822 dstmem = change_address (dst, HImode, destreg);
12823 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12824 emit_label (label);
12825 LABEL_NUSES (label) = 1;
12827 if (align > 1 && count != 0 && (count & 1))
12829 srcmem = change_address (src, QImode, srcreg);
12830 dstmem = change_address (dst, QImode, destreg);
12831 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12833 if (align <= 1 || count == 0)
12835 rtx label = ix86_expand_aligntest (countreg, 1);
12836 srcmem = change_address (src, QImode, srcreg);
12837 dstmem = change_address (dst, QImode, destreg);
12838 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12839 emit_label (label);
12840 LABEL_NUSES (label) = 1;
12847 /* Expand string clear operation (bzero). Use i386 string operations when
12848 profitable. expand_movmem contains similar code. */
/* Mirrors ix86_expand_movmem above but stores a zero register instead of
   copying: (1) rep stosb when size-optimizing, (2) rep stos{l,q} or
   unrolled stos for known counts, (3) generic align-then-rep sequence.
   NOTE(review): fragmentary extract -- interior lines are missing.  */
12850 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12852 rtx destreg, zeroreg, countreg, destexp;
12853 enum machine_mode counter_mode;
12854 HOST_WIDE_INT align = 0;
12855 unsigned HOST_WIDE_INT count = 0;
12857 if (GET_CODE (align_exp) == CONST_INT)
12858 align = INTVAL (align_exp);
12860 /* Can't use any of this if the user has appropriated esi. */
12861 if (global_regs[4])
12864 /* This simple hack avoids all inlining code and simplifies code below. */
12865 if (!TARGET_ALIGN_STRINGOPS)
12868 if (GET_CODE (count_exp) == CONST_INT)
12870 count = INTVAL (count_exp);
12871 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12874 /* Figure out proper mode for counter. For 32bits it is always SImode,
12875 for 64bits use SImode when possible, otherwise DImode.
12876 Set count to number of bytes copied when known at compile time. */
12878 || GET_MODE (count_exp) == SImode
12879 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12880 counter_mode = SImode;
12882 counter_mode = DImode;
12884 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12885 if (destreg != XEXP (dst, 0))
12886 dst = replace_equiv_address_nv (dst, destreg);
12889 /* When optimizing for size emit simple rep ; movsb instruction for
12890 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12891 sequence is 7 bytes long, so if optimizing for size and count is
12892 small enough that some stosl, stosw and stosb instructions without
12893 rep are shorter, fall back into the next if. */
12895 if ((!optimize || optimize_size)
12898 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12900 emit_insn (gen_cld ());
12902 countreg = ix86_zero_extend_to_Pmode (count_exp);
12903 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12904 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12905 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12907 else if (count != 0
12909 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12910 || optimize_size || count < (unsigned int) 64))
12912 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12913 unsigned HOST_WIDE_INT offset = 0;
12915 emit_insn (gen_cld ());
12917 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12918 if (count & ~(size - 1))
12920 unsigned HOST_WIDE_INT repcount;
12921 unsigned int max_nonrep;
12923 repcount = count >> (size == 4 ? 2 : 3);
12925 repcount &= 0x3fffffff;
12927 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12928 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12929 bytes. In both cases the latter seems to be faster for small
12931 max_nonrep = size == 4 ? 7 : 4;
12932 if (!optimize_size)
12935 case PROCESSOR_PENTIUM4:
12936 case PROCESSOR_NOCONA:
/* Small word counts: emit individual stos insns rather than rep.  */
12943 if (repcount <= max_nonrep)
12944 while (repcount-- > 0)
12946 rtx mem = adjust_automodify_address_nv (dst,
12947 GET_MODE (zeroreg),
12949 emit_insn (gen_strset (destreg, mem, zeroreg));
12954 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12955 countreg = ix86_zero_extend_to_Pmode (countreg);
12956 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12957 GEN_INT (size == 4 ? 2 : 3));
12958 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12959 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12961 offset = count & ~(size - 1);
/* Tail: clear remaining 4/2/1-byte pieces via SUBREGs of zeroreg.  */
12964 if (size == 8 && (count & 0x04))
12966 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12968 emit_insn (gen_strset (destreg, mem,
12969 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12974 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12976 emit_insn (gen_strset (destreg, mem,
12977 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12982 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12984 emit_insn (gen_strset (destreg, mem,
12985 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12992 /* Compute desired alignment of the string operation. */
12993 int desired_alignment = (TARGET_PENTIUMPRO
12994 && (count == 0 || count >= (unsigned int) 260)
12995 ? 8 : UNITS_PER_WORD);
12997 /* In case we don't know anything about the alignment, default to
12998 library version, since it is usually equally fast and result in
13001 Also emit call when we know that the count is large and call overhead
13002 will not be important. */
13003 if (!TARGET_INLINE_ALL_STRINGOPS
13004 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
13007 if (TARGET_SINGLE_STRINGOP)
13008 emit_insn (gen_cld ());
13010 countreg2 = gen_reg_rtx (Pmode);
13011 countreg = copy_to_mode_reg (counter_mode, count_exp);
13012 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13013 /* Get rid of MEM_OFFSET, it won't be accurate. */
13014 dst = change_address (dst, BLKmode, destreg);
13016 if (count == 0 && align < desired_alignment)
13018 label = gen_label_rtx ();
13019 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13020 LEU, 0, counter_mode, 1, label);
/* Align destination 1, 2, then 4 bytes at a time, decrementing the
   byte counter each step.  */
13024 rtx label = ix86_expand_aligntest (destreg, 1);
13025 emit_insn (gen_strset (destreg, dst,
13026 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13027 ix86_adjust_counter (countreg, 1);
13028 emit_label (label);
13029 LABEL_NUSES (label) = 1;
13033 rtx label = ix86_expand_aligntest (destreg, 2);
13034 emit_insn (gen_strset (destreg, dst,
13035 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13036 ix86_adjust_counter (countreg, 2);
13037 emit_label (label);
13038 LABEL_NUSES (label) = 1;
13040 if (align <= 4 && desired_alignment > 4)
13042 rtx label = ix86_expand_aligntest (destreg, 4);
13043 emit_insn (gen_strset (destreg, dst,
13045 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13047 ix86_adjust_counter (countreg, 4);
13048 emit_label (label);
13049 LABEL_NUSES (label) = 1;
13052 if (label && desired_alignment > 4 && !TARGET_64BIT)
13054 emit_label (label);
13055 LABEL_NUSES (label) = 1;
13059 if (!TARGET_SINGLE_STRINGOP)
13060 emit_insn (gen_cld ());
/* Main clear: shift byte count to word count for rep stos.  */
13063 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13065 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13069 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13070 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13072 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13073 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13077 emit_label (label);
13078 LABEL_NUSES (label) = 1;
/* Tail: clear remaining 4-, 2- and 1-byte pieces.  */
13081 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13082 emit_insn (gen_strset (destreg, dst,
13083 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13084 if (TARGET_64BIT && (align <= 4 || count == 0))
13086 rtx label = ix86_expand_aligntest (countreg, 4);
13087 emit_insn (gen_strset (destreg, dst,
13088 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13089 emit_label (label);
13090 LABEL_NUSES (label) = 1;
13092 if (align > 2 && count != 0 && (count & 2))
13093 emit_insn (gen_strset (destreg, dst,
13094 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13095 if (align <= 2 || count == 0)
13097 rtx label = ix86_expand_aligntest (countreg, 2);
13098 emit_insn (gen_strset (destreg, dst,
13099 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13100 emit_label (label);
13101 LABEL_NUSES (label) = 1;
13103 if (align > 1 && count != 0 && (count & 1))
13104 emit_insn (gen_strset (destreg, dst,
13105 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13106 if (align <= 1 || count == 0)
13108 rtx label = ix86_expand_aligntest (countreg, 1);
13109 emit_insn (gen_strset (destreg, dst,
13110 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13111 emit_label (label);
13112 LABEL_NUSES (label) = 1;
13118 /* Expand strlen. */
/* Two strategies are visible below: an unrolled SImode scan
   (ix86_expand_strlensi_unroll_1) when TARGET_UNROLL_STRLEN applies,
   otherwise a repnz scasb sequence (UNSPEC_SCAS) whose count result is
   turned into a length via one's complement plus -1.
   NOTE(review): fragmentary extract -- early-return and else lines are
   missing between the lines kept.  */
13120 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13122 rtx addr, scratch1, scratch2, scratch3, scratch4;
13124 /* The generic case of strlen expander is long. Avoid its
13125 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
13127 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13128 && !TARGET_INLINE_ALL_STRINGOPS
13130 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13133 addr = force_reg (Pmode, XEXP (src, 0));
13134 scratch1 = gen_reg_rtx (Pmode);
13136 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13139 /* Well it seems that some optimizer does not combine a call like
13140 foo(strlen(bar), strlen(bar));
13141 when the move and the subtraction is done here. It does calculate
13142 the length just once when these instructions are done inside of
13143 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13144 often used and I use one fewer register for the lifetime of
13145 output_strlen_unroll() this is better. */
13147 emit_move_insn (out, addr);
13149 ix86_expand_strlensi_unroll_1 (out, src, align);
13151 /* strlensi_unroll_1 returns the address of the zero at the end of
13152 the string, like memchr(), so compute the length by subtracting
13153 the start address. */
13155 emit_insn (gen_subdi3 (out, out, addr));
13157 emit_insn (gen_subsi3 (out, out, addr));
/* scasb path: scratch3 walks the string, scratch4 holds -1 (max count).  */
13162 scratch2 = gen_reg_rtx (Pmode);
13163 scratch3 = gen_reg_rtx (Pmode);
13164 scratch4 = force_reg (Pmode, constm1_rtx);
13166 emit_move_insn (scratch3, addr);
13167 eoschar = force_reg (QImode, eoschar);
13169 emit_insn (gen_cld ());
13170 src = replace_equiv_address_nv (src, scratch3);
13172 /* If .md starts supporting :P, this can be done in .md. */
13173 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13174 scratch4), UNSPEC_SCAS);
13175 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* Length = ~count - 1 in the appropriate word mode.  */
13178 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13179 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13183 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13184 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13190 /* Expand the appropriate insns for doing strlen if not just doing
13193 out = result, initialized with the start address
13194 align_rtx = alignment of the address.
13195 scratch = scratch register, initialized with the startaddress when
13196 not aligned, otherwise undefined
13198 This is just the body. It needs the initializations mentioned above and
13199 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): fragmentary extract -- declarations, else-arms and
   closing lines are missing between the lines kept.  */
13202 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13206 rtx align_2_label = NULL_RTX;
13207 rtx align_3_label = NULL_RTX;
13208 rtx align_4_label = gen_label_rtx ();
13209 rtx end_0_label = gen_label_rtx ();
13211 rtx tmpreg = gen_reg_rtx (SImode);
13212 rtx scratch = gen_reg_rtx (SImode);
13216 if (GET_CODE (align_rtx) == CONST_INT)
13217 align = INTVAL (align_rtx);
13219 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13221 /* Is there a known alignment and is it less than 4? */
13224 rtx scratch1 = gen_reg_rtx (Pmode);
13225 emit_move_insn (scratch1, out);
13226 /* Is there a known alignment and is it not 2? */
13229 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13230 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13232 /* Leave just the 3 lower bits. */
13233 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13234 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> aligned, 2 -> two-byte case,
   3 -> three-byte case; fall through for 1.  */
13236 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13237 Pmode, 1, align_4_label);
13238 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13239 Pmode, 1, align_2_label);
13240 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13241 Pmode, 1, align_3_label);
13245 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13246 check if is aligned to 4 - byte. */
13248 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13249 NULL_RTX, 0, OPTAB_WIDEN);
13251 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13252 Pmode, 1, align_4_label);
13255 mem = change_address (src, QImode, out);
13257 /* Now compare the bytes. */
13259 /* Compare the first n unaligned byte on a byte per byte basis. */
13260 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13261 QImode, 1, end_0_label);
13263 /* Increment the address. */
13265 emit_insn (gen_adddi3 (out, out, const1_rtx));
13267 emit_insn (gen_addsi3 (out, out, const1_rtx));
13269 /* Not needed with an alignment of 2 */
13272 emit_label (align_2_label);
13274 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13278 emit_insn (gen_adddi3 (out, out, const1_rtx));
13280 emit_insn (gen_addsi3 (out, out, const1_rtx));
13282 emit_label (align_3_label);
13285 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13289 emit_insn (gen_adddi3 (out, out, const1_rtx));
13291 emit_insn (gen_addsi3 (out, out, const1_rtx));
13294 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13295 align this loop. It gives only huge programs, but does not help to
13297 emit_label (align_4_label);
13299 mem = change_address (src, SImode, out);
13300 emit_move_insn (scratch, mem);
13302 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13304 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13306 /* This formula yields a nonzero result iff one of the bytes is zero.
13307 This saves three branches inside loop and many cycles. */
13309 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13310 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13311 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13312 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13313 gen_int_mode (0x80808080, SImode)));
13314 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Found a zero byte in the word: locate it without branching, using
   cmove when available.  */
13319 rtx reg = gen_reg_rtx (SImode);
13320 rtx reg2 = gen_reg_rtx (Pmode);
13321 emit_move_insn (reg, tmpreg);
13322 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13324 /* If zero is not in the first two bytes, move two bytes forward. */
13325 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13326 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13327 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13328 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13329 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13332 /* Emit lea manually to avoid clobbering of flags. */
13333 emit_insn (gen_rtx_SET (SImode, reg2,
13334 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13336 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13337 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13338 emit_insn (gen_rtx_SET (VOIDmode, out,
13339 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* No cmove: branch around the two-byte advance instead.  */
13346 rtx end_2_label = gen_label_rtx ();
13347 /* Is zero in the first two bytes? */
13349 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13350 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13351 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13352 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13353 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13355 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13356 JUMP_LABEL (tmp) = end_2_label;
13358 /* Not in the first two. Move two bytes forward. */
13359 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13361 emit_insn (gen_adddi3 (out, out, const2_rtx));
13363 emit_insn (gen_addsi3 (out, out, const2_rtx));
13365 emit_label (end_2_label);
13369 /* Avoid branch in fixing the byte. */
13370 tmpreg = gen_lowpart (QImode, tmpreg);
13371 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13372 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13374 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13376 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13378 emit_label (end_0_label);
/* Emit a call (or sibcall) to FNADDR with argument bytes CALLARG1 and,
   on 64-bit, the vararg SSE-register count CALLARG2 passed in %al.
   RETVAL receives the result if non-null; POP is the callee-pop amount.
   NOTE(review): fragmentary extract -- some declarations/else lines of
   the original are missing between the lines kept.  */
13382 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13383 rtx callarg2 ATTRIBUTE_UNUSED,
13384 rtx pop, int sibcall)
13386 rtx use = NULL, call;
13388 if (pop == const0_rtx)
13390 gcc_assert (!TARGET_64BIT || !pop);
13392 if (TARGET_MACHO && !TARGET_64BIT)
13395 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13396 fnaddr = machopic_indirect_call_target (fnaddr);
13401 /* Static functions and indirect calls don't need the pic register. */
13402 if (! TARGET_64BIT && flag_pic
13403 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13404 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13405 use_reg (&use, pic_offset_table_rtx)
13408 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
/* %al carries the number of SSE registers used for varargs.  */
13410 rtx al = gen_rtx_REG (QImode, 0);
13411 emit_move_insn (al, callarg2);
13412 use_reg (&use, al);
13415 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13417 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13418 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13420 if (sibcall && TARGET_64BIT
13421 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
/* 64-bit sibcalls to non-constant addresses go through R11.  */
13424 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13425 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13426 emit_move_insn (fnaddr, addr);
13427 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13430 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13432 call = gen_rtx_SET (VOIDmode, retval, call);
/* Wrap the call with the stack-pointer adjustment when POP is set.  */
13435 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13436 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13437 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13440 call = emit_call_insn (call);
13442 CALL_INSN_FUNCTION_USAGE (call) = use;
13446 /* Clear stack slot assignments remembered from previous functions.
13447 This is called from INIT_EXPANDERS once before RTL is emitted for each
13450 static struct machine_function *
13451 ix86_init_machine_status (void)
13453 struct machine_function *f;
/* GC-allocated and zeroed; only the two fields below need explicit
   non-default initial values.  */
13455 f = ggc_alloc_cleared (sizeof (struct machine_function));
13456 f->use_fast_prologue_epilogue_nregs = -1;
13457 f->tls_descriptor_call_expanded_p = 0;
13462 /* Return a MEM corresponding to a stack slot with mode MODE.
13463 Allocate a new slot if necessary.
13465 The RTL for a function can have several slots available: N is
13466 which slot to use. */
13469 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13471 struct stack_local_entry *s;
13473 gcc_assert (n < MAX_386_STACK_LOCALS);
/* Reuse an already-assigned slot with the same mode and index.  */
13475 for (s = ix86_stack_locals; s; s = s->next)
13476 if (s->mode == mode && s->n == n)
/* Otherwise allocate a new entry and push it on the per-function list.  */
13479 s = (struct stack_local_entry *)
13480 ggc_alloc (sizeof (struct stack_local_entry));
13483 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13485 s->next = ix86_stack_locals;
13486 ix86_stack_locals = s;
13490 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13492 static GTY(()) rtx ix86_tls_symbol;
13494 ix86_tls_get_addr (void)
/* Lazily create and cache the SYMBOL_REF; the GNU-TLS variant uses the
   triple-underscore name.  */
13497 if (!ix86_tls_symbol)
13499 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13500 (TARGET_ANY_GNU_TLS
13502 ? "___tls_get_addr"
13503 : "__tls_get_addr");
13506 return ix86_tls_symbol;
13509 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13511 static GTY(()) rtx ix86_tls_module_base_symbol;
13513 ix86_tls_module_base (void)
/* Lazily create the symbol and mark it global-dynamic TLS via its
   SYMBOL_REF flags.  */
13516 if (!ix86_tls_module_base_symbol)
13518 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13519 "_TLS_MODULE_BASE_");
13520 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13521 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13524 return ix86_tls_module_base_symbol;
13527 /* Calculate the length of the memory address in the instruction
13528 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13531 memory_address_length (rtx addr)
13533 struct ix86_address parts;
13534 rtx base, index, disp;
/* Autoincrement addressing encodes no extra address bytes.  */
13538 if (GET_CODE (addr) == PRE_DEC
13539 || GET_CODE (addr) == POST_INC
13540 || GET_CODE (addr) == PRE_MODIFY
13541 || GET_CODE (addr) == POST_MODIFY)
13544 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the register identity checks below work.  */
13547 if (parts.base && GET_CODE (parts.base) == SUBREG)
13548 parts.base = SUBREG_REG (parts.base);
13549 if (parts.index && GET_CODE (parts.index) == SUBREG)
13550 parts.index = SUBREG_REG (parts.index);
13553 index = parts.index;
13558 - esp as the base always wants an index,
13559 - ebp as the base always wants a displacement. */
13561 /* Register Indirect. */
13562 if (base && !index && !disp)
13564 /* esp (for its index) and ebp (for its displacement) need
13565 the two-byte modrm form. */
13566 if (addr == stack_pointer_rtx
13567 || addr == arg_pointer_rtx
13568 || addr == frame_pointer_rtx
13569 || addr == hard_frame_pointer_rtx)
13573 /* Direct Addressing. */
13574 else if (disp && !base && !index)
13579 /* Find the length of the displacement constant. */
/* A base plus an 8-bit-representable displacement (constraint K)
   needs only one displacement byte.  */
13582 if (base && satisfies_constraint_K (disp))
13587 /* ebp always wants a displacement. */
13588 else if (base == hard_frame_pointer_rtx)
13591 /* An index requires the two-byte modrm form.... */
13593 /* ...like esp, which always wants an index. */
13594 || base == stack_pointer_rtx
13595 || base == arg_pointer_rtx
13596 || base == frame_pointer_rtx)
13603 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13604 is set, expect that insn have 8bit immediate alternative. */
13606 ix86_attr_length_immediate_default (rtx insn, int shortform)
13610 extract_insn_cached (insn);
/* Scan operands for a constant; constraint K means it fits in the
   short (one-byte) immediate form.  */
13611 for (i = recog_data.n_operands - 1; i >= 0; --i)
13612 if (CONSTANT_P (recog_data.operand[i]))
13615 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13619 switch (get_attr_mode (insn))
13630 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13635 fatal_insn ("unknown insn mode", insn);
13641 /* Compute default value for "length_address" attribute. */
13643 ix86_attr_length_address_default (rtx insn)
/* For LEA the "memory" operand is the SET_SRC of the pattern itself.  */
13647 if (get_attr_type (insn) == TYPE_LEA)
13649 rtx set = PATTERN (insn);
13651 if (GET_CODE (set) == PARALLEL)
13652 set = XVECEXP (set, 0, 0);
13654 gcc_assert (GET_CODE (set) == SET);
13656 return memory_address_length (SET_SRC (set));
/* Otherwise measure the first MEM operand's address, if any.  */
13659 extract_insn_cached (insn);
13660 for (i = recog_data.n_operands - 1; i >= 0; --i)
13661 if (GET_CODE (recog_data.operand[i]) == MEM)
13663 return memory_address_length (XEXP (recog_data.operand[i], 0));
13669 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): fragmentary extraction -- the switch header, the per-case
   return values and the closing brace sit in elided lines.  The dispatch
   is presumably on ix86_tune (confirm against the full source).  */
13672 ix86_issue_rate (void)
13676 case PROCESSOR_PENTIUM:
13680 case PROCESSOR_PENTIUMPRO:
13681 case PROCESSOR_PENTIUM4:
13682 case PROCESSOR_ATHLON:
13684 case PROCESSOR_NOCONA:
13685 case PROCESSOR_GENERIC32:
13686 case PROCESSOR_GENERIC64:
13694 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13695 by DEP_INSN and nothing set by DEP_INSN. */
13698 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13702 /* Simplify the test for uninteresting insns. */
13703 if (insn_type != TYPE_SETCC
13704 && insn_type != TYPE_ICMOV
13705 && insn_type != TYPE_FCMOV
13706 && insn_type != TYPE_IBR)
13709 if ((set = single_set (dep_insn)) != 0)
13711 set = SET_DEST (set);
13714 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13715 && XVECLEN (PATTERN (dep_insn), 0) == 2
13716 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13717 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13719 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13720 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13725 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13728 /* This test is true if the dependent insn reads the flags but
13729 not any other potentially set register. */
13730 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13733 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13739 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13740 address with operands set by DEP_INSN. */
/* NOTE(review): gappy extraction -- locals, braces and the else-branch
   structure fall in elided lines; only comments were added here.  */
13743 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* LEA computes its address in the pattern's SET_SRC rather than through a
   MEM operand, so take the address expression straight from the pattern.
   (The second half of this condition sits in an elided line.)  */
13747 if (insn_type == TYPE_LEA
13750 addr = PATTERN (insn);
13752 if (GET_CODE (addr) == PARALLEL)
13753 addr = XVECEXP (addr, 0, 0);
13755 gcc_assert (GET_CODE (addr) == SET);
13757 addr = SET_SRC (addr);
/* Otherwise scan the operands (backwards) for a MEM and use its address.  */
13762 extract_insn_cached (insn);
13763 for (i = recog_data.n_operands - 1; i >= 0; --i)
13764 if (GET_CODE (recog_data.operand[i]) == MEM)
13766 addr = XEXP (recog_data.operand[i], 0);
/* AGI stall iff DEP_INSN modifies something the address expression uses.  */
13773 return modified_in_p (addr, dep_insn);
13777 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
/* Scheduler hook: adjust the latency COST of the dependence LINK between
   DEP_INSN (producer) and INSN (consumer) for the tuned CPU.
   NOTE(review): gappy extraction -- return type, some locals, the switch
   header, braces and most cost assignments fall in elided lines.  */
13779 enum attr_type insn_type, dep_insn_type;
13780 enum attr_memory memory;
13782 int dep_insn_code_number;
13784 /* Anti and output dependencies have zero cost on all CPUs. */
13785 if (REG_NOTE_KIND (link) != 0)
13788 dep_insn_code_number = recog_memoized (dep_insn);
13790 /* If we can't recognize the insns, we can't really do anything. */
13791 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13794 insn_type = get_attr_type (insn);
13795 dep_insn_type = get_attr_type (dep_insn);
/* Per-CPU latency adjustments follow (switch on the tuning target).  */
13799 case PROCESSOR_PENTIUM:
13800 /* Address Generation Interlock adds a cycle of latency. */
13801 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13804 /* ??? Compares pair with jump/setcc. */
13805 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13808 /* Floating point stores require value to be ready one cycle earlier. */
13809 if (insn_type == TYPE_FMOV
13810 && get_attr_memory (insn) == MEMORY_STORE
13811 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13815 case PROCESSOR_PENTIUMPRO:
13816 memory = get_attr_memory (insn);
13818 /* INT->FP conversion is expensive. */
13819 if (get_attr_fp_int_src (dep_insn))
13822 /* There is one cycle extra latency between an FP op and a store. */
13823 if (insn_type == TYPE_FMOV
13824 && (set = single_set (dep_insn)) != NULL_RTX
13825 && (set2 = single_set (insn)) != NULL_RTX
13826 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13827 && GET_CODE (SET_DEST (set2)) == MEM)
13830 /* Show ability of reorder buffer to hide latency of load by executing
13831 in parallel with previous instruction in case
13832 previous instruction is not needed to compute the address. */
13833 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13834 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13836 /* Claim moves to take one cycle, as core can issue one load
13837 at time and the next load can start cycle later. */
13838 if (dep_insn_type == TYPE_IMOV
13839 || dep_insn_type == TYPE_FMOV)
/* Next case label (presumably PROCESSOR_K6) sits in an elided line.  */
13847 memory = get_attr_memory (insn);
13849 /* The esp dependency is resolved before the instruction is really
13851 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13852 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13855 /* INT->FP conversion is expensive. */
13856 if (get_attr_fp_int_src (dep_insn))
13859 /* Show ability of reorder buffer to hide latency of load by executing
13860 in parallel with previous instruction in case
13861 previous instruction is not needed to compute the address. */
13862 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13863 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13865 /* Claim moves to take one cycle, as core can issue one load
13866 at time and the next load can start cycle later. */
13867 if (dep_insn_type == TYPE_IMOV
13868 || dep_insn_type == TYPE_FMOV)
13877 case PROCESSOR_ATHLON:
13879 case PROCESSOR_GENERIC32:
13880 case PROCESSOR_GENERIC64:
13881 memory = get_attr_memory (insn);
13883 /* Show ability of reorder buffer to hide latency of load by executing
13884 in parallel with previous instruction in case
13885 previous instruction is not needed to compute the address. */
13886 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13887 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13889 enum attr_unit unit = get_attr_unit (insn);
13892 /* Because of the difference between the length of integer and
13893 floating unit pipeline preparation stages, the memory operands
13894 for floating point are cheaper.
13896 ??? For Athlon it the difference is most probably 2. */
13897 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13900 loadcost = TARGET_ATHLON ? 2 : 0;
/* Deduct the hidden load latency, flooring at an elided fallback value.  */
13902 if (cost >= loadcost)
13915 /* How many alternative schedules to try. This should be as wide as the
13916 scheduling freedom in the DFA, but no wider. Making this value too
13917 large results extra work for the scheduler. */
/* NOTE(review): the returned lookahead constants live in elided lines;
   only the per-CPU tests are visible here.  */
13920 ia32_multipass_dfa_lookahead (void)
13922 if (ix86_tune == PROCESSOR_PENTIUM)
13925 if (ix86_tune == PROCESSOR_PENTIUMPRO
13926 || ix86_tune == PROCESSOR_K6)
13934 /* Compute the alignment given to a constant that is being placed in memory.
13935 EXP is the constant and ALIGN is the alignment that the object would
13937 The value of this function is used instead of that alignment to align
13941 ix86_constant_alignment (tree exp, int align)
/* Boost real constants: DFmode up to 64-bit alignment, 128-bit-aligned
   modes (per ALIGN_MODE_128) up to 128; return values are elided.  */
13943 if (TREE_CODE (exp) == REAL_CST)
13945 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13947 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants (>= 31 chars) get word alignment unless
   optimizing for size.  */
13950 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13951 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13952 return BITS_PER_WORD;
13957 /* Compute the alignment for a static variable.
13958 TYPE is the data type, and ALIGN is the alignment that
13959 the object would ordinarily have. The value of this function is used
13960 instead of that alignment to align the object. */
/* NOTE(review): gappy extraction -- return statements and some
   braces/conditions sit in elided lines; comments only.  */
13963 ix86_data_alignment (tree type, int align)
/* Cap alignment boosts at one word when optimizing for size, else 256.  */
13965 int max_align = optimize_size ? BITS_PER_WORD : 256;
/* Large aggregates (size >= max_align bits, or with a nonzero high word
   of the size constant) get boosted toward max_align.  */
13967 if (AGGREGATE_TYPE_P (type)
13968 && TYPE_SIZE (type)
13969 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13970 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13971 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13972 && align < max_align)
13975 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13976 to 16byte boundary. */
13979 if (AGGREGATE_TYPE_P (type)
13980 && TYPE_SIZE (type)
13981 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13982 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13983 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Mode-driven boosts: double-mode data to 64 bits, 128-bit-aligned modes
   to 128, keyed off the element/field/scalar mode as appropriate.  */
13987 if (TREE_CODE (type) == ARRAY_TYPE)
13989 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13991 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13994 else if (TREE_CODE (type) == COMPLEX_TYPE)
13997 if (TYPE_MODE (type) == DCmode && align < 64)
13999 if (TYPE_MODE (type) == XCmode && align < 128)
14002 else if ((TREE_CODE (type) == RECORD_TYPE
14003 || TREE_CODE (type) == UNION_TYPE
14004 || TREE_CODE (type) == QUAL_UNION_TYPE)
14005 && TYPE_FIELDS (type))
/* For records/unions only the first field's mode is inspected.  */
14007 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14009 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14012 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14013 || TREE_CODE (type) == INTEGER_TYPE)
14015 if (TYPE_MODE (type) == DFmode && align < 64)
14017 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14024 /* Compute the alignment for a local variable.
14025 TYPE is the data type, and ALIGN is the alignment that
14026 the object would ordinarily have. The value of this macro is used
14027 instead of that alignment to align the object. */
/* NOTE(review): gappy extraction (returns and some braces elided); the
   structure parallels ix86_data_alignment but with a 16-byte aggregate
   threshold instead of max_align.  */
14030 ix86_local_alignment (tree type, int align)
14032 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14033 to 16byte boundary. */
14036 if (AGGREGATE_TYPE_P (type)
14037 && TYPE_SIZE (type)
14038 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14039 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14040 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Mode-driven boosts, mirroring the static-data variant above.  */
14043 if (TREE_CODE (type) == ARRAY_TYPE)
14045 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14047 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14050 else if (TREE_CODE (type) == COMPLEX_TYPE)
14052 if (TYPE_MODE (type) == DCmode && align < 64)
14054 if (TYPE_MODE (type) == XCmode && align < 128)
14057 else if ((TREE_CODE (type) == RECORD_TYPE
14058 || TREE_CODE (type) == UNION_TYPE
14059 || TREE_CODE (type) == QUAL_UNION_TYPE)
14060 && TYPE_FIELDS (type))
14062 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14064 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14067 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14068 || TREE_CODE (type) == INTEGER_TYPE)
14071 if (TYPE_MODE (type) == DFmode && align < 64)
14073 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14079 /* Emit RTL insns to initialize the variable parts of a trampoline.
14080 FNADDR is an RTX for the address of the function's pure code.
14081 CXT is an RTX for the static chain value for the function. */
/* NOTE(review): gappy extraction -- the TARGET_64BIT branch selector,
   'offset' declaration/increments and some braces are elided.  The first
   group emits the 32-bit trampoline, the rest the 64-bit one.  */
14083 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14087 /* Compute offset from the end of the jmp to the target function. */
14088 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14089 plus_constant (tramp, 10),
14090 NULL_RTX, 1, OPTAB_DIRECT);
/* 0xb9 is the standard ia32 encoding of "mov $imm32, %ecx": load the
   static chain CXT into ecx, then 0xe9 "jmp rel32" to the target.  */
14091 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14092 gen_int_mode (0xb9, QImode));
14093 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14094 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14095 gen_int_mode (0xe9, QImode));
14096 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14101 /* Try to load address using shorter movl instead of movabs.
14102 We may want to support movq for kernel mode, but kernel does not use
14103 trampolines at the moment. */
14104 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14106 fnaddr = copy_to_mode_reg (DImode, fnaddr);
/* 0xbb41 little-endian = bytes 41 BB: "mov $imm32, %r11d".  */
14107 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14108 gen_int_mode (0xbb41, HImode));
14109 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14110 gen_lowpart (SImode, fnaddr));
/* 0xbb49 = bytes 49 BB: "movabs $imm64, %r11" (full 64-bit address).  */
14115 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14116 gen_int_mode (0xbb49, HImode));
14117 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14121 /* Load static chain using movabs to r10. */
14122 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14123 gen_int_mode (0xba49, HImode));
14124 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14127 /* Jump to the r11 */
/* 0xff49 = bytes 49 FF, followed by modrm 0xe3: "jmp *%r11".  */
14128 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14129 gen_int_mode (0xff49, HImode));
14130 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14131 gen_int_mode (0xe3, QImode));
/* Sanity-check that the emitted bytes fit the reserved trampoline area.  */
14133 gcc_assert (offset <= TRAMPOLINE_SIZE);
14136 #ifdef ENABLE_EXECUTE_STACK
/* On targets with non-executable stacks, ask libgcc to make the
   trampoline's page executable.  */
14137 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14138 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14142 /* Codes for all the SSE/MMX builtins. */
14145 IX86_BUILTIN_ADDPS,
14146 IX86_BUILTIN_ADDSS,
14147 IX86_BUILTIN_DIVPS,
14148 IX86_BUILTIN_DIVSS,
14149 IX86_BUILTIN_MULPS,
14150 IX86_BUILTIN_MULSS,
14151 IX86_BUILTIN_SUBPS,
14152 IX86_BUILTIN_SUBSS,
14154 IX86_BUILTIN_CMPEQPS,
14155 IX86_BUILTIN_CMPLTPS,
14156 IX86_BUILTIN_CMPLEPS,
14157 IX86_BUILTIN_CMPGTPS,
14158 IX86_BUILTIN_CMPGEPS,
14159 IX86_BUILTIN_CMPNEQPS,
14160 IX86_BUILTIN_CMPNLTPS,
14161 IX86_BUILTIN_CMPNLEPS,
14162 IX86_BUILTIN_CMPNGTPS,
14163 IX86_BUILTIN_CMPNGEPS,
14164 IX86_BUILTIN_CMPORDPS,
14165 IX86_BUILTIN_CMPUNORDPS,
14166 IX86_BUILTIN_CMPEQSS,
14167 IX86_BUILTIN_CMPLTSS,
14168 IX86_BUILTIN_CMPLESS,
14169 IX86_BUILTIN_CMPNEQSS,
14170 IX86_BUILTIN_CMPNLTSS,
14171 IX86_BUILTIN_CMPNLESS,
14172 IX86_BUILTIN_CMPNGTSS,
14173 IX86_BUILTIN_CMPNGESS,
14174 IX86_BUILTIN_CMPORDSS,
14175 IX86_BUILTIN_CMPUNORDSS,
14177 IX86_BUILTIN_COMIEQSS,
14178 IX86_BUILTIN_COMILTSS,
14179 IX86_BUILTIN_COMILESS,
14180 IX86_BUILTIN_COMIGTSS,
14181 IX86_BUILTIN_COMIGESS,
14182 IX86_BUILTIN_COMINEQSS,
14183 IX86_BUILTIN_UCOMIEQSS,
14184 IX86_BUILTIN_UCOMILTSS,
14185 IX86_BUILTIN_UCOMILESS,
14186 IX86_BUILTIN_UCOMIGTSS,
14187 IX86_BUILTIN_UCOMIGESS,
14188 IX86_BUILTIN_UCOMINEQSS,
14190 IX86_BUILTIN_CVTPI2PS,
14191 IX86_BUILTIN_CVTPS2PI,
14192 IX86_BUILTIN_CVTSI2SS,
14193 IX86_BUILTIN_CVTSI642SS,
14194 IX86_BUILTIN_CVTSS2SI,
14195 IX86_BUILTIN_CVTSS2SI64,
14196 IX86_BUILTIN_CVTTPS2PI,
14197 IX86_BUILTIN_CVTTSS2SI,
14198 IX86_BUILTIN_CVTTSS2SI64,
14200 IX86_BUILTIN_MAXPS,
14201 IX86_BUILTIN_MAXSS,
14202 IX86_BUILTIN_MINPS,
14203 IX86_BUILTIN_MINSS,
14205 IX86_BUILTIN_LOADUPS,
14206 IX86_BUILTIN_STOREUPS,
14207 IX86_BUILTIN_MOVSS,
14209 IX86_BUILTIN_MOVHLPS,
14210 IX86_BUILTIN_MOVLHPS,
14211 IX86_BUILTIN_LOADHPS,
14212 IX86_BUILTIN_LOADLPS,
14213 IX86_BUILTIN_STOREHPS,
14214 IX86_BUILTIN_STORELPS,
14216 IX86_BUILTIN_MASKMOVQ,
14217 IX86_BUILTIN_MOVMSKPS,
14218 IX86_BUILTIN_PMOVMSKB,
14220 IX86_BUILTIN_MOVNTPS,
14221 IX86_BUILTIN_MOVNTQ,
14223 IX86_BUILTIN_LOADDQU,
14224 IX86_BUILTIN_STOREDQU,
14226 IX86_BUILTIN_PACKSSWB,
14227 IX86_BUILTIN_PACKSSDW,
14228 IX86_BUILTIN_PACKUSWB,
14230 IX86_BUILTIN_PADDB,
14231 IX86_BUILTIN_PADDW,
14232 IX86_BUILTIN_PADDD,
14233 IX86_BUILTIN_PADDQ,
14234 IX86_BUILTIN_PADDSB,
14235 IX86_BUILTIN_PADDSW,
14236 IX86_BUILTIN_PADDUSB,
14237 IX86_BUILTIN_PADDUSW,
14238 IX86_BUILTIN_PSUBB,
14239 IX86_BUILTIN_PSUBW,
14240 IX86_BUILTIN_PSUBD,
14241 IX86_BUILTIN_PSUBQ,
14242 IX86_BUILTIN_PSUBSB,
14243 IX86_BUILTIN_PSUBSW,
14244 IX86_BUILTIN_PSUBUSB,
14245 IX86_BUILTIN_PSUBUSW,
14248 IX86_BUILTIN_PANDN,
14252 IX86_BUILTIN_PAVGB,
14253 IX86_BUILTIN_PAVGW,
14255 IX86_BUILTIN_PCMPEQB,
14256 IX86_BUILTIN_PCMPEQW,
14257 IX86_BUILTIN_PCMPEQD,
14258 IX86_BUILTIN_PCMPGTB,
14259 IX86_BUILTIN_PCMPGTW,
14260 IX86_BUILTIN_PCMPGTD,
14262 IX86_BUILTIN_PMADDWD,
14264 IX86_BUILTIN_PMAXSW,
14265 IX86_BUILTIN_PMAXUB,
14266 IX86_BUILTIN_PMINSW,
14267 IX86_BUILTIN_PMINUB,
14269 IX86_BUILTIN_PMULHUW,
14270 IX86_BUILTIN_PMULHW,
14271 IX86_BUILTIN_PMULLW,
14273 IX86_BUILTIN_PSADBW,
14274 IX86_BUILTIN_PSHUFW,
14276 IX86_BUILTIN_PSLLW,
14277 IX86_BUILTIN_PSLLD,
14278 IX86_BUILTIN_PSLLQ,
14279 IX86_BUILTIN_PSRAW,
14280 IX86_BUILTIN_PSRAD,
14281 IX86_BUILTIN_PSRLW,
14282 IX86_BUILTIN_PSRLD,
14283 IX86_BUILTIN_PSRLQ,
14284 IX86_BUILTIN_PSLLWI,
14285 IX86_BUILTIN_PSLLDI,
14286 IX86_BUILTIN_PSLLQI,
14287 IX86_BUILTIN_PSRAWI,
14288 IX86_BUILTIN_PSRADI,
14289 IX86_BUILTIN_PSRLWI,
14290 IX86_BUILTIN_PSRLDI,
14291 IX86_BUILTIN_PSRLQI,
14293 IX86_BUILTIN_PUNPCKHBW,
14294 IX86_BUILTIN_PUNPCKHWD,
14295 IX86_BUILTIN_PUNPCKHDQ,
14296 IX86_BUILTIN_PUNPCKLBW,
14297 IX86_BUILTIN_PUNPCKLWD,
14298 IX86_BUILTIN_PUNPCKLDQ,
14300 IX86_BUILTIN_SHUFPS,
14302 IX86_BUILTIN_RCPPS,
14303 IX86_BUILTIN_RCPSS,
14304 IX86_BUILTIN_RSQRTPS,
14305 IX86_BUILTIN_RSQRTSS,
14306 IX86_BUILTIN_SQRTPS,
14307 IX86_BUILTIN_SQRTSS,
14309 IX86_BUILTIN_UNPCKHPS,
14310 IX86_BUILTIN_UNPCKLPS,
14312 IX86_BUILTIN_ANDPS,
14313 IX86_BUILTIN_ANDNPS,
14315 IX86_BUILTIN_XORPS,
14318 IX86_BUILTIN_LDMXCSR,
14319 IX86_BUILTIN_STMXCSR,
14320 IX86_BUILTIN_SFENCE,
14322 /* 3DNow! Original */
14323 IX86_BUILTIN_FEMMS,
14324 IX86_BUILTIN_PAVGUSB,
14325 IX86_BUILTIN_PF2ID,
14326 IX86_BUILTIN_PFACC,
14327 IX86_BUILTIN_PFADD,
14328 IX86_BUILTIN_PFCMPEQ,
14329 IX86_BUILTIN_PFCMPGE,
14330 IX86_BUILTIN_PFCMPGT,
14331 IX86_BUILTIN_PFMAX,
14332 IX86_BUILTIN_PFMIN,
14333 IX86_BUILTIN_PFMUL,
14334 IX86_BUILTIN_PFRCP,
14335 IX86_BUILTIN_PFRCPIT1,
14336 IX86_BUILTIN_PFRCPIT2,
14337 IX86_BUILTIN_PFRSQIT1,
14338 IX86_BUILTIN_PFRSQRT,
14339 IX86_BUILTIN_PFSUB,
14340 IX86_BUILTIN_PFSUBR,
14341 IX86_BUILTIN_PI2FD,
14342 IX86_BUILTIN_PMULHRW,
14344 /* 3DNow! Athlon Extensions */
14345 IX86_BUILTIN_PF2IW,
14346 IX86_BUILTIN_PFNACC,
14347 IX86_BUILTIN_PFPNACC,
14348 IX86_BUILTIN_PI2FW,
14349 IX86_BUILTIN_PSWAPDSI,
14350 IX86_BUILTIN_PSWAPDSF,
14353 IX86_BUILTIN_ADDPD,
14354 IX86_BUILTIN_ADDSD,
14355 IX86_BUILTIN_DIVPD,
14356 IX86_BUILTIN_DIVSD,
14357 IX86_BUILTIN_MULPD,
14358 IX86_BUILTIN_MULSD,
14359 IX86_BUILTIN_SUBPD,
14360 IX86_BUILTIN_SUBSD,
14362 IX86_BUILTIN_CMPEQPD,
14363 IX86_BUILTIN_CMPLTPD,
14364 IX86_BUILTIN_CMPLEPD,
14365 IX86_BUILTIN_CMPGTPD,
14366 IX86_BUILTIN_CMPGEPD,
14367 IX86_BUILTIN_CMPNEQPD,
14368 IX86_BUILTIN_CMPNLTPD,
14369 IX86_BUILTIN_CMPNLEPD,
14370 IX86_BUILTIN_CMPNGTPD,
14371 IX86_BUILTIN_CMPNGEPD,
14372 IX86_BUILTIN_CMPORDPD,
14373 IX86_BUILTIN_CMPUNORDPD,
14374 IX86_BUILTIN_CMPNEPD,
14375 IX86_BUILTIN_CMPEQSD,
14376 IX86_BUILTIN_CMPLTSD,
14377 IX86_BUILTIN_CMPLESD,
14378 IX86_BUILTIN_CMPNEQSD,
14379 IX86_BUILTIN_CMPNLTSD,
14380 IX86_BUILTIN_CMPNLESD,
14381 IX86_BUILTIN_CMPORDSD,
14382 IX86_BUILTIN_CMPUNORDSD,
14383 IX86_BUILTIN_CMPNESD,
14385 IX86_BUILTIN_COMIEQSD,
14386 IX86_BUILTIN_COMILTSD,
14387 IX86_BUILTIN_COMILESD,
14388 IX86_BUILTIN_COMIGTSD,
14389 IX86_BUILTIN_COMIGESD,
14390 IX86_BUILTIN_COMINEQSD,
14391 IX86_BUILTIN_UCOMIEQSD,
14392 IX86_BUILTIN_UCOMILTSD,
14393 IX86_BUILTIN_UCOMILESD,
14394 IX86_BUILTIN_UCOMIGTSD,
14395 IX86_BUILTIN_UCOMIGESD,
14396 IX86_BUILTIN_UCOMINEQSD,
14398 IX86_BUILTIN_MAXPD,
14399 IX86_BUILTIN_MAXSD,
14400 IX86_BUILTIN_MINPD,
14401 IX86_BUILTIN_MINSD,
14403 IX86_BUILTIN_ANDPD,
14404 IX86_BUILTIN_ANDNPD,
14406 IX86_BUILTIN_XORPD,
14408 IX86_BUILTIN_SQRTPD,
14409 IX86_BUILTIN_SQRTSD,
14411 IX86_BUILTIN_UNPCKHPD,
14412 IX86_BUILTIN_UNPCKLPD,
14414 IX86_BUILTIN_SHUFPD,
14416 IX86_BUILTIN_LOADUPD,
14417 IX86_BUILTIN_STOREUPD,
14418 IX86_BUILTIN_MOVSD,
14420 IX86_BUILTIN_LOADHPD,
14421 IX86_BUILTIN_LOADLPD,
14423 IX86_BUILTIN_CVTDQ2PD,
14424 IX86_BUILTIN_CVTDQ2PS,
14426 IX86_BUILTIN_CVTPD2DQ,
14427 IX86_BUILTIN_CVTPD2PI,
14428 IX86_BUILTIN_CVTPD2PS,
14429 IX86_BUILTIN_CVTTPD2DQ,
14430 IX86_BUILTIN_CVTTPD2PI,
14432 IX86_BUILTIN_CVTPI2PD,
14433 IX86_BUILTIN_CVTSI2SD,
14434 IX86_BUILTIN_CVTSI642SD,
14436 IX86_BUILTIN_CVTSD2SI,
14437 IX86_BUILTIN_CVTSD2SI64,
14438 IX86_BUILTIN_CVTSD2SS,
14439 IX86_BUILTIN_CVTSS2SD,
14440 IX86_BUILTIN_CVTTSD2SI,
14441 IX86_BUILTIN_CVTTSD2SI64,
14443 IX86_BUILTIN_CVTPS2DQ,
14444 IX86_BUILTIN_CVTPS2PD,
14445 IX86_BUILTIN_CVTTPS2DQ,
14447 IX86_BUILTIN_MOVNTI,
14448 IX86_BUILTIN_MOVNTPD,
14449 IX86_BUILTIN_MOVNTDQ,
14452 IX86_BUILTIN_MASKMOVDQU,
14453 IX86_BUILTIN_MOVMSKPD,
14454 IX86_BUILTIN_PMOVMSKB128,
14456 IX86_BUILTIN_PACKSSWB128,
14457 IX86_BUILTIN_PACKSSDW128,
14458 IX86_BUILTIN_PACKUSWB128,
14460 IX86_BUILTIN_PADDB128,
14461 IX86_BUILTIN_PADDW128,
14462 IX86_BUILTIN_PADDD128,
14463 IX86_BUILTIN_PADDQ128,
14464 IX86_BUILTIN_PADDSB128,
14465 IX86_BUILTIN_PADDSW128,
14466 IX86_BUILTIN_PADDUSB128,
14467 IX86_BUILTIN_PADDUSW128,
14468 IX86_BUILTIN_PSUBB128,
14469 IX86_BUILTIN_PSUBW128,
14470 IX86_BUILTIN_PSUBD128,
14471 IX86_BUILTIN_PSUBQ128,
14472 IX86_BUILTIN_PSUBSB128,
14473 IX86_BUILTIN_PSUBSW128,
14474 IX86_BUILTIN_PSUBUSB128,
14475 IX86_BUILTIN_PSUBUSW128,
14477 IX86_BUILTIN_PAND128,
14478 IX86_BUILTIN_PANDN128,
14479 IX86_BUILTIN_POR128,
14480 IX86_BUILTIN_PXOR128,
14482 IX86_BUILTIN_PAVGB128,
14483 IX86_BUILTIN_PAVGW128,
14485 IX86_BUILTIN_PCMPEQB128,
14486 IX86_BUILTIN_PCMPEQW128,
14487 IX86_BUILTIN_PCMPEQD128,
14488 IX86_BUILTIN_PCMPGTB128,
14489 IX86_BUILTIN_PCMPGTW128,
14490 IX86_BUILTIN_PCMPGTD128,
14492 IX86_BUILTIN_PMADDWD128,
14494 IX86_BUILTIN_PMAXSW128,
14495 IX86_BUILTIN_PMAXUB128,
14496 IX86_BUILTIN_PMINSW128,
14497 IX86_BUILTIN_PMINUB128,
14499 IX86_BUILTIN_PMULUDQ,
14500 IX86_BUILTIN_PMULUDQ128,
14501 IX86_BUILTIN_PMULHUW128,
14502 IX86_BUILTIN_PMULHW128,
14503 IX86_BUILTIN_PMULLW128,
14505 IX86_BUILTIN_PSADBW128,
14506 IX86_BUILTIN_PSHUFHW,
14507 IX86_BUILTIN_PSHUFLW,
14508 IX86_BUILTIN_PSHUFD,
14510 IX86_BUILTIN_PSLLW128,
14511 IX86_BUILTIN_PSLLD128,
14512 IX86_BUILTIN_PSLLQ128,
14513 IX86_BUILTIN_PSRAW128,
14514 IX86_BUILTIN_PSRAD128,
14515 IX86_BUILTIN_PSRLW128,
14516 IX86_BUILTIN_PSRLD128,
14517 IX86_BUILTIN_PSRLQ128,
14518 IX86_BUILTIN_PSLLDQI128,
14519 IX86_BUILTIN_PSLLWI128,
14520 IX86_BUILTIN_PSLLDI128,
14521 IX86_BUILTIN_PSLLQI128,
14522 IX86_BUILTIN_PSRAWI128,
14523 IX86_BUILTIN_PSRADI128,
14524 IX86_BUILTIN_PSRLDQI128,
14525 IX86_BUILTIN_PSRLWI128,
14526 IX86_BUILTIN_PSRLDI128,
14527 IX86_BUILTIN_PSRLQI128,
14529 IX86_BUILTIN_PUNPCKHBW128,
14530 IX86_BUILTIN_PUNPCKHWD128,
14531 IX86_BUILTIN_PUNPCKHDQ128,
14532 IX86_BUILTIN_PUNPCKHQDQ128,
14533 IX86_BUILTIN_PUNPCKLBW128,
14534 IX86_BUILTIN_PUNPCKLWD128,
14535 IX86_BUILTIN_PUNPCKLDQ128,
14536 IX86_BUILTIN_PUNPCKLQDQ128,
14538 IX86_BUILTIN_CLFLUSH,
14539 IX86_BUILTIN_MFENCE,
14540 IX86_BUILTIN_LFENCE,
14542 /* Prescott New Instructions. */
14543 IX86_BUILTIN_ADDSUBPS,
14544 IX86_BUILTIN_HADDPS,
14545 IX86_BUILTIN_HSUBPS,
14546 IX86_BUILTIN_MOVSHDUP,
14547 IX86_BUILTIN_MOVSLDUP,
14548 IX86_BUILTIN_ADDSUBPD,
14549 IX86_BUILTIN_HADDPD,
14550 IX86_BUILTIN_HSUBPD,
14551 IX86_BUILTIN_LDDQU,
14553 IX86_BUILTIN_MONITOR,
14554 IX86_BUILTIN_MWAIT,
14557 IX86_BUILTIN_PHADDW,
14558 IX86_BUILTIN_PHADDD,
14559 IX86_BUILTIN_PHADDSW,
14560 IX86_BUILTIN_PHSUBW,
14561 IX86_BUILTIN_PHSUBD,
14562 IX86_BUILTIN_PHSUBSW,
14563 IX86_BUILTIN_PMADDUBSW,
14564 IX86_BUILTIN_PMULHRSW,
14565 IX86_BUILTIN_PSHUFB,
14566 IX86_BUILTIN_PSIGNB,
14567 IX86_BUILTIN_PSIGNW,
14568 IX86_BUILTIN_PSIGND,
14569 IX86_BUILTIN_PALIGNR,
14570 IX86_BUILTIN_PABSB,
14571 IX86_BUILTIN_PABSW,
14572 IX86_BUILTIN_PABSD,
14574 IX86_BUILTIN_PHADDW128,
14575 IX86_BUILTIN_PHADDD128,
14576 IX86_BUILTIN_PHADDSW128,
14577 IX86_BUILTIN_PHSUBW128,
14578 IX86_BUILTIN_PHSUBD128,
14579 IX86_BUILTIN_PHSUBSW128,
14580 IX86_BUILTIN_PMADDUBSW128,
14581 IX86_BUILTIN_PMULHRSW128,
14582 IX86_BUILTIN_PSHUFB128,
14583 IX86_BUILTIN_PSIGNB128,
14584 IX86_BUILTIN_PSIGNW128,
14585 IX86_BUILTIN_PSIGND128,
14586 IX86_BUILTIN_PALIGNR128,
14587 IX86_BUILTIN_PABSB128,
14588 IX86_BUILTIN_PABSW128,
14589 IX86_BUILTIN_PABSD128,
14591 IX86_BUILTIN_VEC_INIT_V2SI,
14592 IX86_BUILTIN_VEC_INIT_V4HI,
14593 IX86_BUILTIN_VEC_INIT_V8QI,
14594 IX86_BUILTIN_VEC_EXT_V2DF,
14595 IX86_BUILTIN_VEC_EXT_V2DI,
14596 IX86_BUILTIN_VEC_EXT_V4SF,
14597 IX86_BUILTIN_VEC_EXT_V4SI,
14598 IX86_BUILTIN_VEC_EXT_V8HI,
14599 IX86_BUILTIN_VEC_EXT_V2SI,
14600 IX86_BUILTIN_VEC_EXT_V4HI,
14601 IX86_BUILTIN_VEC_SET_V8HI,
14602 IX86_BUILTIN_VEC_SET_V4HI,
/* Register builtin NAME with signature TYPE and code CODE, but only when
   the ISA bits in MASK are enabled in target_flags; builtins flagged
   MASK_64BIT additionally require TARGET_64BIT.  (No comments are placed
   between the continuation lines below -- that would break the macro.)  */
14607 #define def_builtin(MASK, NAME, TYPE, CODE) \
14609 if ((MASK) & target_flags \
14610 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14611 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14612 NULL, NULL_TREE); \
14615 /* Bits for builtin_description.flag. */
14617 /* Set when we don't support the comparison natively, and should
14618 swap_comparison in order to support it. */
14619 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* Table-entry descriptor shared by the bdesc_* builtin tables below.  */
14621 struct builtin_description
14623 const unsigned int mask; /* ISA mask gating registration (e.g. MASK_SSE) */
14624 const enum insn_code icode; /* insn pattern implementing the builtin */
14625 const char *const name; /* user-visible __builtin_ia32_* name */
14626 const enum ix86_builtins code; /* IX86_BUILTIN_* enumerator */
14627 const enum rtx_code comparison; /* comparison code, when applicable */
14628 const unsigned int flag; /* BUILTIN_DESC_* flags */
/* Descriptors for the SSE/SSE2 (u)comi scalar-compare builtins.  Note the
   comparison codes as written: eq->UNEQ, lt->UNLT, le->UNLE, neq->LTGT,
   while gt/ge use the signaling GT/GE.  Closing "};" of this table falls
   in an elided line of this extraction.  */
14631 static const struct builtin_description bdesc_comi[] =
14633 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14634 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14635 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14636 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14637 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14638 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14639 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14640 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14641 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14642 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14643 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14644 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14645 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14646 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14647 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14648 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14649 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14650 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14651 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14652 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14653 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14654 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14655 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14656 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14659 static const struct builtin_description bdesc_2arg[] =
14662 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14663 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14664 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14665 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14666 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14667 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14668 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14669 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14671 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14672 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14673 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14674 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14675 BUILTIN_DESC_SWAP_OPERANDS },
14676 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14677 BUILTIN_DESC_SWAP_OPERANDS },
14678 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14679 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14680 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14681 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14682 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14683 BUILTIN_DESC_SWAP_OPERANDS },
14684 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14685 BUILTIN_DESC_SWAP_OPERANDS },
14686 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14687 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14688 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14689 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14690 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14691 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14692 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14693 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14694 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14695 BUILTIN_DESC_SWAP_OPERANDS },
14696 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14697 BUILTIN_DESC_SWAP_OPERANDS },
14698 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14700 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14701 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14702 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14703 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14705 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14706 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14707 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14708 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14710 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14711 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14712 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14713 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14714 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14717 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14718 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14719 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14720 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14721 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14722 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14723 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14724 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14726 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14727 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14728 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14729 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14730 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14731 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14732 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14733 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14735 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14736 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14737 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14739 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14740 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14741 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14742 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14744 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14745 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14747 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14748 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14749 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14750 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14751 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14752 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14754 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14755 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14756 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14757 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14759 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14760 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14761 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14762 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14763 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14764 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14767 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14768 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14769 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14771 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14772 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14773 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14775 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14776 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14777 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14778 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14779 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14780 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14782 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14783 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14784 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14785 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14786 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14787 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14789 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14790 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14791 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14792 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14794 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14795 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14798 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14799 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14800 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14801 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14802 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14803 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14804 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14805 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14807 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14808 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14809 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14810 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14811 BUILTIN_DESC_SWAP_OPERANDS },
14812 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14813 BUILTIN_DESC_SWAP_OPERANDS },
14814 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14815 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14816 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14817 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14818 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14819 BUILTIN_DESC_SWAP_OPERANDS },
14820 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14821 BUILTIN_DESC_SWAP_OPERANDS },
14822 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14823 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14824 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14825 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14826 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14827 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14828 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14829 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14830 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14832 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14833 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14834 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14835 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14837 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14838 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14839 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14840 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14842 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14843 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14844 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14847 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14848 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14849 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14850 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14851 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14852 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14853 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14854 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14856 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14857 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14858 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14859 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14860 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14861 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14862 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14863 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14865 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14866 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14868 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14869 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14870 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14871 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14873 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14874 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14876 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14877 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14878 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14879 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14880 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14881 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14883 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14884 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14885 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14886 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14888 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14889 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14890 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14891 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14892 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14893 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14894 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14895 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14897 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14898 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14899 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14901 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14902 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14904 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14905 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14907 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14908 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14909 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14911 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14912 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14913 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14915 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14916 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14918 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14920 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14921 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14922 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14923 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14926 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14927 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14928 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14929 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14930 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14931 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
14934 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
14935 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
14936 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
14937 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
14938 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
14939 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
14940 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
14941 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
14942 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
14943 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
14944 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
14945 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
14946 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
14947 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
14948 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
14949 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
14950 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
14951 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
14952 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
14953 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
14954 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
14955 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
14956 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
14957 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
/* One-operand (unary) builtins.  Each initializer supplies: the
   target mask that must be enabled for the builtin to be available,
   the insn code implementing it, the builtin's source-level name
   (0 here means the name is registered separately, by hand), the
   IX86_BUILTIN_* enumerator, and two trailing fields (comparison
   code / flags) that are all zero for these unary entries.  */
14960 static const struct builtin_description bdesc_1arg[] =
  /* SSE (and 3DNow!A for the MMX form) move-mask builtins.  */
14962 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14963 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
  /* SSE unary arithmetic: square root, reciprocal square root,
     reciprocal.  */
14965 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14966 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14967 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
  /* SSE float -> integer conversions; the *q (DImode-result)
     variants additionally require 64-bit mode.  */
14969 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14970 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14971 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14972 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14973 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14974 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
  /* SSE2 move-mask builtins.  */
14976 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14977 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  /* SSE2 double-precision square root.  */
14979 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
  /* SSE2 conversions between packed integer and floating formats
     (cvt* = round per MXCSR, cvtt* = truncate).  */
14981 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14982 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14984 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14985 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14986 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14987 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14988 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14990 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
  /* SSE2 scalar double -> integer conversions; *q variants need
     64-bit mode for the DImode result.  */
14992 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14993 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14994 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14995 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14997 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14998 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14999 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
  /* SSE3 duplicating moves.  */
15002 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
15003 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
  /* SSSE3 packed absolute value; these carry their names directly
     (registered generically from this table, unlike the entries
     above with a 0 name).  */
15006 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
15007 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
15008 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
15009 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
15010 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
15011 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15015 ix86_init_builtins (void)
15018 ix86_init_mmx_sse_builtins ();
15021 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
15022    is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   portion of the builtins.  */
15025 ix86_init_mmx_sse_builtins (void)
15027 const struct builtin_description * d;
15030 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15031 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15032 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15033 tree V2DI_type_node
15034 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15035 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15036 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15037 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15038 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15039 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15040 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15042 tree pchar_type_node = build_pointer_type (char_type_node);
15043 tree pcchar_type_node = build_pointer_type (
15044 build_type_variant (char_type_node, 1, 0));
15045 tree pfloat_type_node = build_pointer_type (float_type_node);
15046 tree pcfloat_type_node = build_pointer_type (
15047 build_type_variant (float_type_node, 1, 0));
15048 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15049 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15050 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15053 tree int_ftype_v4sf_v4sf
15054 = build_function_type_list (integer_type_node,
15055 V4SF_type_node, V4SF_type_node, NULL_TREE);
15056 tree v4si_ftype_v4sf_v4sf
15057 = build_function_type_list (V4SI_type_node,
15058 V4SF_type_node, V4SF_type_node, NULL_TREE);
15059 /* MMX/SSE/integer conversions. */
15060 tree int_ftype_v4sf
15061 = build_function_type_list (integer_type_node,
15062 V4SF_type_node, NULL_TREE);
15063 tree int64_ftype_v4sf
15064 = build_function_type_list (long_long_integer_type_node,
15065 V4SF_type_node, NULL_TREE);
15066 tree int_ftype_v8qi
15067 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15068 tree v4sf_ftype_v4sf_int
15069 = build_function_type_list (V4SF_type_node,
15070 V4SF_type_node, integer_type_node, NULL_TREE);
15071 tree v4sf_ftype_v4sf_int64
15072 = build_function_type_list (V4SF_type_node,
15073 V4SF_type_node, long_long_integer_type_node,
15075 tree v4sf_ftype_v4sf_v2si
15076 = build_function_type_list (V4SF_type_node,
15077 V4SF_type_node, V2SI_type_node, NULL_TREE);
15079 /* Miscellaneous. */
15080 tree v8qi_ftype_v4hi_v4hi
15081 = build_function_type_list (V8QI_type_node,
15082 V4HI_type_node, V4HI_type_node, NULL_TREE);
15083 tree v4hi_ftype_v2si_v2si
15084 = build_function_type_list (V4HI_type_node,
15085 V2SI_type_node, V2SI_type_node, NULL_TREE);
15086 tree v4sf_ftype_v4sf_v4sf_int
15087 = build_function_type_list (V4SF_type_node,
15088 V4SF_type_node, V4SF_type_node,
15089 integer_type_node, NULL_TREE);
15090 tree v2si_ftype_v4hi_v4hi
15091 = build_function_type_list (V2SI_type_node,
15092 V4HI_type_node, V4HI_type_node, NULL_TREE);
15093 tree v4hi_ftype_v4hi_int
15094 = build_function_type_list (V4HI_type_node,
15095 V4HI_type_node, integer_type_node, NULL_TREE);
15096 tree v4hi_ftype_v4hi_di
15097 = build_function_type_list (V4HI_type_node,
15098 V4HI_type_node, long_long_unsigned_type_node,
15100 tree v2si_ftype_v2si_di
15101 = build_function_type_list (V2SI_type_node,
15102 V2SI_type_node, long_long_unsigned_type_node,
15104 tree void_ftype_void
15105 = build_function_type (void_type_node, void_list_node);
15106 tree void_ftype_unsigned
15107 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15108 tree void_ftype_unsigned_unsigned
15109 = build_function_type_list (void_type_node, unsigned_type_node,
15110 unsigned_type_node, NULL_TREE);
15111 tree void_ftype_pcvoid_unsigned_unsigned
15112 = build_function_type_list (void_type_node, const_ptr_type_node,
15113 unsigned_type_node, unsigned_type_node,
15115 tree unsigned_ftype_void
15116 = build_function_type (unsigned_type_node, void_list_node);
15117 tree v2si_ftype_v4sf
15118 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15119 /* Loads/stores. */
15120 tree void_ftype_v8qi_v8qi_pchar
15121 = build_function_type_list (void_type_node,
15122 V8QI_type_node, V8QI_type_node,
15123 pchar_type_node, NULL_TREE);
15124 tree v4sf_ftype_pcfloat
15125 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15126 /* @@@ the type is bogus */
15127 tree v4sf_ftype_v4sf_pv2si
15128 = build_function_type_list (V4SF_type_node,
15129 V4SF_type_node, pv2si_type_node, NULL_TREE);
15130 tree void_ftype_pv2si_v4sf
15131 = build_function_type_list (void_type_node,
15132 pv2si_type_node, V4SF_type_node, NULL_TREE);
15133 tree void_ftype_pfloat_v4sf
15134 = build_function_type_list (void_type_node,
15135 pfloat_type_node, V4SF_type_node, NULL_TREE);
15136 tree void_ftype_pdi_di
15137 = build_function_type_list (void_type_node,
15138 pdi_type_node, long_long_unsigned_type_node,
15140 tree void_ftype_pv2di_v2di
15141 = build_function_type_list (void_type_node,
15142 pv2di_type_node, V2DI_type_node, NULL_TREE);
15143 /* Normal vector unops. */
15144 tree v4sf_ftype_v4sf
15145 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15146 tree v16qi_ftype_v16qi
15147 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15148 tree v8hi_ftype_v8hi
15149 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15150 tree v4si_ftype_v4si
15151 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15152 tree v8qi_ftype_v8qi
15153 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15154 tree v4hi_ftype_v4hi
15155 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15157 /* Normal vector binops. */
15158 tree v4sf_ftype_v4sf_v4sf
15159 = build_function_type_list (V4SF_type_node,
15160 V4SF_type_node, V4SF_type_node, NULL_TREE);
15161 tree v8qi_ftype_v8qi_v8qi
15162 = build_function_type_list (V8QI_type_node,
15163 V8QI_type_node, V8QI_type_node, NULL_TREE);
15164 tree v4hi_ftype_v4hi_v4hi
15165 = build_function_type_list (V4HI_type_node,
15166 V4HI_type_node, V4HI_type_node, NULL_TREE);
15167 tree v2si_ftype_v2si_v2si
15168 = build_function_type_list (V2SI_type_node,
15169 V2SI_type_node, V2SI_type_node, NULL_TREE);
15170 tree di_ftype_di_di
15171 = build_function_type_list (long_long_unsigned_type_node,
15172 long_long_unsigned_type_node,
15173 long_long_unsigned_type_node, NULL_TREE);
15175 tree di_ftype_di_di_int
15176 = build_function_type_list (long_long_unsigned_type_node,
15177 long_long_unsigned_type_node,
15178 long_long_unsigned_type_node,
15179 integer_type_node, NULL_TREE);
15181 tree v2si_ftype_v2sf
15182 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15183 tree v2sf_ftype_v2si
15184 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15185 tree v2si_ftype_v2si
15186 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15187 tree v2sf_ftype_v2sf
15188 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15189 tree v2sf_ftype_v2sf_v2sf
15190 = build_function_type_list (V2SF_type_node,
15191 V2SF_type_node, V2SF_type_node, NULL_TREE);
15192 tree v2si_ftype_v2sf_v2sf
15193 = build_function_type_list (V2SI_type_node,
15194 V2SF_type_node, V2SF_type_node, NULL_TREE);
15195 tree pint_type_node = build_pointer_type (integer_type_node);
15196 tree pdouble_type_node = build_pointer_type (double_type_node);
15197 tree pcdouble_type_node = build_pointer_type (
15198 build_type_variant (double_type_node, 1, 0));
15199 tree int_ftype_v2df_v2df
15200 = build_function_type_list (integer_type_node,
15201 V2DF_type_node, V2DF_type_node, NULL_TREE);
15203 tree void_ftype_pcvoid
15204 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15205 tree v4sf_ftype_v4si
15206 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15207 tree v4si_ftype_v4sf
15208 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15209 tree v2df_ftype_v4si
15210 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15211 tree v4si_ftype_v2df
15212 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15213 tree v2si_ftype_v2df
15214 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15215 tree v4sf_ftype_v2df
15216 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15217 tree v2df_ftype_v2si
15218 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15219 tree v2df_ftype_v4sf
15220 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15221 tree int_ftype_v2df
15222 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15223 tree int64_ftype_v2df
15224 = build_function_type_list (long_long_integer_type_node,
15225 V2DF_type_node, NULL_TREE);
15226 tree v2df_ftype_v2df_int
15227 = build_function_type_list (V2DF_type_node,
15228 V2DF_type_node, integer_type_node, NULL_TREE);
15229 tree v2df_ftype_v2df_int64
15230 = build_function_type_list (V2DF_type_node,
15231 V2DF_type_node, long_long_integer_type_node,
15233 tree v4sf_ftype_v4sf_v2df
15234 = build_function_type_list (V4SF_type_node,
15235 V4SF_type_node, V2DF_type_node, NULL_TREE);
15236 tree v2df_ftype_v2df_v4sf
15237 = build_function_type_list (V2DF_type_node,
15238 V2DF_type_node, V4SF_type_node, NULL_TREE);
15239 tree v2df_ftype_v2df_v2df_int
15240 = build_function_type_list (V2DF_type_node,
15241 V2DF_type_node, V2DF_type_node,
15244 tree v2df_ftype_v2df_pcdouble
15245 = build_function_type_list (V2DF_type_node,
15246 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15247 tree void_ftype_pdouble_v2df
15248 = build_function_type_list (void_type_node,
15249 pdouble_type_node, V2DF_type_node, NULL_TREE);
15250 tree void_ftype_pint_int
15251 = build_function_type_list (void_type_node,
15252 pint_type_node, integer_type_node, NULL_TREE);
15253 tree void_ftype_v16qi_v16qi_pchar
15254 = build_function_type_list (void_type_node,
15255 V16QI_type_node, V16QI_type_node,
15256 pchar_type_node, NULL_TREE);
15257 tree v2df_ftype_pcdouble
15258 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15259 tree v2df_ftype_v2df_v2df
15260 = build_function_type_list (V2DF_type_node,
15261 V2DF_type_node, V2DF_type_node, NULL_TREE);
15262 tree v16qi_ftype_v16qi_v16qi
15263 = build_function_type_list (V16QI_type_node,
15264 V16QI_type_node, V16QI_type_node, NULL_TREE);
15265 tree v8hi_ftype_v8hi_v8hi
15266 = build_function_type_list (V8HI_type_node,
15267 V8HI_type_node, V8HI_type_node, NULL_TREE);
15268 tree v4si_ftype_v4si_v4si
15269 = build_function_type_list (V4SI_type_node,
15270 V4SI_type_node, V4SI_type_node, NULL_TREE);
15271 tree v2di_ftype_v2di_v2di
15272 = build_function_type_list (V2DI_type_node,
15273 V2DI_type_node, V2DI_type_node, NULL_TREE);
15274 tree v2di_ftype_v2df_v2df
15275 = build_function_type_list (V2DI_type_node,
15276 V2DF_type_node, V2DF_type_node, NULL_TREE);
15277 tree v2df_ftype_v2df
15278 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15279 tree v2di_ftype_v2di_int
15280 = build_function_type_list (V2DI_type_node,
15281 V2DI_type_node, integer_type_node, NULL_TREE);
15282 tree v2di_ftype_v2di_v2di_int
15283 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15284 V2DI_type_node, integer_type_node, NULL_TREE);
15285 tree v4si_ftype_v4si_int
15286 = build_function_type_list (V4SI_type_node,
15287 V4SI_type_node, integer_type_node, NULL_TREE);
15288 tree v8hi_ftype_v8hi_int
15289 = build_function_type_list (V8HI_type_node,
15290 V8HI_type_node, integer_type_node, NULL_TREE);
15291 tree v8hi_ftype_v8hi_v2di
15292 = build_function_type_list (V8HI_type_node,
15293 V8HI_type_node, V2DI_type_node, NULL_TREE);
15294 tree v4si_ftype_v4si_v2di
15295 = build_function_type_list (V4SI_type_node,
15296 V4SI_type_node, V2DI_type_node, NULL_TREE);
15297 tree v4si_ftype_v8hi_v8hi
15298 = build_function_type_list (V4SI_type_node,
15299 V8HI_type_node, V8HI_type_node, NULL_TREE);
15300 tree di_ftype_v8qi_v8qi
15301 = build_function_type_list (long_long_unsigned_type_node,
15302 V8QI_type_node, V8QI_type_node, NULL_TREE);
15303 tree di_ftype_v2si_v2si
15304 = build_function_type_list (long_long_unsigned_type_node,
15305 V2SI_type_node, V2SI_type_node, NULL_TREE);
15306 tree v2di_ftype_v16qi_v16qi
15307 = build_function_type_list (V2DI_type_node,
15308 V16QI_type_node, V16QI_type_node, NULL_TREE);
15309 tree v2di_ftype_v4si_v4si
15310 = build_function_type_list (V2DI_type_node,
15311 V4SI_type_node, V4SI_type_node, NULL_TREE);
15312 tree int_ftype_v16qi
15313 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15314 tree v16qi_ftype_pcchar
15315 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15316 tree void_ftype_pchar_v16qi
15317 = build_function_type_list (void_type_node,
15318 pchar_type_node, V16QI_type_node, NULL_TREE);
15321 tree float128_type;
15324 /* The __float80 type. */
15325 if (TYPE_MODE (long_double_type_node) == XFmode)
15326 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15330 /* The __float80 type. */
15331 float80_type = make_node (REAL_TYPE);
15332 TYPE_PRECISION (float80_type) = 80;
15333 layout_type (float80_type);
15334 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15339 float128_type = make_node (REAL_TYPE);
15340 TYPE_PRECISION (float128_type) = 128;
15341 layout_type (float128_type);
15342 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15345 /* Add all builtins that are more or less simple operations on two
15347 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15349 /* Use one of the operands; the target can have a different mode for
15350 mask-generating compares. */
15351 enum machine_mode mode;
15356 mode = insn_data[d->icode].operand[1].mode;
15361 type = v16qi_ftype_v16qi_v16qi;
15364 type = v8hi_ftype_v8hi_v8hi;
15367 type = v4si_ftype_v4si_v4si;
15370 type = v2di_ftype_v2di_v2di;
15373 type = v2df_ftype_v2df_v2df;
15376 type = v4sf_ftype_v4sf_v4sf;
15379 type = v8qi_ftype_v8qi_v8qi;
15382 type = v4hi_ftype_v4hi_v4hi;
15385 type = v2si_ftype_v2si_v2si;
15388 type = di_ftype_di_di;
15392 gcc_unreachable ();
15395 /* Override for comparisons. */
15396 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15397 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15398 type = v4si_ftype_v4sf_v4sf;
15400 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15401 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15402 type = v2di_ftype_v2df_v2df;
15404 def_builtin (d->mask, d->name, type, d->code);
15407 /* Add all builtins that are more or less simple operations on 1 operand. */
15408 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15410 enum machine_mode mode;
15415 mode = insn_data[d->icode].operand[1].mode;
15420 type = v16qi_ftype_v16qi;
15423 type = v8hi_ftype_v8hi;
15426 type = v4si_ftype_v4si;
15429 type = v2df_ftype_v2df;
15432 type = v4sf_ftype_v4sf;
15435 type = v8qi_ftype_v8qi;
15438 type = v4hi_ftype_v4hi;
15441 type = v2si_ftype_v2si;
15448 def_builtin (d->mask, d->name, type, d->code);
15451 /* Add the remaining MMX insns with somewhat more complicated types. */
15452 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15453 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15454 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15455 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15457 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15458 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15459 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15461 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15462 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15464 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15465 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15467 /* comi/ucomi insns. */
15468 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15469 if (d->mask == MASK_SSE2)
15470 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15472 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15474 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15475 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15476 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15478 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15479 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15480 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15481 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15482 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15483 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15484 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15485 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15486 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15487 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15488 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15490 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15492 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15493 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15495 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15496 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15497 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15498 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15500 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15501 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15502 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15503 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15505 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15507 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15509 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15510 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15511 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15512 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15513 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15514 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15516 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15518 /* Original 3DNow! */
15519 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15520 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15521 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15522 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15523 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15524 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15525 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15526 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15527 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15528 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15529 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15530 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15531 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15532 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15533 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15534 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15535 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15536 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15537 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15538 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15540 /* 3DNow! extension as used in the Athlon CPU. */
15541 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15542 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15543 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15544 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15545 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15546 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15549 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15551 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15552 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15554 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15555 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15557 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15558 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15559 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15560 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15561 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15563 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15564 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15565 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15566 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15568 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15569 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15571 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15573 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15574 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15576 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15577 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15578 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15579 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15580 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15582 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15584 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15585 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15586 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15587 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15589 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15590 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15591 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15593 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15594 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15595 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15596 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15598 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15599 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15600 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15602 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15603 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15605 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15606 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15608 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15609 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15610 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15612 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15613 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15614 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15616 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15617 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15619 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15620 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15621 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15622 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15624 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15625 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15626 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15627 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15629 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15630 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15632 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15634 /* Prescott New Instructions. */
15635 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15636 void_ftype_pcvoid_unsigned_unsigned,
15637 IX86_BUILTIN_MONITOR);
15638 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15639 void_ftype_unsigned_unsigned,
15640 IX86_BUILTIN_MWAIT);
15641 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15643 IX86_BUILTIN_MOVSHDUP);
15644 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15646 IX86_BUILTIN_MOVSLDUP);
15647 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15648 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15651 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
15652 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
15653 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
15654 IX86_BUILTIN_PALIGNR);
15656 /* Access to the vec_init patterns. */
15657 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15658 integer_type_node, NULL_TREE);
15659 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15660 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15662 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15663 short_integer_type_node,
15664 short_integer_type_node,
15665 short_integer_type_node, NULL_TREE);
15666 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15667 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15669 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15670 char_type_node, char_type_node,
15671 char_type_node, char_type_node,
15672 char_type_node, char_type_node,
15673 char_type_node, NULL_TREE);
15674 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15675 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15677 /* Access to the vec_extract patterns. */
15678 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15679 integer_type_node, NULL_TREE);
15680 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15681 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15683 ftype = build_function_type_list (long_long_integer_type_node,
15684 V2DI_type_node, integer_type_node,
15686 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15687 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15689 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15690 integer_type_node, NULL_TREE);
15691 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15692 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15694 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15695 integer_type_node, NULL_TREE);
15696 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15697 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15699 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15700 integer_type_node, NULL_TREE);
15701 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15702 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15704 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15705 integer_type_node, NULL_TREE);
15706 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15707 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15709 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15710 integer_type_node, NULL_TREE);
15711 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15712 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15714 /* Access to the vec_set patterns. */
15715 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15717 integer_type_node, NULL_TREE);
15718 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15719 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15721 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15723 integer_type_node, NULL_TREE);
15724 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15725 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15728 /* Errors in the source file can cause expand_expr to return const0_rtx
15729 where we expect a vector. To avoid crashing, use one of the vector
15730 clear instructions. */
15732 safe_vector_operand (rtx x, enum machine_mode mode)
15734 if (x == const0_rtx)
15735 x = CONST0_RTX (mode);
15739 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15742 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15745 tree arg0 = TREE_VALUE (arglist);
15746 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15747 rtx op0 = expand_normal (arg0);
15748 rtx op1 = expand_normal (arg1);
15749 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15750 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15751 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15753 if (VECTOR_MODE_P (mode0))
15754 op0 = safe_vector_operand (op0, mode0);
15755 if (VECTOR_MODE_P (mode1))
15756 op1 = safe_vector_operand (op1, mode1);
15758 if (optimize || !target
15759 || GET_MODE (target) != tmode
15760 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15761 target = gen_reg_rtx (tmode);
15763 if (GET_MODE (op1) == SImode && mode1 == TImode)
15765 rtx x = gen_reg_rtx (V4SImode);
15766 emit_insn (gen_sse2_loadd (x, op1));
15767 op1 = gen_lowpart (TImode, x);
15770 /* The insn must want input operands in the same modes as the
15772 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15773 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15775 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15776 op0 = copy_to_mode_reg (mode0, op0);
15777 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15778 op1 = copy_to_mode_reg (mode1, op1);
15780 /* ??? Using ix86_fixup_binary_operands is problematic when
15781 we've got mismatched modes. Fake it. */
15787 if (tmode == mode0 && tmode == mode1)
15789 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15793 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15795 op0 = force_reg (mode0, op0);
15796 op1 = force_reg (mode1, op1);
15797 target = gen_reg_rtx (tmode);
15800 pat = GEN_FCN (icode) (target, op0, op1);
15807 /* Subroutine of ix86_expand_builtin to take care of stores. */
15810 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15813 tree arg0 = TREE_VALUE (arglist);
15814 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15815 rtx op0 = expand_normal (arg0);
15816 rtx op1 = expand_normal (arg1);
15817 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15818 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15820 if (VECTOR_MODE_P (mode1))
15821 op1 = safe_vector_operand (op1, mode1);
15823 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15824 op1 = copy_to_mode_reg (mode1, op1);
15826 pat = GEN_FCN (icode) (op0, op1);
15832 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15835 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15836 rtx target, int do_load)
15839 tree arg0 = TREE_VALUE (arglist);
15840 rtx op0 = expand_normal (arg0);
15841 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15842 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15844 if (optimize || !target
15845 || GET_MODE (target) != tmode
15846 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15847 target = gen_reg_rtx (tmode);
15849 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15852 if (VECTOR_MODE_P (mode0))
15853 op0 = safe_vector_operand (op0, mode0);
15855 if ((optimize && !register_operand (op0, mode0))
15856 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15857 op0 = copy_to_mode_reg (mode0, op0);
15860 pat = GEN_FCN (icode) (target, op0);
15867 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15868 sqrtss, rsqrtss, rcpss. */
15871 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15874 tree arg0 = TREE_VALUE (arglist);
15875 rtx op1, op0 = expand_normal (arg0);
15876 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15877 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15879 if (optimize || !target
15880 || GET_MODE (target) != tmode
15881 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15882 target = gen_reg_rtx (tmode);
15884 if (VECTOR_MODE_P (mode0))
15885 op0 = safe_vector_operand (op0, mode0);
15887 if ((optimize && !register_operand (op0, mode0))
15888 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15889 op0 = copy_to_mode_reg (mode0, op0);
15892 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15893 op1 = copy_to_mode_reg (mode0, op1);
15895 pat = GEN_FCN (icode) (target, op0, op1);
15902 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15905 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15909 tree arg0 = TREE_VALUE (arglist);
15910 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15911 rtx op0 = expand_normal (arg0);
15912 rtx op1 = expand_normal (arg1);
15914 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15915 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15916 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15917 enum rtx_code comparison = d->comparison;
15919 if (VECTOR_MODE_P (mode0))
15920 op0 = safe_vector_operand (op0, mode0);
15921 if (VECTOR_MODE_P (mode1))
15922 op1 = safe_vector_operand (op1, mode1);
15924 /* Swap operands if we have a comparison that isn't available in
15926 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15928 rtx tmp = gen_reg_rtx (mode1);
15929 emit_move_insn (tmp, op1);
15934 if (optimize || !target
15935 || GET_MODE (target) != tmode
15936 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15937 target = gen_reg_rtx (tmode);
15939 if ((optimize && !register_operand (op0, mode0))
15940 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15941 op0 = copy_to_mode_reg (mode0, op0);
15942 if ((optimize && !register_operand (op1, mode1))
15943 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15944 op1 = copy_to_mode_reg (mode1, op1);
15946 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15947 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15954 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15957 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15961 tree arg0 = TREE_VALUE (arglist);
15962 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15963 rtx op0 = expand_normal (arg0);
15964 rtx op1 = expand_normal (arg1);
15966 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15967 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15968 enum rtx_code comparison = d->comparison;
15970 if (VECTOR_MODE_P (mode0))
15971 op0 = safe_vector_operand (op0, mode0);
15972 if (VECTOR_MODE_P (mode1))
15973 op1 = safe_vector_operand (op1, mode1);
15975 /* Swap operands if we have a comparison that isn't available in
15977 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15984 target = gen_reg_rtx (SImode);
15985 emit_move_insn (target, const0_rtx);
15986 target = gen_rtx_SUBREG (QImode, target, 0);
15988 if ((optimize && !register_operand (op0, mode0))
15989 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15990 op0 = copy_to_mode_reg (mode0, op0);
15991 if ((optimize && !register_operand (op1, mode1))
15992 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15993 op1 = copy_to_mode_reg (mode1, op1);
15995 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15996 pat = GEN_FCN (d->icode) (op0, op1);
16000 emit_insn (gen_rtx_SET (VOIDmode,
16001 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16002 gen_rtx_fmt_ee (comparison, QImode,
16006 return SUBREG_REG (target);
16009 /* Return the integer constant in ARG. Constrain it to be in the range
16010 of the subparts of VEC_TYPE; issue an error if not. */
16013 get_element_number (tree vec_type, tree arg)
16015 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16017 if (!host_integerp (arg, 1)
16018 || (elt = tree_low_cst (arg, 1), elt > max))
16020 error ("selector must be an integer constant in the range 0..%wi", max);
16027 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16028 ix86_expand_vector_init. We DO have language-level syntax for this, in
16029 the form of (type){ init-list }. Except that since we can't place emms
16030 instructions from inside the compiler, we can't allow the use of MMX
16031 registers unless the user explicitly asks for it. So we do *not* define
16032 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16033 we have builtins invoked by mmintrin.h that gives us license to emit
16034 these sorts of instructions. */
16037 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16039 enum machine_mode tmode = TYPE_MODE (type);
16040 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16041 int i, n_elt = GET_MODE_NUNITS (tmode);
16042 rtvec v = rtvec_alloc (n_elt);
16044 gcc_assert (VECTOR_MODE_P (tmode));
16046 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16048 rtx x = expand_normal (TREE_VALUE (arglist));
16049 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16052 gcc_assert (arglist == NULL);
16054 if (!target || !register_operand (target, tmode))
16055 target = gen_reg_rtx (tmode);
16057 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16061 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16062 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16063 had a language-level syntax for referencing vector elements. */
/* Expand a __builtin_ia32_vec_ext_* call: extract element ELT (a
   compile-time constant, validated by get_element_number) from the
   vector first argument.  NOTE(review): listing is elided -- local
   declarations for arg0/arg1/op0/elt and the final return of TARGET
   are on line numbers not visible here.  */
16066 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16068 enum machine_mode tmode, mode0;
16073 arg0 = TREE_VALUE (arglist);
16074 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16076 op0 = expand_normal (arg0);
16077 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode is the element mode, mode0 the whole-vector mode.  */
16079 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16080 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16081 gcc_assert (VECTOR_MODE_P (mode0));
16083 op0 = force_reg (mode0, op0);
16085 if (optimize || !target || !register_operand (target, tmode))
16086 target = gen_reg_rtx (tmode);
16088 ix86_expand_vector_extract (true, target, op0, elt);
16093 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16094 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16095 a language-level syntax for referencing vector elements. */
/* Expand a __builtin_ia32_vec_set_* call: insert scalar ARG1 into
   vector ARG0 at constant lane ARG2.  NOTE(review): listing is elided
   -- local declarations for op0/op1/elt and the final return (of op0,
   presumably) are on line numbers not visible here.  */
16098 ix86_expand_vec_set_builtin (tree arglist)
16100 enum machine_mode tmode, mode1;
16101 tree arg0, arg1, arg2;
16105 arg0 = TREE_VALUE (arglist);
16106 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16107 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
/* tmode is the vector mode, mode1 the element mode.  */
16109 tmode = TYPE_MODE (TREE_TYPE (arg0));
16110 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16111 gcc_assert (VECTOR_MODE_P (tmode));
16113 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16114 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16115 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the scalar to the element mode if it came out wider or
   narrower; VOIDmode means a mode-less constant, left alone here.  */
16117 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16118 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16120 op0 = force_reg (tmode, op0);
16121 op1 = force_reg (mode1, op1);
16123 ix86_expand_vector_set (true, op0, op1, elt);
16128 /* Expand an expression EXP that calls a built-in function,
16129 with result going to TARGET if that's convenient
16130 (and in mode MODE if that's convenient).
16131 SUBTARGET may be used as the target for computing one of EXP's operands.
16132 IGNORE is nonzero if the value is to be ignored. */
/* Main dispatcher for IA-32 builtin expansion: switch on the builtin's
   function code, handling the irregular builtins inline and routing
   the regular one-/two-operand and comparison builtins through the
   bdesc_* tables at the bottom.
   NOTE(review): this listing is elided -- the switch statement's
   opening, many braces, "break"/"return" statements and some emit
   calls fall on line numbers not visible here.  Code lines below are
   verbatim.  */
16135 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16136 enum machine_mode mode ATTRIBUTE_UNUSED,
16137 int ignore ATTRIBUTE_UNUSED)
16139 const struct builtin_description *d;
16141 enum insn_code icode;
16142 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16143 tree arglist = TREE_OPERAND (exp, 1);
16144 tree arg0, arg1, arg2;
16145 rtx op0, op1, op2, pat;
16146 enum machine_mode tmode, mode0, mode1, mode2, mode3;
16147 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
/* --- Builtins with no operands: just emit the insn.  --- */
16151 case IX86_BUILTIN_EMMS:
16152 emit_insn (gen_mmx_emms ());
16155 case IX86_BUILTIN_SFENCE:
16156 emit_insn (gen_sse_sfence ());
/* --- maskmovq/maskmovdqu: store through a pointer with a mask.  --- */
16159 case IX86_BUILTIN_MASKMOVQ:
16160 case IX86_BUILTIN_MASKMOVDQU:
16161 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16162 ? CODE_FOR_mmx_maskmovq
16163 : CODE_FOR_sse2_maskmovdqu);
16164 /* Note the arg order is different from the operand order.  */
16165 arg1 = TREE_VALUE (arglist);
16166 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16167 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16168 op0 = expand_normal (arg0);
16169 op1 = expand_normal (arg1);
16170 op2 = expand_normal (arg2);
16171 mode0 = insn_data[icode].operand[0].mode;
16172 mode1 = insn_data[icode].operand[1].mode;
16173 mode2 = insn_data[icode].operand[2].mode;
/* op0 is the destination address; turn it into a MEM.  */
16175 op0 = force_reg (Pmode, op0);
16176 op0 = gen_rtx_MEM (mode1, op0);
16178 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16179 op0 = copy_to_mode_reg (mode0, op0);
16180 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16181 op1 = copy_to_mode_reg (mode1, op1);
16182 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16183 op2 = copy_to_mode_reg (mode2, op2);
16184 pat = GEN_FCN (icode) (op0, op1, op2);
/* --- Scalar SSE unary math through the generic helper.  --- */
16190 case IX86_BUILTIN_SQRTSS:
16191 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16192 case IX86_BUILTIN_RSQRTSS:
16193 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16194 case IX86_BUILTIN_RCPSS:
16195 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16197 case IX86_BUILTIN_LOADUPS:
16198 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16200 case IX86_BUILTIN_STOREUPS:
16201 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
/* --- loadh/loadl: merge a 64-bit memory half into a vector.  --- */
16203 case IX86_BUILTIN_LOADHPS:
16204 case IX86_BUILTIN_LOADLPS:
16205 case IX86_BUILTIN_LOADHPD:
16206 case IX86_BUILTIN_LOADLPD:
16207 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16208 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16209 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16210 : CODE_FOR_sse2_loadlpd);
16211 arg0 = TREE_VALUE (arglist);
16212 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16213 op0 = expand_normal (arg0);
16214 op1 = expand_normal (arg1);
16215 tmode = insn_data[icode].operand[0].mode;
16216 mode0 = insn_data[icode].operand[1].mode;
16217 mode1 = insn_data[icode].operand[2].mode;
16219 op0 = force_reg (mode0, op0);
16220 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16221 if (optimize || target == 0
16222 || GET_MODE (target) != tmode
16223 || !register_operand (target, tmode))
16224 target = gen_reg_rtx (tmode);
16225 pat = GEN_FCN (icode) (target, op0, op1);
/* --- storeh/storel: write a vector half to memory.  --- */
16231 case IX86_BUILTIN_STOREHPS:
16232 case IX86_BUILTIN_STORELPS:
16233 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16234 : CODE_FOR_sse_storelps);
16235 arg0 = TREE_VALUE (arglist);
16236 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16237 op0 = expand_normal (arg0);
16238 op1 = expand_normal (arg1);
16239 mode0 = insn_data[icode].operand[0].mode;
16240 mode1 = insn_data[icode].operand[1].mode;
16242 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16243 op1 = force_reg (mode1, op1);
16245 pat = GEN_FCN (icode) (op0, op1);
16251 case IX86_BUILTIN_MOVNTPS:
16252 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16253 case IX86_BUILTIN_MOVNTQ:
16254 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
/* --- MXCSR access goes through a stack temporary.  --- */
16256 case IX86_BUILTIN_LDMXCSR:
16257 op0 = expand_normal (TREE_VALUE (arglist));
16258 target = assign_386_stack_local (SImode, SLOT_TEMP);
16259 emit_move_insn (target, op0);
16260 emit_insn (gen_sse_ldmxcsr (target));
16263 case IX86_BUILTIN_STMXCSR:
16264 target = assign_386_stack_local (SImode, SLOT_TEMP);
16265 emit_insn (gen_sse_stmxcsr (target));
16266 return copy_to_mode_reg (SImode, target);
/* --- shufps/shufpd: third operand must be an immediate mask.  --- */
16268 case IX86_BUILTIN_SHUFPS:
16269 case IX86_BUILTIN_SHUFPD:
16270 icode = (fcode == IX86_BUILTIN_SHUFPS
16271 ? CODE_FOR_sse_shufps
16272 : CODE_FOR_sse2_shufpd);
16273 arg0 = TREE_VALUE (arglist);
16274 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16275 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16276 op0 = expand_normal (arg0);
16277 op1 = expand_normal (arg1);
16278 op2 = expand_normal (arg2);
16279 tmode = insn_data[icode].operand[0].mode;
16280 mode0 = insn_data[icode].operand[1].mode;
16281 mode1 = insn_data[icode].operand[2].mode;
16282 mode2 = insn_data[icode].operand[3].mode;
16284 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16285 op0 = copy_to_mode_reg (mode0, op0);
16286 if ((optimize && !register_operand (op1, mode1))
16287 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16288 op1 = copy_to_mode_reg (mode1, op1);
16289 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16291 /* @@@ better error message */
16292 error ("mask must be an immediate");
16293 return gen_reg_rtx (tmode);
16295 if (optimize || target == 0
16296 || GET_MODE (target) != tmode
16297 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16298 target = gen_reg_rtx (tmode);
16299 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* --- pshuf*: second operand must be an immediate.  --- */
16305 case IX86_BUILTIN_PSHUFW:
16306 case IX86_BUILTIN_PSHUFD:
16307 case IX86_BUILTIN_PSHUFHW:
16308 case IX86_BUILTIN_PSHUFLW:
16309 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16310 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16311 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16312 : CODE_FOR_mmx_pshufw);
16313 arg0 = TREE_VALUE (arglist);
16314 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16315 op0 = expand_normal (arg0);
16316 op1 = expand_normal (arg1);
16317 tmode = insn_data[icode].operand[0].mode;
16318 mode1 = insn_data[icode].operand[1].mode;
16319 mode2 = insn_data[icode].operand[2].mode;
16321 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16322 op0 = copy_to_mode_reg (mode1, op0);
16323 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16325 /* @@@ better error message */
16326 error ("mask must be an immediate");
16330 || GET_MODE (target) != tmode
16331 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16332 target = gen_reg_rtx (tmode);
16333 pat = GEN_FCN (icode) (target, op0, op1);
/* --- whole-register byte shifts; operate via TImode subregs.  --- */
16339 case IX86_BUILTIN_PSLLDQI128:
16340 case IX86_BUILTIN_PSRLDQI128:
16341 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16342 : CODE_FOR_sse2_lshrti3);
16343 arg0 = TREE_VALUE (arglist);
16344 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16345 op0 = expand_normal (arg0);
16346 op1 = expand_normal (arg1);
16347 tmode = insn_data[icode].operand[0].mode;
16348 mode1 = insn_data[icode].operand[1].mode;
16349 mode2 = insn_data[icode].operand[2].mode;
16351 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16353 op0 = copy_to_reg (op0);
16354 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16356 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16358 error ("shift must be an immediate")
16361 target = gen_reg_rtx (V2DImode);
16362 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
/* --- 3DNow! builtins: all routed through the generic helpers.  --- */
16368 case IX86_BUILTIN_FEMMS:
16369 emit_insn (gen_mmx_femms ());
16372 case IX86_BUILTIN_PAVGUSB:
16373 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16375 case IX86_BUILTIN_PF2ID:
16376 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16378 case IX86_BUILTIN_PFACC:
16379 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16381 case IX86_BUILTIN_PFADD:
16382 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16384 case IX86_BUILTIN_PFCMPEQ:
16385 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16387 case IX86_BUILTIN_PFCMPGE:
16388 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16390 case IX86_BUILTIN_PFCMPGT:
16391 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16393 case IX86_BUILTIN_PFMAX:
16394 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16396 case IX86_BUILTIN_PFMIN:
16397 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16399 case IX86_BUILTIN_PFMUL:
16400 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16402 case IX86_BUILTIN_PFRCP:
16403 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16405 case IX86_BUILTIN_PFRCPIT1:
16406 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16408 case IX86_BUILTIN_PFRCPIT2:
16409 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16411 case IX86_BUILTIN_PFRSQIT1:
16412 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16414 case IX86_BUILTIN_PFRSQRT:
16415 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16417 case IX86_BUILTIN_PFSUB:
16418 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16420 case IX86_BUILTIN_PFSUBR:
16421 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16423 case IX86_BUILTIN_PI2FD:
16424 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16426 case IX86_BUILTIN_PMULHRW:
16427 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16429 case IX86_BUILTIN_PF2IW:
16430 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16432 case IX86_BUILTIN_PFNACC:
16433 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16435 case IX86_BUILTIN_PFPNACC:
16436 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16438 case IX86_BUILTIN_PI2FW:
16439 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16441 case IX86_BUILTIN_PSWAPDSI:
16442 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16444 case IX86_BUILTIN_PSWAPDSF:
16445 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
/* --- SSE2 builtins.  --- */
16447 case IX86_BUILTIN_SQRTSD:
16448 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16449 case IX86_BUILTIN_LOADUPD:
16450 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16451 case IX86_BUILTIN_STOREUPD:
16452 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16454 case IX86_BUILTIN_MFENCE:
16455 emit_insn (gen_sse2_mfence ());
16457 case IX86_BUILTIN_LFENCE:
16458 emit_insn (gen_sse2_lfence ());
16461 case IX86_BUILTIN_CLFLUSH:
16462 arg0 = TREE_VALUE (arglist);
16463 op0 = expand_normal (arg0);
16464 icode = CODE_FOR_sse2_clflush;
16465 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16466 op0 = copy_to_mode_reg (Pmode, op0);
16468 emit_insn (gen_sse2_clflush (op0));
16471 case IX86_BUILTIN_MOVNTPD:
16472 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16473 case IX86_BUILTIN_MOVNTDQ:
16474 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16475 case IX86_BUILTIN_MOVNTI:
16476 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16478 case IX86_BUILTIN_LOADDQU:
16479 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16480 case IX86_BUILTIN_STOREDQU:
16481 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
/* --- SSE3 monitor/mwait; 32- vs 64-bit pattern selection is on
   elided lines between 16495 and 16499.  --- */
16483 case IX86_BUILTIN_MONITOR:
16484 arg0 = TREE_VALUE (arglist);
16485 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16486 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16487 op0 = expand_normal (arg0);
16488 op1 = expand_normal (arg1);
16489 op2 = expand_normal (arg2);
16491 op0 = copy_to_mode_reg (Pmode, op0);
16493 op1 = copy_to_mode_reg (SImode, op1);
16495 op2 = copy_to_mode_reg (SImode, op2);
16497 emit_insn (gen_sse3_monitor (op0, op1, op2));
16499 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16502 case IX86_BUILTIN_MWAIT:
16503 arg0 = TREE_VALUE (arglist);
16504 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16505 op0 = expand_normal (arg0);
16506 op1 = expand_normal (arg1);
16508 op0 = copy_to_mode_reg (SImode, op0);
16510 op1 = copy_to_mode_reg (SImode, op1);
16511 emit_insn (gen_sse3_mwait (op0, op1));
16514 case IX86_BUILTIN_LDDQU:
16515 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
/* --- SSSE3 palignr: third operand must be an immediate shift.  --- */
16518 case IX86_BUILTIN_PALIGNR:
16519 case IX86_BUILTIN_PALIGNR128:
16520 if (fcode == IX86_BUILTIN_PALIGNR)
16522 icode = CODE_FOR_ssse3_palignrdi;
16527 icode = CODE_FOR_ssse3_palignrti;
16530 arg0 = TREE_VALUE (arglist);
16531 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16532 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16533 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16534 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16535 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
16536 tmode = insn_data[icode].operand[0].mode;
16537 mode1 = insn_data[icode].operand[1].mode;
16538 mode2 = insn_data[icode].operand[2].mode;
16539 mode3 = insn_data[icode].operand[3].mode;
16541 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16543 op0 = copy_to_reg (op0);
16544 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16546 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16548 op1 = copy_to_reg (op1);
16549 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
16551 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
16553 error ("shift must be an immediate");
16556 target = gen_reg_rtx (mode);
16557 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
/* --- Vector init/extract/set wrappers.  --- */
16564 case IX86_BUILTIN_VEC_INIT_V2SI:
16565 case IX86_BUILTIN_VEC_INIT_V4HI:
16566 case IX86_BUILTIN_VEC_INIT_V8QI:
16567 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16569 case IX86_BUILTIN_VEC_EXT_V2DF:
16570 case IX86_BUILTIN_VEC_EXT_V2DI:
16571 case IX86_BUILTIN_VEC_EXT_V4SF:
16572 case IX86_BUILTIN_VEC_EXT_V4SI:
16573 case IX86_BUILTIN_VEC_EXT_V8HI:
16574 case IX86_BUILTIN_VEC_EXT_V2SI:
16575 case IX86_BUILTIN_VEC_EXT_V4HI:
16576 return ix86_expand_vec_ext_builtin (arglist, target);
16578 case IX86_BUILTIN_VEC_SET_V8HI:
16579 case IX86_BUILTIN_VEC_SET_V4HI:
16580 return ix86_expand_vec_set_builtin (arglist);
/* --- Fall through to the table-driven builtins.  --- */
16586 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16587 if (d->code == fcode)
16589 /* Compares are treated specially.  */
16590 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16591 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16592 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16593 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16594 return ix86_expand_sse_compare (d, arglist, target);
16596 return ix86_expand_binop_builtin (d->icode, arglist, target);
16599 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16600 if (d->code == fcode)
16601 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16603 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16604 if (d->code == fcode)
16605 return ix86_expand_sse_comi (d, arglist, target);
/* Every builtin the front end registered must be handled above.  */
16607 gcc_unreachable ();
16610 /* Store OPERAND to the memory after reload is completed. This means
16611 that we can't easily use assign_stack_local. */
/* Post-reload spill of OPERAND to stack memory (assign_stack_local is
   unavailable after reload).  Uses the red zone when permitted,
   otherwise pushes below the stack pointer.  NOTE(review): listing is
   elided -- the if/else ladder's braces, the mode switch, and several
   emit/return lines fall on line numbers not visible here.  */
16613 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16617 gcc_assert (reload_completed);
/* Red zone: store at sp - RED_ZONE_SIZE without moving sp.  */
16618 if (TARGET_RED_ZONE)
16620 result = gen_rtx_MEM (mode,
16621 gen_rtx_PLUS (Pmode,
16623 GEN_INT (-RED_ZONE_SIZE)));
16624 emit_move_insn (result, operand);
/* 64-bit, no red zone: push as a DImode pre-decrement store.  */
16626 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16632 operand = gen_lowpart (DImode, operand);
16636 gen_rtx_SET (VOIDmode,
16637 gen_rtx_MEM (DImode,
16638 gen_rtx_PRE_DEC (DImode,
16639 stack_pointer_rtx)),
16643 gcc_unreachable ();
16645 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit: DImode values are split and pushed as two SImode words.  */
16654 split_di (&operand, 1, operands, operands + 1);
16656 gen_rtx_SET (VOIDmode,
16657 gen_rtx_MEM (SImode,
16658 gen_rtx_PRE_DEC (Pmode,
16659 stack_pointer_rtx)),
16662 gen_rtx_SET (VOIDmode,
16663 gen_rtx_MEM (SImode,
16664 gen_rtx_PRE_DEC (Pmode,
16665 stack_pointer_rtx)),
16670 /* Store HImodes as SImodes.  */
16671 operand = gen_lowpart (SImode, operand);
16675 gen_rtx_SET (VOIDmode,
16676 gen_rtx_MEM (GET_MODE (operand),
16677 gen_rtx_PRE_DEC (SImode,
16678 stack_pointer_rtx)),
16682 gcc_unreachable ();
16684 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16689 /* Free operand from the memory. */
/* Undo ix86_force_to_memory: pop the space pushed for MODE.  With a
   red zone nothing was pushed, so nothing is released.  NOTE(review):
   listing is elided -- the size computation between lines 16697 and
   16703 is not visible here.  */
16691 ix86_free_from_memory (enum machine_mode mode)
16693 if (!TARGET_RED_ZONE)
16697 if (mode == DImode || TARGET_64BIT)
16701 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
16702 to pop or add instruction if registers are available.  */
16703 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16704 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16709 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16710 QImode must go into class Q_REGS.
16711 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16712 movdf to do mem-to-mem moves through integer regs. */
/* Implement PREFERRED_RELOAD_CLASS: narrow CLASS for loading X, or
   return NO_REGS to force X into memory.  NOTE(review): listing is
   elided -- several return statements and braces fall on line numbers
   not visible here; code lines below are verbatim.  */
16714 ix86_preferred_reload_class (rtx x, enum reg_class class)
16716 enum machine_mode mode = GET_MODE (x);
16718 /* We're only allowed to return a subclass of CLASS.  Many of the
16719 following checks fail for NO_REGS, so eliminate that early.  */
16720 if (class == NO_REGS)
16723 /* All classes can load zeros.  */
16724 if (x == CONST0_RTX (mode))
16727 /* Force constants into memory if we are loading a (nonzero) constant into
16728 an MMX or SSE register.  This is because there are no MMX/SSE instructions
16729 to load from a constant.  */
16731 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16734 /* Prefer SSE regs only, if we can use them for math.  */
16735 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16736 return SSE_CLASS_P (class) ? class : NO_REGS;
16738 /* Floating-point constants need more complex checks.  */
16739 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16741 /* General regs can load everything.  */
16742 if (reg_class_subset_p (class, GENERAL_REGS))
16745 /* Floats can load 0 and 1 plus some others.  Note that we eliminated
16746 zero above.  We only want to wind up preferring 80387 registers if
16747 we plan on doing computation with them.  */
16749 && standard_80387_constant_p (x))
16751 /* Limit class to non-sse.  */
16752 if (class == FLOAT_SSE_REGS)
16754 if (class == FP_TOP_SSE_REGS)
16756 if (class == FP_SECOND_SSE_REGS)
16757 return FP_SECOND_REG;
16758 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16765 /* Generally when we see PLUS here, it's the function invariant
16766 (plus soft-fp const_int).  Which can only be computed into general
16768 if (GET_CODE (x) == PLUS)
16769 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16771 /* QImode constants are easy to load, but non-constant QImode data
16772 must go into Q_REGS.  */
16773 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16775 if (reg_class_subset_p (class, Q_REGS))
16777 if (reg_class_subset_p (Q_REGS, class))
16785 /* Discourage putting floating-point values in SSE registers unless
16786 SSE math is being used, and likewise for the 387 registers. */
/* Implement PREFERRED_OUTPUT_RELOAD_CLASS: steer FP output reloads to
   the register bank (SSE vs x87) actually used for math in MODE.
   NOTE(review): listing is elided -- some return statements and the
   final default return fall on line numbers not visible here.  */
16788 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16790 enum machine_mode mode = GET_MODE (x);
16792 /* Restrict the output reload class to the register bank that we are doing
16793 math on.  If we would like not to return a subset of CLASS, reject this
16794 alternative: if reload cannot do this, it will still use its choice.  */
16795 mode = GET_MODE (x);
16796 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16797 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16799 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16801 if (class == FP_TOP_SSE_REGS)
16803 else if (class == FP_SECOND_SSE_REGS)
16804 return FP_SECOND_REG;
16806 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16812 /* If we are copying between general and FP registers, we need a memory
16813 location. The same is true for SSE and MMX registers.
16815 The macro can't work reliably when one of the CLASSES is class containing
16816 registers from multiple units (SSE, MMX, integer). We avoid this by never
16817 combining those units in single alternative in the machine description.
16818 Ensure that this constraint holds to avoid unexpected surprises.
16820 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16821 enforce these sanity checks. */
/* Implement SECONDARY_MEMORY_NEEDED: nonzero when moving MODE between
   CLASS1 and CLASS2 must go through memory.  NOTE(review): listing is
   elided -- several return statements and braces fall on line numbers
   not visible here.  */
16824 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16825 enum machine_mode mode, int strict)
/* Sanity: mixed-unit classes must not reach here; see head comment.  */
16827 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16828 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16829 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16830 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16831 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16832 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16834 gcc_assert (!strict);
16838 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16841 /* ??? This is a lie.  We do have moves between mmx/general, and for
16842 mmx/sse2.  But by saying we need secondary memory we discourage the
16843 register allocator from using the mmx registers unless needed.  */
16844 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16847 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16849 /* SSE1 doesn't have any direct moves from other classes.  */
16853 /* If the target says that inter-unit moves are more expensive
16854 than moving through memory, then don't generate them.  */
16855 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16858 /* Between SSE and general, we have moves no larger than word size.  */
16859 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16862 /* ??? For the cost of one register reformat penalty, we could use
16863 the same instructions to move SFmode and DFmode data, but the
16864 relevant move patterns don't support those alternatives.  */
16865 if (mode == SFmode || mode == DFmode)
16872 /* Return true if the registers in CLASS cannot represent the change from
16873 modes FROM to TO. */
/* Implement CANNOT_CHANGE_MODE_CLASS: true if registers in CLASS
   cannot have their mode changed from FROM to TO via subreg.
   NOTE(review): listing is elided -- the size-equality early exit and
   return statements fall on line numbers not visible here.  */
16876 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16877 enum reg_class class)
16882 /* x87 registers can't do subreg at all, as all values are reformatted
16883 to extended precision.  */
16884 if (MAYBE_FLOAT_CLASS_P (class))
16887 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16889 /* Vector registers do not support QI or HImode loads.  If we don't
16890 disallow a change to these modes, reload will assume it's ok to
16891 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
16892 the vec_dupv4hi pattern.  */
16893 if (GET_MODE_SIZE (from) < 4)
16896 /* Vector registers do not support subreg with nonzero offsets, which
16897 are otherwise valid for integer registers.  Since we can't see
16898 whether we have a nonzero offset from here, prohibit all
16899 nonparadoxical subregs changing size.  */
16900 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16907 /* Return the cost of moving data from a register in class CLASS1 to
16908 one in class CLASS2.
16910 It is not required that the cost always equal 2 when FROM is the same as TO;
16911 on some machines it is expensive to move between registers if they are not
16912 general registers. */
/* Implement REGISTER_MOVE_COST for MODE between CLASS1 and CLASS2.
   NOTE(review): listing is elided -- the local cost accumulator's
   declaration and several returns fall on line numbers not visible
   here.  */
16915 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16916 enum reg_class class2)
16918 /* In case we require secondary memory, compute cost of the store followed
16919 by load.  In order to avoid bad register allocation choices, we need
16920 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
16922 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16926 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16927 MEMORY_MOVE_COST (mode, class1, 1));
16928 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16929 MEMORY_MOVE_COST (mode, class2, 1));
16931 /* In case of copying from general_purpose_register we may emit multiple
16932 stores followed by single load causing memory size mismatch stall.
16933 Count this as arbitrarily high cost of 20.  */
16934 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16937 /* In the case of FP/MMX moves, the registers actually overlap, and we
16938 have to switch modes in order to treat them differently.  */
16939 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16940 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16946 /* Moves between SSE/MMX and integer unit are expensive.  */
16947 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16948 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16949 return ix86_cost->mmxsse_to_integer;
16950 if (MAYBE_FLOAT_CLASS_P (class1))
16951 return ix86_cost->fp_move;
16952 if (MAYBE_SSE_CLASS_P (class1))
16953 return ix86_cost->sse_move;
16954 if (MAYBE_MMX_CLASS_P (class1))
16955 return ix86_cost->mmx_move;
16959 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* Implement HARD_REGNO_MODE_OK: can hard register REGNO hold MODE?
   NOTE(review): listing is elided -- several return statements and
   braces fall on line numbers not visible here.  */
16962 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16964 /* Flags and only flags can only hold CCmode values.  */
16965 if (CC_REGNO_P (regno))
16966 return GET_MODE_CLASS (mode) == MODE_CC;
16967 if (GET_MODE_CLASS (mode) == MODE_CC
16968 || GET_MODE_CLASS (mode) == MODE_RANDOM
16969 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16971 if (FP_REGNO_P (regno))
16972 return VALID_FP_MODE_P (mode);
16973 if (SSE_REGNO_P (regno))
16975 /* We implement the move patterns for all vector modes into and
16976 out of SSE registers, even when no operation instructions
16978 return (VALID_SSE_REG_MODE (mode)
16979 || VALID_SSE2_REG_MODE (mode)
16980 || VALID_MMX_REG_MODE (mode)
16981 || VALID_MMX_REG_MODE_3DNOW (mode));
16983 if (MMX_REGNO_P (regno))
16985 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16986 so if the register is available at all, then we can move data of
16987 the given mode into or out of it.  */
16988 return (VALID_MMX_REG_MODE (mode)
16989 || VALID_MMX_REG_MODE_3DNOW (mode));
16992 if (mode == QImode)
16994 /* Take care for QImode values - they can be in non-QI regs,
16995 but then they do cause partial register stalls.  */
16996 if (regno < 4 || TARGET_64BIT)
16998 if (!TARGET_PARTIAL_REG_STALL)
17000 return reload_in_progress || reload_completed;
17002 /* We handle both integer and floats in the general purpose registers.  */
17003 else if (VALID_INT_MODE_P (mode))
17005 else if (VALID_FP_MODE_P (mode))
17007 /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
17008 on to use that value in smaller contexts, this can easily force a
17009 pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
17010 supporting DImode, allow it.  */
17011 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17017 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17018 tieable integer mode. */
/* NOTE(review): this listing is heavily elided -- the switch over MODE
   (cases for QI/HI/SI/DI, presumably) is almost entirely on line
   numbers not visible here; only two of its returns remain.  */
17021 ix86_tieable_integer_mode_p (enum machine_mode mode)
17030 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17033 return TARGET_64BIT;
17040 /* Return true if MODE1 is accessible in a register that can hold MODE2
17041 without copying. That is, all register classes that can hold MODE2
17042 can also hold MODE1. */
/* Implement MODES_TIEABLE_P: true if every register class that can
   hold MODE2 can also hold MODE1.  NOTE(review): listing is elided --
   the early "return true" bodies and the final default return fall on
   line numbers not visible here.  */
17045 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17047 if (mode1 == mode2)
17050 if (ix86_tieable_integer_mode_p (mode1)
17051 && ix86_tieable_integer_mode_p (mode2))
17054 /* MODE2 being XFmode implies fp stack or general regs, which means we
17055 can tie any smaller floating point modes to it.  Note that we do not
17056 tie this with TFmode.  */
17057 if (mode2 == XFmode)
17058 return mode1 == SFmode || mode1 == DFmode;
17060 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17061 that we can tie it with SFmode.  */
17062 if (mode2 == DFmode)
17063 return mode1 == SFmode;
17065 /* If MODE2 is only appropriate for an SSE register, then tie with
17066 any other mode acceptable to SSE registers.  */
17067 if (GET_MODE_SIZE (mode2) >= 8
17068 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17069 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17071 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17072 with any other mode acceptable to MMX registers.  */
17073 if (GET_MODE_SIZE (mode2) == 8
17074 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17075 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17080 /* Return the cost of moving data of mode M between a
17081 register and memory. A value of 2 is the default; this cost is
17082 relative to those in `REGISTER_MOVE_COST'.
17084 If moving between registers and memory is more expensive than
17085 between two registers, you should define this macro to express the
17088 Model also increased moving costs of QImode registers in non
/* Implement MEMORY_MOVE_COST for MODE in CLASS; IN nonzero means a
   load, zero a store.  NOTE(review): listing is elided -- the "index"
   computations and most switch-case labels fall on line numbers not
   visible here.  */
17092 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17094 if (FLOAT_CLASS_P (class))
17111 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17113 if (SSE_CLASS_P (class))
17116 switch (GET_MODE_SIZE (mode))
17130 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17132 if (MMX_CLASS_P (class))
17135 switch (GET_MODE_SIZE (mode))
17146 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: cost by mode size; QImode distinguishes Q_REGS
   (cheap byte access) from the rest.  */
17148 switch (GET_MODE_SIZE (mode))
17152 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17153 : ix86_cost->movzbl_load);
17155 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17156 : ix86_cost->int_store[0] + 4);
17159 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17161 /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
17162 if (mode == TFmode)
17164 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17165 * (((int) GET_MODE_SIZE (mode)
17166 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17170 /* Compute a (partial) cost for rtx X.  Return true if the complete
17171    cost has been computed, and false if subexpressions should be
17172    scanned.  In either case, *TOTAL contains the cost result.  */
/* NOTE(review): elided listing -- the `switch (code)' skeleton, most case
   labels, braces and several return statements are missing between the
   visible lines.  Comments below annotate only what is visible.  */
17175 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17177   enum machine_mode mode = GET_MODE (x);
/* Constant operands: 64-bit immediates that don't fit the x86-64
   immediate forms are more expensive; PIC symbolic constants that need a
   GOT access likewise.  */
17185       if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17187       else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17189       else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): `!GET_CODE (x) != LABEL_REF' is almost certainly a typo
   for `GET_CODE (x) != LABEL_REF' (the negation makes the comparison
   nearly always true); this was corrected in later GCC revisions --
   verify against upstream before changing.  */
17191 	       || (!GET_CODE (x) != LABEL_REF
17192 		   && (GET_CODE (x) != SYMBOL_REF
17193 		       || !SYMBOL_REF_LOCAL_P (x)))))
17200       if (mode == VOIDmode)
/* FP constants loadable by fld1/fldz etc. are cheap; others are costed
   as a memory load plus a size penalty.  */
17203       switch (standard_80387_constant_p (x))
17208 	default: /* Other constants */
17213 	  /* Start with (MEM (SYMBOL_REF)), since that's where
17214 	     it'll probably end up.  Add a penalty for size. */
17215 	  *total = (COSTS_N_INSNS (1)
17216 		    + (flag_pic != 0 && !TARGET_64BIT)
17217 		    + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17223       /* The zero extensions is often completely free on x86_64, so make
17224 	 it as cheap as possible.  */
17225       if (TARGET_64BIT && mode == DImode
17226 	  && GET_MODE (XEXP (x, 0)) == SImode)
17228       else if (TARGET_ZERO_EXTEND_WITH_AND)
17229 	*total = ix86_cost->add;
17231 	*total = ix86_cost->movzx;
17235       *total = ix86_cost->movsx;
/* Shifts: constant shifts by 1 can be an add; shifts by 2 or 3 may be an
   lea when that is no more expensive than a constant shift.  */
17239       if (GET_CODE (XEXP (x, 1)) == CONST_INT
17240 	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17242 	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17245 	    *total = ix86_cost->add;
17248 	  if ((value == 2 || value == 3)
17249 	      && ix86_cost->lea <= ix86_cost->shift_const)
17251 	      *total = ix86_cost->lea;
/* DImode shifts on 32-bit targets are synthesized from two 32-bit
   shifts (plus moves for variable counts masked by an AND).  */
17261 	  if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17263 	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17265 		  if (INTVAL (XEXP (x, 1)) > 32)
17266 		    *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17268 		    *total = ix86_cost->shift_const * 2;
17272 		  if (GET_CODE (XEXP (x, 1)) == AND)
17273 		    *total = ix86_cost->shift_var * 2;
17275 		    *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17280 	      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17281 		*total = ix86_cost->shift_const;
17283 		*total = ix86_cost->shift_var;
/* MULT: FP multiplies get a flat cost; integer multiplies are costed by
   the number of set bits in a constant multiplier (popcount via the
   `value &= value - 1' loop) times mult_bit, plus mult_init.  */
17288       if (FLOAT_MODE_P (mode))
17290 	  *total = ix86_cost->fmul;
17295 	  rtx op0 = XEXP (x, 0);
17296 	  rtx op1 = XEXP (x, 1);
17298 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17300 	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17301 	      for (nbits = 0; value != 0; value &= value - 1)
17305 	    /* This is arbitrary.  */
17308 	  /* Compute costs correctly for widening multiplication.  */
17309 	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
17310 	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17311 	         == GET_MODE_SIZE (mode))
17313 	      int is_mulwiden = 0;
17314 	      enum machine_mode inner_mode = GET_MODE (op0);
17316 	      if (GET_CODE (op0) == GET_CODE (op1))
17317 		is_mulwiden = 1, op1 = XEXP (op1, 0);
17318 	      else if (GET_CODE (op1) == CONST_INT)
17320 		  if (GET_CODE (op0) == SIGN_EXTEND)
17321 		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17324 		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
/* A recognized widening multiply is costed in the narrower inner mode.  */
17328 		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17331 	  *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17332 		    + nbits * ix86_cost->mult_bit
17333 	            + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17342       if (FLOAT_MODE_P (mode))
17343 	*total = ix86_cost->fdiv;
17345 	*total = ix86_cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize the address forms (base + index*scale + disp) that an
   lea can compute in one instruction.  */
17349       if (FLOAT_MODE_P (mode))
17350 	*total = ix86_cost->fadd;
17351       else if (GET_MODE_CLASS (mode) == MODE_INT
17352 	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17354 	  if (GET_CODE (XEXP (x, 0)) == PLUS
17355 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17356 	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17357 	      && CONSTANT_P (XEXP (x, 1)))
17359 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17360 	      if (val == 2 || val == 4 || val == 8)
17362 		  *total = ix86_cost->lea;
17363 		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17364 		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17366 		  *total += rtx_cost (XEXP (x, 1), outer_code);
17370 	  else if (GET_CODE (XEXP (x, 0)) == MULT
17371 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17373 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17374 	      if (val == 2 || val == 4 || val == 8)
17376 		  *total = ix86_cost->lea;
17377 		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17378 		  *total += rtx_cost (XEXP (x, 1), outer_code);
17382 	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
17384 	      *total = ix86_cost->lea;
17385 	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17386 	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17387 	      *total += rtx_cost (XEXP (x, 1), outer_code);
17394       if (FLOAT_MODE_P (mode))
17396 	  *total = ix86_cost->fadd;
/* 32-bit DImode add/sub: two adds; operands narrower than DImode count
   double (the shift by a boolean doubles their rtx_cost).  */
17404       if (!TARGET_64BIT && mode == DImode)
17406 	  *total = (ix86_cost->add * 2
17407 		    + (rtx_cost (XEXP (x, 0), outer_code)
17408 		       << (GET_MODE (XEXP (x, 0)) != DImode))
17409 		    + (rtx_cost (XEXP (x, 1), outer_code)
17410 		       << (GET_MODE (XEXP (x, 1)) != DImode)));
17416       if (FLOAT_MODE_P (mode))
17418 	  *total = ix86_cost->fchs;
17424       if (!TARGET_64BIT && mode == DImode)
17425 	*total = ix86_cost->add * 2;
17427 	*total = ix86_cost->add;
/* COMPARE of a single extracted bit against zero maps to test[bwl].  */
17431       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17432 	  && XEXP (XEXP (x, 0), 1) == const1_rtx
17433 	  && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17434 	  && XEXP (x, 1) == const0_rtx)
17436 	  /* This kind of construct is implemented using test[bwl].
17437 	     Treat it as if we had an AND.  */
17438 	  *total = (ix86_cost->add
17439 		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17440 		    + rtx_cost (const1_rtx, outer_code));
17446       if (!TARGET_SSE_MATH
17448 	  || (mode == DFmode && !TARGET_SSE2))
17449 	/* For standard 80387 constants, raise the cost to prevent
17450 	   compress_float_constant() to generate load from memory.  */
17451 	switch (standard_80387_constant_p (XEXP (x, 0)))
17461 	    *total = (x86_ext_80387_constants & TUNEMASK
17468       if (FLOAT_MODE_P (mode))
17469 	*total = ix86_cost->fabs;
17473       if (FLOAT_MODE_P (mode))
17474 	*total = ix86_cost->fsqrt;
17478       if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels (LPC$n / Ln$lz) for each
   emitted Mach-O stub.  */
17489 static int current_machopic_label_num;
17491 /* Given a symbol name and its associated stub, write out the
17492    definition of the stub.  */
/* NOTE(review): elided listing -- the MACHOPIC_PURE/#if conditionals that
   select between the PIC and non-PIC stub bodies are missing between the
   visible fprintf calls.  */
17495 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17497   unsigned int length;
17498   char *binder_name, *symbol_name, lazy_ptr_name[32];
17499   int label = ++current_machopic_label_num;
17501   /* For 64-bit we shouldn't get here.  */
17502   gcc_assert (!TARGET_64BIT);
17504   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
17505   symb = (*targetm.strip_name_encoding) (symb);
/* Build the binder and symbol names in stack buffers sized from the
   input strings plus slack for the decoration macros.  */
17507   length = strlen (stub);
17508   binder_name = alloca (length + 32);
17509   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17511   length = strlen (symb);
17512   symbol_name = alloca (length + 32);
17513   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17515   sprintf (lazy_ptr_name, "L%d$lz", label);
17518     switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17520     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
/* Emit the stub itself: jump indirect through the lazy pointer, either
   PC-relative (call/popl to materialize the PC in %eax) or absolute.  */
17522   fprintf (file, "%s:\n", stub);
17523   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17527       fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17528       fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17529       fprintf (file, "\tjmp\t*%%edx\n");
17532     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder: push the lazy pointer address and transfer to dyld's lazy
   binding helper.  */
17534   fprintf (file, "%s:\n", binder_name);
17538       fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17539       fprintf (file, "\tpushl\t%%eax\n");
17542     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17544   fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer slot, initially pointing at the binder.  */
17546   switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17547   fprintf (file, "%s:\n", lazy_ptr_name);
17548   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17549   fprintf (file, "\t.long %s\n", binder_name);
/* End-of-file hook for Darwin/x86: delegates to the generic Darwin
   file-end handling.  (Elided listing: return type and braces are not
   visible here.)  */
17553 darwin_x86_file_end (void)
17555   darwin_file_end ();
17558 #endif /* TARGET_MACHO */
17560 /* Order the registers for register allocator.  */
/* Fills reg_alloc_order[] by preference: call-clobbered general regs,
   call-saved general regs, then x87 before SSE when doing x87 FP math
   (or after SSE otherwise), then MMX, padding the remainder with 0.  */
17563 x86_order_regs_for_local_alloc (void)
17568    /* First allocate the local general purpose registers.  */
17569   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17570     if (GENERAL_REGNO_P (i) && call_used_regs[i])
17571       reg_alloc_order [pos++] = i;
17573    /* Global general purpose registers.  */
17574   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17575     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17576       reg_alloc_order [pos++] = i;
17578   /* x87 registers come first in case we are doing FP math
17580   if (!TARGET_SSE_MATH)
17581     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17582       reg_alloc_order [pos++] = i;
17584   /* SSE registers.  */
17585   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17586     reg_alloc_order [pos++] = i;
17587   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17588     reg_alloc_order [pos++] = i;
17590   /* x87 registers.  */
17591   if (TARGET_SSE_MATH)
17592     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17593       reg_alloc_order [pos++] = i;
17595   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17596     reg_alloc_order [pos++] = i;
17598   /* Initialize the rest of array as we do not allocate some registers
17600   while (pos < FIRST_PSEUDO_REGISTER)
17601     reg_alloc_order [pos++] = 0;
17604 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17605    struct attribute_spec.handler.  */
/* Validates that the attribute is applied to a record/union type and
   that the opposite attribute is not already present; otherwise warns
   and sets *no_add_attrs so the attribute is dropped.  */
17607 ix86_handle_struct_attribute (tree *node, tree name,
17608 			      tree args ATTRIBUTE_UNUSED,
17609 			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17612   if (DECL_P (*node))
17614       if (TREE_CODE (*node) == TYPE_DECL)
17615 	type = &TREE_TYPE (*node);
17620   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17621 		 || TREE_CODE (*type) == UNION_TYPE)))
17623       warning (OPT_Wattributes, "%qs attribute ignored",
17624 	       IDENTIFIER_POINTER (name));
17625       *no_add_attrs = true;
/* Reject ms_struct when gcc_struct is already present, and vice versa.  */
17628   else if ((is_attribute_p ("ms_struct", name)
17629 	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17630 	   || ((is_attribute_p ("gcc_struct", name)
17631 		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17633       warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17634                IDENTIFIER_POINTER (name));
17635       *no_add_attrs = true;
/* Return whether RECORD_TYPE should use MS bitfield layout: either the
   target default is MS layout and "gcc_struct" does not override it, or
   "ms_struct" is explicitly requested on the type.  */
17642 ix86_ms_bitfield_layout_p (tree record_type)
17644   return (TARGET_MS_BITFIELD_LAYOUT &&
17645 	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17646     || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17649 /* Returns an expression indicating where the this parameter is
17650    located on entry to the FUNCTION.  */
/* NOTE(review): elided listing -- the TARGET_64BIT guard and parts of the
   regparm path are missing between the visible lines.  */
17653 x86_this_parameter (tree function)
17655   tree type = TREE_TYPE (function);
/* 64-bit: `this' arrives in the first or second integer argument
   register, depending on whether a hidden aggregate-return pointer
   occupies the first one.  */
17659       int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17660       return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17663   if (ix86_function_regparm (type, function) > 0)
17667       parm = TYPE_ARG_TYPES (type);
17668       /* Figure out whether or not the function has a variable number of
17670       for (; parm; parm = TREE_CHAIN (parm))
17671 	if (TREE_VALUE (parm) == void_type_node)
17673       /* If not, the this parameter is in the first argument.  */
17677 	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17679 	  return gen_rtx_REG (SImode, regno);
/* Otherwise `this' is on the stack: at sp+8 when a hidden aggregate
   return pointer is passed first, else at sp+4.  */
17683   if (aggregate_value_p (TREE_TYPE (type), type))
17684     return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17686     return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17689 /* Determine whether x86_output_mi_thunk can succeed.  */
17692 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17693 			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17694 			 HOST_WIDE_INT vcall_offset, tree function)
17696   /* 64-bit can handle anything.  */
/* On 32-bit the thunk needs a scratch register: fine when fewer than 3
   regparm argument registers are in use; otherwise a vcall offset or a
   PIC non-local call would need a register we don't have.  */
17700   /* For 32-bit, everything's fine if we have one free register.  */
17701   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17704   /* Need a free register for vcall_offset.  */
17708   /* Need a free register for GOT references.  */
17709   if (flag_pic && !(*targetm.binds_local_p) (function))
17712   /* Otherwise ok.  */
17716 /* Output the assembler code for a thunk function.  THUNK_DECL is the
17717    declaration for the thunk function itself, FUNCTION is the decl for
17718    the target function.  DELTA is an immediate constant offset to be
17719    added to THIS.  If VCALL_OFFSET is nonzero, the word at
17720    *(*this + vcall_offset) should be added to THIS.  */
/* NOTE(review): elided listing -- several TARGET_64BIT/TARGET_MACHO
   conditionals and braces are missing between the visible lines.  */
17723 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17724 		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17725 		     HOST_WIDE_INT vcall_offset, tree function)
17728   rtx this = x86_this_parameter (function);
17731   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
17732      pull it in now and let DELTA benefit.  */
17735   else if (vcall_offset)
17737       /* Put the this parameter into %eax.  */
17739       xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17740       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17743     this_reg = NULL_RTX;
17745   /* Adjust the this parameter by a fixed constant.  */
17748       xops[0] = GEN_INT (delta);
17749       xops[1] = this_reg ? this_reg : this;
/* 64-bit deltas that don't fit an immediate are materialized in R10.  */
17752 	  if (!x86_64_general_operand (xops[0], DImode))
17754 	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17756 	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17760 	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17763 	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17766   /* Adjust the this parameter by a value stored in the vtable.  */
/* Scratch register: R10 on 64-bit; ECX on 32-bit, or EAX when the callee
   is fastcall (ECX then carries an argument).  */
17770 	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17773 	  int tmp_regno = 2 /* ECX */;
17774 	  if (lookup_attribute ("fastcall",
17775 	      TYPE_ATTRIBUTES (TREE_TYPE (function))))
17776 	    tmp_regno = 0 /* EAX */;
17777 	  tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into the scratch register.  */
17780       xops[0] = gen_rtx_MEM (Pmode, this_reg);
17783 	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17785 	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17787       /* Adjust the this parameter.  */
17788       xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* Large vcall offsets on 64-bit need a second scratch (R11) because the
   offset doesn't fit a memory displacement.  */
17789       if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17791 	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17792 	  xops[0] = GEN_INT (vcall_offset);
17794 	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17795 	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17797       xops[1] = this_reg;
17799 	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17801 	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17804   /* If necessary, drop THIS back to its stack slot.  */
17805   if (this_reg && this_reg != this)
17807       xops[0] = this_reg;
17809       output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the real function: direct when the target binds
   locally, otherwise through the GOT (64-bit) or a Mach-O stub / GOT
   sequence (32-bit).  */
17812   xops[0] = XEXP (DECL_RTL (function), 0);
17815       if (!flag_pic || (*targetm.binds_local_p) (function))
17816 	output_asm_insn ("jmp\t%P0", xops);
17819 	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17820 	  tmp = gen_rtx_CONST (Pmode, tmp);
17821 	  tmp = gen_rtx_MEM (QImode, tmp);
17823 	  output_asm_insn ("jmp\t%A0", xops);
17828       if (!flag_pic || (*targetm.binds_local_p) (function))
17829 	output_asm_insn ("jmp\t%P0", xops);
17834 	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
17835 	    tmp = (gen_rtx_SYMBOL_REF
17837 		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17838 	    tmp = gen_rtx_MEM (QImode, tmp);
17840 	    output_asm_insn ("jmp\t%0", xops);
17843 #endif /* TARGET_MACHO */
17845 	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17846 	  output_set_got (tmp, NULL_RTX);
17849 	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17850 	  output_asm_insn ("jmp\t{*}%1", xops);
/* Target hook run at the start of assembly output: emits the generic
   prologue plus x86-specific directives (.version, __fltused, and
   .intel_syntax when the Intel dialect is selected).  */
17856 x86_file_start (void)
17858   default_file_start ();
17860   darwin_file_start ();
17862   if (X86_FILE_START_VERSION_DIRECTIVE)
17863     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17864   if (X86_FILE_START_FLTUSED)
17865     fputs ("\t.global\t__fltused\n", asm_out_file);
17866   if (ix86_asm_dialect == ASM_INTEL)
17867     fputs ("\t.intel_syntax\n", asm_out_file);
/* Cap the alignment of a structure FIELD at 32 bits for scalar integer,
   DFmode and DCmode fields on 32-bit targets without -malign-double
   (array fields are judged by their innermost element type).  Returns
   the possibly reduced alignment; COMPUTED is the default.  */
17871 x86_field_alignment (tree field, int computed)
17873   enum machine_mode mode;
17874   tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the natural alignment.  */
17876   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17878   mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17879 		    ? get_inner_array_type (type) : type);
17880   if (mode == DFmode || mode == DCmode
17881       || GET_MODE_CLASS (mode) == MODE_INT
17882       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17883     return MIN (32, computed);
17887 /* Output assembler code to FILE to increment profiler label # LABELNO
17888    for profiling a function entry.  */
/* NOTE(review): elided listing -- the TARGET_64BIT/flag_pic conditionals
   selecting among the four sequences below are missing.  Each variant
   optionally loads a per-call-site counter label before calling mcount.  */
17890 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit PIC: RIP-relative counter plus mcount through the GOT.  */
17895 #ifndef NO_PROFILE_COUNTERS
17896       fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17898       fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
/* 64-bit non-PIC: absolute counter address, direct call.  */
17902 #ifndef NO_PROFILE_COUNTERS
17903       fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17905       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC: GOTOFF counter via %ebx, mcount through the GOT.  */
17909 #ifndef NO_PROFILE_COUNTERS
17910       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17911 	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17913       fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC: absolute counter address, direct call.  */
17917 #ifndef NO_PROFILE_COUNTERS
17918       fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17919 	       PROFILE_COUNT_REGISTER);
17921       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17925 /* We don't have exact information about the insn sizes, but we may assume
17926    quite safely that we are informed about all 1 byte insns and memory
17927    address sizes.  This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound on the byte size of INSN, used by
   the 4-jumps-per-16-bytes padding pass below.  (Elided listing: the
   individual return statements are not visible.)  */
17931 min_insn_size (rtx insn)
17935   if (!INSN_P (insn) || !active_insn_p (insn))
17938   /* Discard alignments we've emit and jump instructions.  */
17939   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17940       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17942   if (GET_CODE (insn) == JUMP_INSN
17943       && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17944 	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17947   /* Important case - calls are always 5 bytes.
17948      It is common to have many calls in the row.  */
17949   if (GET_CODE (insn) == CALL_INSN
17950       && symbolic_reference_mentioned_p (PATTERN (insn))
17951       && !SIBLING_CALL_P (insn))
17953   if (get_attr_length (insn) <= 1)
17956   /* For normal instructions we may rely on the sizes of addresses
17957      and the presence of symbol to require 4 bytes of encoding.
17958      This is not the case for jumps where references are PC relative.  */
17959   if (GET_CODE (insn) != JUMP_INSN)
17961       l = get_attr_length_address (insn);
17962       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17971 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: pads the code stream so no 16-byte window can
   contain four branches (jumps or calls), which the K8 branch predictor
   handles badly.  (Elided listing: loop braces and some statements are
   missing between visible lines.)  */
17975 ix86_avoid_jump_misspredicts (void)
17977   rtx insn, start = get_insns ();
17978   int nbytes = 0, njumps = 0;
17981   /* Look for all minimal intervals of instructions containing 4 jumps.
17982      The intervals are bounded by START and INSN.  NBYTES is the total
17983      size of instructions in the interval including INSN and not including
17984      START.  When the NBYTES is smaller than 16 bytes, it is possible
17985      that the end of START and INSN ends up in the same 16byte page.
17987      The smallest offset in the page INSN can start is the case where START
17988      ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
17989      We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
17991   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17994       nbytes += min_insn_size (insn);
17996 	fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17997 		INSN_UID (insn), min_insn_size (insn));
/* Count INSN if it is a real branch (dispatch tables excluded) or call.  */
17998       if ((GET_CODE (insn) == JUMP_INSN
17999 	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
18000 	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
18001 	  || GET_CODE (insn) == CALL_INSN)
/* Shrink the window from the front until it holds at most 3 branches.  */
18008 	  start = NEXT_INSN (start);
18009 	  if ((GET_CODE (start) == JUMP_INSN
18010 	       && GET_CODE (PATTERN (start)) != ADDR_VEC
18011 	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18012 	      || GET_CODE (start) == CALL_INSN)
18013 	    njumps--, isjump = 1;
18016 	  nbytes -= min_insn_size (start);
18018       gcc_assert (njumps >= 0);
18020 	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18021 		 INSN_UID (start), INSN_UID (insn), nbytes);
/* Fourth branch within 16 bytes: emit alignment padding before INSN to
   push it out of the window.  */
18023       if (njumps == 3 && isjump && nbytes < 16)
18025 	  int padsize = 15 - nbytes + min_insn_size (insn);
18028 	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18029 		     INSN_UID (insn), padsize);
18030           emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18035 /* AMD Athlon works faster
18036    when RET is not destination of conditional jump or directly preceded
18037    by other jump instruction.  We avoid the penalty by inserting NOP just
18038    before the RET instructions in such cases.  */
/* Walks every predecessor edge of the exit block; when the block ends in
   a hot RETURN that is a jump target or immediately follows a jump/call,
   it is replaced with the padded long-return sequence.  (Elided listing:
   braces and the replacement statements are partially missing.)  */
18040 ix86_pad_returns (void)
18045   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18047       basic_block bb = e->src;
18048       rtx ret = BB_END (bb);
18050       bool replace = false;
18052       if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18053 	  || !maybe_hot_bb_p (bb))
/* Find the nearest active insn or label before the return.  */
18055       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18056 	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18058       if (prev && GET_CODE (prev) == CODE_LABEL)
/* Label before RET: pad if any non-fallthru edge reaches it.  */
18063 	  FOR_EACH_EDGE (e, ei, bb->preds)
18064 	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
18065 		&& !(e->flags & EDGE_FALLTHRU))
/* Conditional jump or call directly before RET also triggers padding.  */
18070 	  prev = prev_active_insn (ret);
18072 	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18073 		  || GET_CODE (prev) == CALL_INSN))
18075 	  /* Empty functions get branch mispredict even when the jump destination
18076 	     is not visible to us.  */
18077 	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18082 	  emit_insn_before (gen_return_internal_long (), ret);
18088 /* Implement machine specific optimizations.  We implement padding of returns
18089    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
/* NOTE(review): the function header line is elided from this listing --
   presumably the machine-reorg hook; confirm against the full source.
   Both sub-passes run only under optimization and not at -Os.  */
18093   if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18094     ix86_pad_returns ();
18095   if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18096     ix86_avoid_jump_misspredicts ();
18099 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the extracted operands of INSN for a hard register with number
   >= 4 -- byte access to such registers requires a REX prefix.  (Elided
   listing: the return statements are not visible.)  */
18102 x86_extended_QIreg_mentioned_p (rtx insn)
18105   extract_insn_cached (insn);
18106   for (i = 0; i < recog_data.n_operands; i++)
18107     if (REG_P (recog_data.operand[i])
18108 	&& REGNO (recog_data.operand[i]) >= 4)
18113 /* Return nonzero when P points to register encoded via REX prefix.
18114    Called via for_each_rtx.  */
/* Reports whether *P is a REX-extended integer (r8-r15) or SSE
   (xmm8-xmm15) hard register.  (Elided: the REG_P guard line.)  */
18116 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18118   unsigned int regno;
18121   regno = REGNO (*p);
18122   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18125 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the insn pattern with extended_reg_mentioned_1 above.  */
18128 x86_extended_reg_mentioned_p (rtx insn)
18130   return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18133 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
18134    optabs would emit if we didn't have TFmode patterns.  */
/* Strategy: non-negative inputs convert directly with a signed FLOAT.
   Negative (i.e. high-bit-set unsigned) inputs are halved with the lost
   low bit folded back in (shr 1; or with bit 0), converted, then doubled
   (f0 + f0) to recover the full unsigned value.  */
18137 x86_emit_floatuns (rtx operands[2])
18139   rtx neglab, donelab, i0, i1, f0, in, out;
18140   enum machine_mode mode, inmode;
18142   inmode = GET_MODE (operands[1]);
18143   gcc_assert (inmode == SImode || inmode == DImode);
18146   in = force_reg (inmode, operands[1]);
18147   mode = GET_MODE (out);
18148   neglab = gen_label_rtx ();
18149   donelab = gen_label_rtx ();
18150   i1 = gen_reg_rtx (Pmode);
18151   f0 = gen_reg_rtx (mode);
/* Fast path: value is non-negative when viewed as signed.  */
18153   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18155   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18156   emit_jump_insn (gen_jump (donelab));
18159   emit_label (neglab);
/* Slow path: (in >> 1) | (in & 1), converted, then doubled.  */
18161   i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18162   i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18163   i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18164   expand_float (f0, i0, 0);
18165   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18167   emit_label (donelab);
18170 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18171    with all elements equal to VAR.  Return true if successful.  */
/* NOTE(review): elided listing -- the `switch (mode)' skeleton and case
   labels are missing; the visible fragments are per-mode strategies.  */
18174 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18175 				   rtx target, rtx val)
18177   enum machine_mode smode, wsmode, wvmode;
/* Simple case: modes with a native VEC_DUPLICATE pattern.  */
18192       val = force_reg (GET_MODE_INNER (mode), val);
18193       x = gen_rtx_VEC_DUPLICATE (mode, val);
18194       emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V4HI via pshufw: truncate the SImode value and duplicate.  */
18200       if (TARGET_SSE || TARGET_3DNOW_A)
18202 	  val = gen_lowpart (SImode, val);
18203 	  x = gen_rtx_TRUNCATE (HImode, val);
18204 	  x = gen_rtx_VEC_DUPLICATE (mode, x);
18205 	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V8HI: widen to SImode, splat via punpcklwd + pshufd.  */
18227 	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
18228 	  tmp1 = gen_reg_rtx (SImode);
18229 	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
18230 	  /* Insert the SImode value as low element of V4SImode vector. */
18231 	  tmp2 = gen_reg_rtx (V4SImode);
18232 	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18233 				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18234 				    CONST0_RTX (V4SImode),
18236 	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18237 	  /* Cast the V4SImode vector back to a V8HImode vector.  */
18238 	  tmp1 = gen_reg_rtx (V8HImode);
18239 	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18240 	  /* Duplicate the low short through the whole low SImode word.  */
18241 	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18242 	  /* Cast the V8HImode vector back to a V4SImode vector.  */
18243 	  tmp2 = gen_reg_rtx (V4SImode);
18244 	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18245 	  /* Replicate the low element of the V4SImode vector.  */
18246 	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18247 	  /* Cast the V2SImode back to V8HImode, and store in target.  */
18248 	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
/* V16QI: same idea with two punpcklbw steps to splat a byte.  */
18259 	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
18260 	  tmp1 = gen_reg_rtx (SImode);
18261 	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
18262 	  /* Insert the SImode value as low element of V4SImode vector. */
18263 	  tmp2 = gen_reg_rtx (V4SImode);
18264 	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18265 				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18266 				    CONST0_RTX (V4SImode),
18268 	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18269 	  /* Cast the V4SImode vector back to a V16QImode vector.  */
18270 	  tmp1 = gen_reg_rtx (V16QImode);
18271 	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18272 	  /* Duplicate the low byte through the whole low SImode word.  */
18273 	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18274 	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18275 	  /* Cast the V16QImode vector back to a V4SImode vector.  */
18276 	  tmp2 = gen_reg_rtx (V4SImode);
18277 	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18278 	  /* Replicate the low element of the V4SImode vector.  */
18279 	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18280 	  /* Cast the V2SImode back to V16QImode, and store in target.  */
18281 	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
/* Fallback: pair elements in a scalar (shift + or), then recurse on the
   vector mode with half as many, twice as wide, elements.  */
18289       /* Replicate the value once into the next wider mode and recurse.  */
18290       val = convert_modes (wsmode, smode, val, true);
18291       x = expand_simple_binop (wsmode, ASHIFT, val,
18292 			       GEN_INT (GET_MODE_BITSIZE (smode)),
18293 			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
18294       val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18296       x = gen_reg_rtx (wvmode);
18297       if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18298 	gcc_unreachable ();
18299       emit_move_insn (target, gen_lowpart (mode, x));
18307 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18308    whose ONE_VAR element is VAR, and other elements are zero.  Return true
/* NOTE(review): elided listing -- the `switch (mode)' skeleton and case
   labels are missing between the visible strategy fragments.  */
18312 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18313 				     rtx target, rtx var, int one_var)
18315   enum machine_mode vsimode;
/* Two-element modes: concat VAR with a zero element.  */
18331       var = force_reg (GET_MODE_INNER (mode), var);
18332       x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18333       emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Four-element modes: merge VAR into element 0 of a zero vector, working
   in a fresh pseudo when TARGET is not a pseudo register.  */
18338       if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18339 	new_target = gen_reg_rtx (mode);
18341 	new_target = target;
18342       var = force_reg (GET_MODE_INNER (mode), var);
18343       x = gen_rtx_VEC_DUPLICATE (mode, var);
18344       x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18345       emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
/* If ONE_VAR is not element 0, shuffle the value into place.  */
18348 	  /* We need to shuffle the value to the correct position, so
18349 	     create a new pseudo to store the intermediate result.  */
18351 	  /* With SSE2, we can use the integer shuffle insns.  */
18352 	  if (mode != V4SFmode && TARGET_SSE2)
18354 	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18356 					    GEN_INT (one_var == 1 ? 0 : 1),
18357 					    GEN_INT (one_var == 2 ? 0 : 1),
18358 					    GEN_INT (one_var == 3 ? 0 : 1)));
18359 	      if (target != new_target)
18360 		emit_move_insn (target, new_target);
/* SSE1 path: bitcast to V4SF and use shufps.  */
18364 	  /* Otherwise convert the intermediate result to V4SFmode and
18365 	     use the SSE1 shuffle instructions.  */
18366 	  if (mode != V4SFmode)
18368 	      tmp = gen_reg_rtx (V4SFmode);
18369 	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18374 	  emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18376 				       GEN_INT (one_var == 1 ? 0 : 1),
18377 				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
18378 				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18380 	  if (mode != V4SFmode)
18381 	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18382 	  else if (tmp != target)
18383 	    emit_move_insn (target, tmp);
18385       else if (target != new_target)
18386 	emit_move_insn (target, new_target);
/* Narrow-element modes: zero-extend VAR to SImode and solve the problem
   in the corresponding SImode vector mode, then bitcast back.  */
18391       vsimode = V4SImode;
18397       vsimode = V2SImode;
18403       /* Zero extend the variable element to SImode and recurse.  */
18404       var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18406       x = gen_reg_rtx (vsimode);
18407       if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18409 	gcc_unreachable ();
18411       emit_move_insn (target, gen_lowpart (mode, x));
18419 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
18420    consisting of the values in VALS.  It is known that all elements
18421    except ONE_VAR are constants.  Return true if successful.  */
/* Strategy: build a constant vector with the variable slot zeroed, load
   it, then insert VAR into place with ix86_expand_vector_set.  (Elided
   listing: the `switch (mode)' skeleton is missing.)  */
18424 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18425 				 rtx target, rtx vals, int one_var)
18427   rtx var = XVECEXP (vals, 0, one_var);
18428   enum machine_mode wmode;
18431   const_vec = copy_rtx (vals);
18432   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18433   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18441       /* For the two element vectors, it's just as easy to use
18442 	 the general case.  */
/* QImode elements cannot be set individually: pack VAR together with its
   neighboring constant byte into an HImode value, then do an HImode
   insert at half the index.  */
18458       /* There's no way to set one QImode entry easily.  Combine
18459 	 the variable value with its adjacent constant value, and
18460 	 promote to an HImode set.  */
18461       x = XVECEXP (vals, 0, one_var ^ 1);
/* Odd index: VAR occupies the high byte, constant the low byte.  */
18464 	  var = convert_modes (HImode, QImode, var, true);
18465 	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18466 				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
18467 	  x = GEN_INT (INTVAL (x) & 0xff);
/* Even index: VAR is the low byte, constant shifted into the high byte.  */
18471 	  var = convert_modes (HImode, QImode, var, true);
18472 	  x = gen_int_mode (INTVAL (x) << 8, HImode);
18474       if (x != const0_rtx)
18475 	var = expand_simple_binop (HImode, IOR, var, x, var,
18476 				   1, OPTAB_LIB_WIDEN);
18478       x = gen_reg_rtx (wmode);
18479       emit_move_insn (x, gen_lowpart (wmode, const_vec));
18480       ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18482       emit_move_insn (target, gen_lowpart (mode, x));
/* General path: load the constant vector, then set the one element.  */
18489   emit_move_insn (target, const_vec);
18490   ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
  enum machine_mode half_mode = GET_MODE_INNER (mode);
  rtx op0 = NULL, op1 = NULL;
  bool use_vec_concat = false;

  if (!mmx_ok && !TARGET_SSE)

      /* For the two element vectors, we always implement VEC_CONCAT.  */
      op0 = XVECEXP (vals, 0, 0);
      op1 = XVECEXP (vals, 0, 1);
      use_vec_concat = true;

      half_mode = V2SFmode;
      half_mode = V2SImode;
      /* For V4SF and V4SI, we implement a concat of two V2 vectors.
	 Recurse to load the two halves.  */

      op0 = gen_reg_rtx (half_mode);
      v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
      ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

      op1 = gen_reg_rtx (half_mode);
      v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
      ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));

      use_vec_concat = true;

      gcc_unreachable ();

  if (use_vec_concat)
      /* VEC_CONCAT needs both halves in registers.  */
      if (!register_operand (op0, half_mode))
	op0 = force_reg (half_mode, op0);
      if (!register_operand (op1, half_mode))
	op1 = force_reg (half_mode, op1);

      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));

      /* Fallback: build each word of the vector with shift/IOR in an
	 integer register, then assemble the words.  */
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	  rtx word = NULL_RTX;

	  /* Elements are folded in from the highest index downwards so
	     the first element ends up in the low bits of the word.  */
	  for (j = 0; j < n_elt_per_word; ++j)
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);

	/* Single word: just view it in the vector mode.  */
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	  /* Two words: fill the low and high parts of a clobbered temp.  */
	  rtx tmp = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
      else if (n_words == 4)
	  /* Four words: recurse, treating the words as a V4SI vector.  */
	  rtx tmp = gen_reg_rtx (V4SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	gcc_unreachable ();
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;

  /* Classify the elements: count non-constants (remembering the index
     of the last one) and track all-zero and all-identical.  */
  for (i = 0; i < n_elts; ++i)
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))

  /* Constants are best loaded from the constant pool.  */
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));

  /* If all values are identical, broadcast the value.  */
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
      && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
					      XVECEXP (vals, 0, one_var),

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))

  /* Last resort: fully general expansion.  */
  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET, using MMX moves
   only when MMX_OK is true.  Falls back to a spill/store/reload through
   a stack temporary when no shuffle or merge sequence fits the mode.  */
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_merge = false;

      /* Two-element vector: extract the other element and rebuild with
	 VEC_CONCAT, ordering the operands according to ELT.  */
      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

      /* For the two element vectors, we implement a VEC_CONCAT with
	 the extraction of the other element.  */
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
	op0 = val, op1 = tmp;
	op0 = tmp, op1 = val;
      tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

	  use_vec_merge = true;

	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_sse_unpcklps (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (1), GEN_INT (0),
				       GEN_INT (2+4), GEN_INT (3+4)));

	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (0+4), GEN_INT (3+4)));

	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
				       GEN_INT (0), GEN_INT (1),
				       GEN_INT (2+4), GEN_INT (0+4)));

	  gcc_unreachable ();

	  /* Element 0 handled by vec_merge below.  */
	  use_vec_merge = true;

	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */

	  /* ORDER describes the element permutation that exchanges
	     slot 0 with slot ELT and leaves the rest in place.  */
	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  /* The permutation is its own inverse: apply it again to
	     restore the original element order.  */
	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);

      use_vec_merge = TARGET_SSE2;

      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);

      /* Duplicate VAL into every slot, then merge only slot ELT
	 into TARGET.  */
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

      /* Fallback: spill to a stack temporary, store the element in
	 memory, and reload the whole vector.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET, using MMX moves
   only when MMX_OK is true.  Falls back to a spill through a stack
   temporary when no direct extraction sequence fits the mode.  */
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;

      use_vec_extr = true;

	  /* Shuffle the requested element into slot 0 first.  */
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
				       GEN_INT (elt), GEN_INT (elt),
				       GEN_INT (elt+4), GEN_INT (elt+4)));

	  /* Move the high half down with unpckhps.  */
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));

	  gcc_unreachable ();

      use_vec_extr = true;

	  /* Broadcast the requested element to slot 0 via pshufd.  */
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					GEN_INT (elt), GEN_INT (elt),
					GEN_INT (elt), GEN_INT (elt)));

	  /* Move the high half down with punpckhdq.  */
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));

	  gcc_unreachable ();

      use_vec_extr = true;

      /* For SSE1, we have to reuse the V4SF code.  */
      ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				  gen_lowpart (V4SFmode, vec), elt);

      use_vec_extr = TARGET_SSE2;

      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);

      /* ??? Could extract the appropriate HImode element and shift.  */

      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == HImode)
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

      /* Fallback: spill VEC to a stack temporary and load the single
	 element back from memory.  */
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
18966 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18967 pattern to reduce; DEST is the destination; IN is the input vector. */
18970 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18972 rtx tmp1, tmp2, tmp3;
18974 tmp1 = gen_reg_rtx (V4SFmode);
18975 tmp2 = gen_reg_rtx (V4SFmode);
18976 tmp3 = gen_reg_rtx (V4SFmode);
18978 emit_insn (gen_sse_movhlps (tmp1, in, in));
18979 emit_insn (fn (tmp2, tmp1, in));
18981 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18982 GEN_INT (1), GEN_INT (1),
18983 GEN_INT (1+4), GEN_INT (1+4)));
18984 emit_insn (fn (dest, tmp2, tmp3));
/* Target hook for scalar_mode_supported_p.  Decimal floating point
   modes are special-cased; everything else defers to the default
   hook.  */
ix86_scalar_mode_supported_p (enum machine_mode mode)
  if (DECIMAL_FLOAT_MODE_P (mode))
  return default_scalar_mode_supported_p (mode);
/* Implements target hook vector_mode_supported_p.  A vector mode is
   supported when some enabled vector ISA (SSE/SSE2/MMX/3DNow!) can
   hold it in a register.  */
ix86_vector_mode_supported_p (enum machine_mode mode)
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
  /* Every asm implicitly clobbers the condition codes ("flags"), the
     x87 status word ("fpsr") and the direction flag ("dirflag").  */
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
/* Return true if EXP goes in the large data/bss sections (x86-64
   medium code model).  */
ix86_in_large_data_p (tree exp)
  /* Only the medium code models place anything in large data.  */
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)

  /* An explicit .ldata/.lbss section name forces large data.  */
  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)

      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
19064 ix86_encode_section_info (tree decl, rtx rtl, int first)
19066 default_encode_section_info (decl, rtl, first);
19068 if (TREE_CODE (decl) == VAR_DECL
19069 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19070 && ix86_in_large_data_p (decl))
19071 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19074 /* Worker function for REVERSE_CONDITION. */
19077 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19079 return (mode != CCFPmode && mode != CCFPUmode
19080 ? reverse_condition (code)
19081 : reverse_condition_maybe_unordered (code));
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  Returns the assembler template string.  */
output_387_reg_move (rtx insn, rtx *operands)
  /* If the source register dies in this insn, pop it off the x87
     stack instead of copying it.  */
  if (REG_P (operands[1])
      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
      /* A dead %st(0) moving onto itself only needs to be freed.  */
      if (REGNO (operands[0]) == FIRST_STACK_REG)
	return output_387_ffreep (operands, 0);
      return "fstp\t%y0";
  /* Loading into the stack top is a plain fld.  */
  if (STACK_TOP_P (operands[0]))
    return "fld%z1\t%y1";
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */
ix86_emit_fp_unordered_jump (rtx label)
  rtx reg = gen_reg_rtx (HImode);

  /* Fetch the x87 status word into REG.  */
  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_USE_SAHF)
      /* Copy the status flags into EFLAGS and branch on UNORDERED.  */
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
      /* Without SAHF: test bit 0x04 (C2) of the status word's high
	 byte directly and branch on the result.  */
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);

  /* Emit the conditional jump to LABEL.  */
  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
  emit_jump_insn (temp);
/* Output code to perform a log1p XFmode calculation:
   op0 = log1p (op1), using fyl2xp1 when |op1| is small enough for
   that instruction to be accurate, and fyl2x on 1.0 + op1 otherwise.  */
void ix86_emit_i387_log1p (rtx op0, rtx op1)
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  /* Compare |op1| against 1 - sqrt(2)/2 ~= 0.2928..., the bound of
     fyl2xp1's argument range.  */
  emit_insn (gen_absxf2 (tmp, op1));
  emit_insn (gen_cmpxf (tmp,
		CONST_DOUBLE_FROM_REAL_VALUE (
		   REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
  emit_jump_insn (gen_bge (label1));

  /* Small |op1|: op0 = ln(2) * log2(op1 + 1) via fyl2xp1.  */
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
  emit_jump (label2);

  emit_label (label1);
  /* Otherwise: op0 = ln(2) * log2(op1 + 1.0) via explicit add + fyl2x.  */
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));

  emit_label (label2);
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
i386_solaris_elf_named_section (const char *name, unsigned int flags,
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
      && strcmp (name, ".eh_frame") == 0)
      /* Emit the section directive with the @unwind type and the
	 appropriate writability flags.  */
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
  /* All other sections use the generic ELF handling.  */
  default_elf_asm_named_section (name, flags, decl);
/* Return the mangling of TYPE if it is an extended fundamental type.
   Dispatches on TYPE's machine mode; only the extended floating-point
   modes receive a non-default mangling.  */
static const char *
ix86_mangle_fundamental_type (tree type)
  switch (TYPE_MODE (type))
      /* __float128 is "g".  */
      /* "long double" or __float80 is "e".  */
19202 /* For 32-bit code we can save PIC register setup by using
19203 __stack_chk_fail_local hidden function instead of calling
19204 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
19205 register, so it is better to call __stack_chk_fail directly. */
19208 ix86_stack_protect_fail (void)
19210 return TARGET_64BIT
19211 ? default_external_stack_protect_fail ()
19212 : default_hidden_stack_protect_fail ();
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
asm_preferred_eh_data_format (int code, int global)
      /* PC-relative encodings: sdata8 for the large code models,
	 sdata4 otherwise; global symbols go indirect through the GOT.  */
      int type = DW_EH_PE_sdata8;
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
  /* Absolute encodings: udata4 when the code model guarantees 32-bit
     addresses, full-width pointers otherwise.  */
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit (as produced by ix86_expand_sse_fabs).  */
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
      /* Build a fresh mask with only the sign bit set.  */
      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    /* A caller-supplied fabs mask keeps the value bits; invert it to
       isolate the sign bit.  */
    mask = gen_rtx_NOT (mode, mask);

  /* sgn = SIGN & sign-bit mask; RESULT = ABS_VALUE | sgn.  */
  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
ix86_expand_sse_fabs (rtx op0, rtx *smask)
  enum machine_mode mode = GET_MODE (op0);

  xa = gen_reg_rtx (mode);
  /* Mask with all bits set except the sign bit: AND computes fabs.  */
  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
  /* xa = op0 & mask  */
  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)
  label = gen_label_rtx ();
  /* Emit the FP comparison in the unordered-aware CC mode and the
     conditional branch to LABEL.  */
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  /* cmpsd/cmpss set MASK to all-ones when the comparison holds and
     all-zeros otherwise.  */
  if (mode == DFmode)
    emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
				    gen_rtx_fmt_ee (code, mode, op0, op1)));
    emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
				   gen_rtx_fmt_ee (code, mode, op0, op1)));
19354 /* Generate and return a rtx of mode MODE for 2**n where n is the number
19355 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
19357 ix86_gen_TWO52 (enum machine_mode mode)
19359 REAL_VALUE_TYPE TWO52r;
19362 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
19363 TWO52 = const_double_from_real_value (TWO52r, mode);
19364 TWO52 = force_reg (mode, TWO52);
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
ix86_expand_lround (rtx op0, rtx op1)
  /* C code for the stuff we're doing below:
	tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
	return (long)tmp;  */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* load nextafter (0.5, 0.0): 0.5 - 2**(-p-1), the largest
     representable value of MODE that is strictly below 0.5.  */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj: truncating conversion does the rounding.  */
  expand_fix (op0, adj, 0);
/* Expand SSE2 sequence for computing lfloor (DO_FLOOR true) or lceil
   (DO_FLOOR false) from OP1 storing into OP0.  */
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;  */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg
     For ceil the compensation is reversed: operands swapped, +1.  */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  expand_simple_binop (imode, do_floor ? MINUS : PLUS,
		       ireg, const1_rtx, ireg, 0, OPTAB_DIRECT);
  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
ix86_expand_rint (rtx operand0, rtx operand1)
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  /* Temporary for the result, preloaded with the input so the
     out-of-range path needs no extra move.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* Adding and subtracting 2**mantissa-bits forces rounding at the
     integer position in the current rounding mode.  */
  expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
  expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* res = copysign (xa, operand1) restores the sign (keeps -0.0).  */
  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor (DO_FLOOR true) or ceil
   (DO_FLOOR false) from OPERAND1 storing into OPERAND0.  */
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
	x2 -= (x2 > x) ? 1 : 0;    -- ceil uses -1.0 and swapped compare
	return x2;  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; rounds XA to the nearest integer.  */
  expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
  expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
	           const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0)
     (for ceil: xa = xa - (operand1 > xa ? -1 : 0)).  */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  expand_simple_binop (mode, MINUS,
		       xa, tmp, res, 0, OPTAB_DIRECT);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor (DO_FLOOR true) or ceil
   (DO_FLOOR false) from OPERAND1 storing into OPERAND0.  Uses a
   round-trip through the integer registers.  */
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	x2 -= (x2 > x) ? 1 : 0;    -- ceil adds instead
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x: truncate via the integer registers.  */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0)
     (for ceil: xa = xa + (operand1 > xa ? 1 : 0)).  */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  expand_simple_binop (mode, do_floor ? MINUS : PLUS,
		       xa, tmp, res, 0, OPTAB_DIRECT);

  /* Restore the sign so that -0.0 is preserved when required.  */
  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
	dxa = xa2 - xa;
	xa2 -= (dxa > 0.5) ? 1 : 0;
	else if (dxa > 0.5)
	x2 = copysign (xa2, x);
	return x2;  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; rounds to nearest even.  */
  xa2 = gen_reg_rtx (mode);
  expand_simple_binop (mode, PLUS, xa, TWO52, xa2, 0, OPTAB_DIRECT);
  expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; the rounding error, in [-0.5, 0.5].  */
  dxa = gen_reg_rtx (mode);
  expand_simple_binop (mode, MINUS, xa2, xa, dxa, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = gen_reg_rtx (mode);
  expand_simple_binop (mode, PLUS, half, half, one, 0, OPTAB_DIRECT);
  mhalf = gen_reg_rtx (mode);
  expand_simple_binop (mode, MINUS, half, one, mhalf, 0, OPTAB_DIRECT);

  /* NOTE(review): this register is dead -- TMP is immediately
     reassigned by ix86_expand_sse_compare_mask below.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  expand_simple_binop (mode, MINUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  expand_simple_binop (mode, PLUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
ix86_expand_trunc (rtx operand0, rtx operand1)
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x: fix/float round trip truncates toward zero.  */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  /* Restore the sign so that -0.0 is preserved when required.  */
  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Variant that avoids the DImode fix_trunc round trip
   (cf. ix86_expand_rounddf_32).  */
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
	xa2 -= (xa2 > xa) ? 1 : 0;
	x2 = copysign (xa2, x);
	return x2;  */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; rounds XA to the nearest integer.  */
  expand_simple_binop (mode, PLUS, xa, TWO52, res, 0, OPTAB_DIRECT);
  expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0), i.e. round toward
     zero when the round-to-nearest overshot the magnitude.  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  expand_simple_binop (mode, MINUS,
		       res, mask, res, 0, OPTAB_DIRECT);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
ix86_expand_round (rtx operand0, rtx operand1)
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0): 0.5 - 2**(-p-1), the largest
     representable value of MODE that is strictly below 0.5.  */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  expand_simple_binop (mode, PLUS, xa, half, xa, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa: truncate via the integer registers.  */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
19814 #include "gt-i386.h"