1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
58 /* Return index of given mode in mult and division cost tables. */
59 #define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
66 /* Processor costs (relative to an add) */
67 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
68 #define COSTS_N_BYTES(N) ((N) * 2)
71 struct processor_costs size_cost = { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
123 /* Processor costs (relative to an add) */
125 struct processor_costs i386_cost = { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
178 struct processor_costs i486_cost = { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
231 struct processor_costs pentium_cost = {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
284 struct processor_costs pentiumpro_cost = {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
337 struct processor_costs k6_cost = {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (2), /* cost of a lea instruction */
340 COSTS_N_INSNS (1), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (3), /* HI */
344 COSTS_N_INSNS (3), /* SI */
345 COSTS_N_INSNS (3), /* DI */
346 COSTS_N_INSNS (3)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (18), /* HI */
350 COSTS_N_INSNS (18), /* SI */
351 COSTS_N_INSNS (18), /* DI */
352 COSTS_N_INSNS (18)}, /* other */
353 COSTS_N_INSNS (2), /* cost of movsx */
354 COSTS_N_INSNS (2), /* cost of movzx */
355 8, /* "large" insn */
357 3, /* cost for loading QImode using movzbl */
358 {4, 5, 4}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {2, 3, 2}, /* cost of storing integer registers */
362 4, /* cost of reg,reg fld/fst */
363 {6, 6, 6}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 4, 4}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
367 2, /* cost of moving MMX register */
368 {2, 2}, /* cost of loading MMX registers
369 in SImode and DImode */
370 {2, 2}, /* cost of storing MMX registers
371 in SImode and DImode */
372 2, /* cost of moving SSE register */
373 {2, 2, 8}, /* cost of loading SSE registers
374 in SImode, DImode and TImode */
375 {2, 2, 8}, /* cost of storing SSE registers
376 in SImode, DImode and TImode */
377 6, /* MMX or SSE register to integer */
378 32, /* size of prefetch block */
379 1, /* number of parallel prefetches */
381 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
382 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
383 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
384 COSTS_N_INSNS (2), /* cost of FABS instruction. */
385 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
386 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
390 struct processor_costs athlon_cost = {
391 COSTS_N_INSNS (1), /* cost of an add instruction */
392 COSTS_N_INSNS (2), /* cost of a lea instruction */
393 COSTS_N_INSNS (1), /* variable shift costs */
394 COSTS_N_INSNS (1), /* constant shift costs */
395 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
396 COSTS_N_INSNS (5), /* HI */
397 COSTS_N_INSNS (5), /* SI */
398 COSTS_N_INSNS (5), /* DI */
399 COSTS_N_INSNS (5)}, /* other */
400 0, /* cost of multiply per each bit set */
401 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
402 COSTS_N_INSNS (26), /* HI */
403 COSTS_N_INSNS (42), /* SI */
404 COSTS_N_INSNS (74), /* DI */
405 COSTS_N_INSNS (74)}, /* other */
406 COSTS_N_INSNS (1), /* cost of movsx */
407 COSTS_N_INSNS (1), /* cost of movzx */
408 8, /* "large" insn */
410 4, /* cost for loading QImode using movzbl */
411 {3, 4, 3}, /* cost of loading integer registers
412 in QImode, HImode and SImode.
413 Relative to reg-reg move (2). */
414 {3, 4, 3}, /* cost of storing integer registers */
415 4, /* cost of reg,reg fld/fst */
416 {4, 4, 12}, /* cost of loading fp registers
417 in SFmode, DFmode and XFmode */
418 {6, 6, 8}, /* cost of storing fp registers
419 in SFmode, DFmode and XFmode */
420 2, /* cost of moving MMX register */
421 {4, 4}, /* cost of loading MMX registers
422 in SImode and DImode */
423 {4, 4}, /* cost of storing MMX registers
424 in SImode and DImode */
425 2, /* cost of moving SSE register */
426 {4, 4, 6}, /* cost of loading SSE registers
427 in SImode, DImode and TImode */
428 {4, 4, 5}, /* cost of storing SSE registers
429 in SImode, DImode and TImode */
430 5, /* MMX or SSE register to integer */
431 64, /* size of prefetch block */
432 6, /* number of parallel prefetches */
434 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
435 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
436 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
437 COSTS_N_INSNS (2), /* cost of FABS instruction. */
438 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
439 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
443 struct processor_costs k8_cost = {
444 COSTS_N_INSNS (1), /* cost of an add instruction */
445 COSTS_N_INSNS (2), /* cost of a lea instruction */
446 COSTS_N_INSNS (1), /* variable shift costs */
447 COSTS_N_INSNS (1), /* constant shift costs */
448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
449 COSTS_N_INSNS (4), /* HI */
450 COSTS_N_INSNS (3), /* SI */
451 COSTS_N_INSNS (4), /* DI */
452 COSTS_N_INSNS (5)}, /* other */
453 0, /* cost of multiply per each bit set */
454 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
455 COSTS_N_INSNS (26), /* HI */
456 COSTS_N_INSNS (42), /* SI */
457 COSTS_N_INSNS (74), /* DI */
458 COSTS_N_INSNS (74)}, /* other */
459 COSTS_N_INSNS (1), /* cost of movsx */
460 COSTS_N_INSNS (1), /* cost of movzx */
461 8, /* "large" insn */
463 4, /* cost for loading QImode using movzbl */
464 {3, 4, 3}, /* cost of loading integer registers
465 in QImode, HImode and SImode.
466 Relative to reg-reg move (2). */
467 {3, 4, 3}, /* cost of storing integer registers */
468 4, /* cost of reg,reg fld/fst */
469 {4, 4, 12}, /* cost of loading fp registers
470 in SFmode, DFmode and XFmode */
471 {6, 6, 8}, /* cost of storing fp registers
472 in SFmode, DFmode and XFmode */
473 2, /* cost of moving MMX register */
474 {3, 3}, /* cost of loading MMX registers
475 in SImode and DImode */
476 {4, 4}, /* cost of storing MMX registers
477 in SImode and DImode */
478 2, /* cost of moving SSE register */
479 {4, 3, 6}, /* cost of loading SSE registers
480 in SImode, DImode and TImode */
481 {4, 4, 5}, /* cost of storing SSE registers
482 in SImode, DImode and TImode */
483 5, /* MMX or SSE register to integer */
484 64, /* size of prefetch block */
485 6, /* number of parallel prefetches */
487 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (2), /* cost of FABS instruction. */
491 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
496 struct processor_costs pentium4_cost = {
497 COSTS_N_INSNS (1), /* cost of an add instruction */
498 COSTS_N_INSNS (3), /* cost of a lea instruction */
499 COSTS_N_INSNS (4), /* variable shift costs */
500 COSTS_N_INSNS (4), /* constant shift costs */
501 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
502 COSTS_N_INSNS (15), /* HI */
503 COSTS_N_INSNS (15), /* SI */
504 COSTS_N_INSNS (15), /* DI */
505 COSTS_N_INSNS (15)}, /* other */
506 0, /* cost of multiply per each bit set */
507 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
508 COSTS_N_INSNS (56), /* HI */
509 COSTS_N_INSNS (56), /* SI */
510 COSTS_N_INSNS (56), /* DI */
511 COSTS_N_INSNS (56)}, /* other */
512 COSTS_N_INSNS (1), /* cost of movsx */
513 COSTS_N_INSNS (1), /* cost of movzx */
514 16, /* "large" insn */
516 2, /* cost for loading QImode using movzbl */
517 {4, 5, 4}, /* cost of loading integer registers
518 in QImode, HImode and SImode.
519 Relative to reg-reg move (2). */
520 {2, 3, 2}, /* cost of storing integer registers */
521 2, /* cost of reg,reg fld/fst */
522 {2, 2, 6}, /* cost of loading fp registers
523 in SFmode, DFmode and XFmode */
524 {4, 4, 6}, /* cost of storing fp registers
525 in SFmode, DFmode and XFmode */
526 2, /* cost of moving MMX register */
527 {2, 2}, /* cost of loading MMX registers
528 in SImode and DImode */
529 {2, 2}, /* cost of storing MMX registers
530 in SImode and DImode */
531 12, /* cost of moving SSE register */
532 {12, 12, 12}, /* cost of loading SSE registers
533 in SImode, DImode and TImode */
534 {2, 2, 8}, /* cost of storing SSE registers
535 in SImode, DImode and TImode */
536 10, /* MMX or SSE register to integer */
537 64, /* size of prefetch block */
538 6, /* number of parallel prefetches */
540 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
541 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
542 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
543 COSTS_N_INSNS (2), /* cost of FABS instruction. */
544 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
545 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
549 struct processor_costs nocona_cost = {
550 COSTS_N_INSNS (1), /* cost of an add instruction */
551 COSTS_N_INSNS (1), /* cost of a lea instruction */
552 COSTS_N_INSNS (1), /* variable shift costs */
553 COSTS_N_INSNS (1), /* constant shift costs */
554 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
555 COSTS_N_INSNS (10), /* HI */
556 COSTS_N_INSNS (10), /* SI */
557 COSTS_N_INSNS (10), /* DI */
558 COSTS_N_INSNS (10)}, /* other */
559 0, /* cost of multiply per each bit set */
560 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
561 COSTS_N_INSNS (66), /* HI */
562 COSTS_N_INSNS (66), /* SI */
563 COSTS_N_INSNS (66), /* DI */
564 COSTS_N_INSNS (66)}, /* other */
565 COSTS_N_INSNS (1), /* cost of movsx */
566 COSTS_N_INSNS (1), /* cost of movzx */
567 16, /* "large" insn */
569 4, /* cost for loading QImode using movzbl */
570 {4, 4, 4}, /* cost of loading integer registers
571 in QImode, HImode and SImode.
572 Relative to reg-reg move (2). */
573 {4, 4, 4}, /* cost of storing integer registers */
574 3, /* cost of reg,reg fld/fst */
575 {12, 12, 12}, /* cost of loading fp registers
576 in SFmode, DFmode and XFmode */
577 {4, 4, 4}, /* cost of storing fp registers
578 in SFmode, DFmode and XFmode */
579 6, /* cost of moving MMX register */
580 {12, 12}, /* cost of loading MMX registers
581 in SImode and DImode */
582 {12, 12}, /* cost of storing MMX registers
583 in SImode and DImode */
584 6, /* cost of moving SSE register */
585 {12, 12, 12}, /* cost of loading SSE registers
586 in SImode, DImode and TImode */
587 {12, 12, 12}, /* cost of storing SSE registers
588 in SImode, DImode and TImode */
589 8, /* MMX or SSE register to integer */
590 128, /* size of prefetch block */
591 8, /* number of parallel prefetches */
593 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
594 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
595 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
596 COSTS_N_INSNS (3), /* cost of FABS instruction. */
597 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
598 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
601 /* Generic64 should produce code tuned for Nocona and K8. */
603 struct processor_costs generic64_cost = {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 /* On all chips taken into consideration lea is 2 cycles and more. With
606 this cost however our current implementation of synth_mult results in
607 use of unnecessary temporary registers causing regression on several
608 SPECfp benchmarks. */
609 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (2)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (26), /* HI */
620 COSTS_N_INSNS (42), /* SI */
621 COSTS_N_INSNS (74), /* DI */
622 COSTS_N_INSNS (74)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {4, 4, 4}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {4, 4, 4}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {12, 12, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {8, 8}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {8, 8}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {8, 8, 8}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {8, 8, 8}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 5, /* MMX or SSE register to integer */
648 64, /* size of prefetch block */
649 6, /* number of parallel prefetches */
650 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
651 is increased to perhaps more appropriate value of 5. */
653 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
655 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
656 COSTS_N_INSNS (8), /* cost of FABS instruction. */
657 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
658 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
661 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
663 struct processor_costs generic32_cost = {
664 COSTS_N_INSNS (1), /* cost of an add instruction */
665 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
666 COSTS_N_INSNS (1), /* variable shift costs */
667 COSTS_N_INSNS (1), /* constant shift costs */
668 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
669 COSTS_N_INSNS (4), /* HI */
670 COSTS_N_INSNS (3), /* SI */
671 COSTS_N_INSNS (4), /* DI */
672 COSTS_N_INSNS (2)}, /* other */
673 0, /* cost of multiply per each bit set */
674 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
675 COSTS_N_INSNS (26), /* HI */
676 COSTS_N_INSNS (42), /* SI */
677 COSTS_N_INSNS (74), /* DI */
678 COSTS_N_INSNS (74)}, /* other */
679 COSTS_N_INSNS (1), /* cost of movsx */
680 COSTS_N_INSNS (1), /* cost of movzx */
681 8, /* "large" insn */
683 4, /* cost for loading QImode using movzbl */
684 {4, 4, 4}, /* cost of loading integer registers
685 in QImode, HImode and SImode.
686 Relative to reg-reg move (2). */
687 {4, 4, 4}, /* cost of storing integer registers */
688 4, /* cost of reg,reg fld/fst */
689 {12, 12, 12}, /* cost of loading fp registers
690 in SFmode, DFmode and XFmode */
691 {6, 6, 8}, /* cost of storing fp registers
692 in SFmode, DFmode and XFmode */
693 2, /* cost of moving MMX register */
694 {8, 8}, /* cost of loading MMX registers
695 in SImode and DImode */
696 {8, 8}, /* cost of storing MMX registers
697 in SImode and DImode */
698 2, /* cost of moving SSE register */
699 {8, 8, 8}, /* cost of loading SSE registers
700 in SImode, DImode and TImode */
701 {8, 8, 8}, /* cost of storing SSE registers
702 in SImode, DImode and TImode */
703 5, /* MMX or SSE register to integer */
704 64, /* size of prefetch block */
705 6, /* number of parallel prefetches */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
715 const struct processor_costs *ix86_cost = &pentium_cost;
717 /* Processor feature/optimization bitmasks. */
718 #define m_386 (1<<PROCESSOR_I386)
719 #define m_486 (1<<PROCESSOR_I486)
720 #define m_PENT (1<<PROCESSOR_PENTIUM)
721 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
722 #define m_K6 (1<<PROCESSOR_K6)
723 #define m_ATHLON (1<<PROCESSOR_ATHLON)
724 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
725 #define m_K8 (1<<PROCESSOR_K8)
726 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
727 #define m_NOCONA (1<<PROCESSOR_NOCONA)
728 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
729 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
730 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
732 /* Generic instruction choice should be common subset of supported CPUs
733 (PPro/PENT4/NOCONA/Athlon/K8). */
735 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
736 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
737 generic because it is not working well with PPro base chips. */
738 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
739 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
740 const int x86_zero_extend_with_and = m_486 | m_PENT;
741 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
742 const int x86_double_with_add = ~m_386;
743 const int x86_use_bit_test = m_386;
744 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
745 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
746 const int x86_fisttp = m_NOCONA;
747 const int x86_3dnow_a = m_ATHLON_K8;
748 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
749 /* Branch hints were put in P4 based on simulation result. But
750 after P4 was made, no performance benefit was observed with
751 branch hints. It also increases the code size. As the result,
752 icc never generates branch hints. */
753 const int x86_branch_hints = 0;
754 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
755 /* We probably ought to watch for partial register stalls on Generic32
756 compilation setting as well. However in current implementation the
757 partial register stalls are not eliminated very well - they can
758 be introduced via subregs synthesized by combine and can happen
759 in caller/callee saving sequences.
760 Because this option pays back little on PPro based chips and is in conflict
761 with partial reg. dependencies used by Athlon/P4 based chips, it is better
762 to leave it off for generic32 for now. */
763 const int x86_partial_reg_stall = m_PPRO;
764 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
765 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
766 const int x86_use_mov0 = m_K6;
767 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
768 const int x86_read_modify_write = ~m_PENT;
769 const int x86_read_modify = ~(m_PENT | m_PPRO);
770 const int x86_split_long_moves = m_PPRO;
771 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
772 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
773 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
774 const int x86_qimode_math = ~(0);
775 const int x86_promote_qi_regs = 0;
776 /* On PPro this flag is meant to avoid partial register stalls. Just like
777 the x86_partial_reg_stall this option might be considered for Generic32
778 if our scheme for avoiding partial stalls was more effective. */
779 const int x86_himode_math = ~(m_PPRO);
780 const int x86_promote_hi_regs = m_PPRO;
781 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
782 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
783 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
784 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
785 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
786 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
787 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
788 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
789 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
790 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
791 const int x86_shift1 = ~m_486;
792 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
793 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
794 that thread 128bit SSE registers as single units versus K8 based chips that
795 divide SSE registers to two 64bit halves.
796 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
797 to allow register renaming on 128bit SSE units, but usually results in one
798 extra microop on 64bit SSE units. Experimental results shows that disabling
799 this option on P4 brings over 20% SPECfp regression, while enabling it on
800 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
802 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
803 /* Set for machines where the type and dependencies are resolved on SSE
804 register parts instead of whole registers, so we may maintain just
805 lower part of scalar values in proper format leaving the upper part
807 const int x86_sse_split_regs = m_ATHLON_K8;
808 const int x86_sse_typeless_stores = m_ATHLON_K8;
809 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
810 const int x86_use_ffreep = m_ATHLON_K8;
811 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
812 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
814 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
815 integer data in xmm registers. Which results in pretty abysmal code. */
816 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
818 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
819 /* Some CPU cores are not able to predict more than 4 branch instructions in
820 the 16 byte window. */
821 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
822 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
823 const int x86_use_bt = m_ATHLON_K8;
824 /* Compare and exchange was added for 80486. */
825 const int x86_cmpxchg = ~m_386;
826 /* Compare and exchange 8 bytes was added for pentium. */
827 const int x86_cmpxchg8b = ~(m_386 | m_486);
828 /* Compare and exchange 16 bytes was added for nocona. */
829 const int x86_cmpxchg16b = m_NOCONA;
830 /* Exchange and add was added for 80486. */
831 const int x86_xadd = ~m_386;
832 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
834 /* In case the average insn count for single function invocation is
835 lower than this constant, emit fast (but longer) prologue and
837 #define FAST_PROLOGUE_INSN_COUNT 20
839 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
840 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
841 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
842 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
844 /* Array of the smallest class containing reg number REGNO, indexed by
845 REGNO. Used by REGNO_REG_CLASS in i386.h. */
847 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
850 AREG, DREG, CREG, BREG,
852 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
854 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
855 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
858 /* flags, fpsr, dirflag, frame */
859 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
860 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
862 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
864 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
865 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
866 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
870 /* The "default" register map used in 32bit mode. */
872 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
874 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
875 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
876 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
877 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
878 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
879 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
880 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
883 static int const x86_64_int_parameter_registers[6] =
885 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
886 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
889 static int const x86_64_int_return_registers[4] =
891 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
894 /* The "default" register map used in 64bit mode. */
895 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
897 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
898 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
899 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
900 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
901 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
902 8,9,10,11,12,13,14,15, /* extended integer registers */
903 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
906 /* Define the register numbers to be used in Dwarf debugging information.
907 The SVR4 reference port C compiler uses the following register numbers
908 in its Dwarf output code:
909 0 for %eax (gcc regno = 0)
910 1 for %ecx (gcc regno = 2)
911 2 for %edx (gcc regno = 1)
912 3 for %ebx (gcc regno = 3)
913 4 for %esp (gcc regno = 7)
914 5 for %ebp (gcc regno = 6)
915 6 for %esi (gcc regno = 4)
916 7 for %edi (gcc regno = 5)
917 The following three DWARF register numbers are never generated by
918 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
919 believes these numbers have these meanings.
920 8 for %eip (no gcc equivalent)
921 9 for %eflags (gcc regno = 17)
922 10 for %trapno (no gcc equivalent)
923 It is not at all clear how we should number the FP stack registers
924 for the x86 architecture. If the version of SDB on x86/svr4 were
925 a bit less brain dead with respect to floating-point then we would
926 have a precedent to follow with respect to DWARF register numbers
927 for x86 FP registers, but the SDB on x86/svr4 is so completely
928 broken with respect to FP registers that it is hardly worth thinking
929 of it as something to strive for compatibility with.
930 The version of x86/svr4 SDB I have at the moment does (partially)
931 seem to believe that DWARF register number 11 is associated with
932 the x86 register %st(0), but that's about all. Higher DWARF
933 register numbers don't seem to be associated with anything in
934 particular, and even for DWARF regno 11, SDB only seems to under-
935 stand that it should say that a variable lives in %st(0) (when
936 asked via an `=' command) if we said it was in DWARF regno 11,
937 but SDB still prints garbage when asked for the value of the
938 variable in question (via a `/' command).
939 (Also note that the labels SDB prints for various FP stack regs
940 when doing an `x' command are all wrong.)
941 Note that these problems generally don't affect the native SVR4
942 C compiler because it doesn't allow the use of -O with -g and
943 because when it is *not* optimizing, it allocates a memory
944 location for each floating-point variable, and the memory
945 location is what gets described in the DWARF AT_location
946 attribute for the variable in question.
947 Regardless of the severe mental illness of the x86/svr4 SDB, we
948 do something sensible here and we use the following DWARF
949 register numbers. Note that these are all stack-top-relative
951 11 for %st(0) (gcc regno = 8)
952 12 for %st(1) (gcc regno = 9)
953 13 for %st(2) (gcc regno = 10)
954 14 for %st(3) (gcc regno = 11)
955 15 for %st(4) (gcc regno = 12)
956 16 for %st(5) (gcc regno = 13)
957 17 for %st(6) (gcc regno = 14)
958 18 for %st(7) (gcc regno = 15)
960 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
962 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
963 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
964 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
965 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
966 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
967 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
968 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
971 /* Test and compare insns in i386.md store the information needed to
972 generate branch and scc insns here. */
974 rtx ix86_compare_op0 = NULL_RTX;
975 rtx ix86_compare_op1 = NULL_RTX;
976 rtx ix86_compare_emitted = NULL_RTX;
978 /* Size of the register save area. */
979 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
981 /* Define the structure for the machine field in struct function. */
983 struct stack_local_entry GTY(())
988 struct stack_local_entry *next;
991 /* Structure describing stack frame layout.
992 Stack grows downward:
998 saved frame pointer if frame_pointer_needed
999 <- HARD_FRAME_POINTER
1004 [va_arg registers] (
1005 > to_allocate <- FRAME_POINTER
1015 HOST_WIDE_INT frame;
1017 int outgoing_arguments_size;
1020 HOST_WIDE_INT to_allocate;
1021 /* The offsets relative to ARG_POINTER. */
1022 HOST_WIDE_INT frame_pointer_offset;
1023 HOST_WIDE_INT hard_frame_pointer_offset;
1024 HOST_WIDE_INT stack_pointer_offset;
1026 /* When save_regs_using_mov is set, emit prologue using
1027 move instead of push instructions. */
1028 bool save_regs_using_mov;
1031 /* Code model option. */
1032 enum cmodel ix86_cmodel;
1034 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1036 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1038 /* Which unit we are generating floating point math for. */
1039 enum fpmath_unit ix86_fpmath;
1041 /* Which cpu are we scheduling for. */
1042 enum processor_type ix86_tune;
1043 /* Which instruction set architecture to use. */
1044 enum processor_type ix86_arch;
1046 /* true if sse prefetch instruction is not NOOP. */
1047 int x86_prefetch_sse;
1049 /* ix86_regparm_string as a number */
1050 static int ix86_regparm;
1052 /* -mstackrealign option */
1053 extern int ix86_force_align_arg_pointer;
1054 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1056 /* Preferred alignment for stack boundary in bits. */
1057 unsigned int ix86_preferred_stack_boundary;
1059 /* Values 1-5: see jump.c */
1060 int ix86_branch_cost;
1062 /* Variables which are this size or smaller are put in the data/bss
1063 or ldata/lbss sections. */
1065 int ix86_section_threshold = 65536;
1067 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1068 char internal_label_prefix[16];
1069 int internal_label_prefix_len;
1071 static bool ix86_handle_option (size_t, const char *, int);
1072 static void output_pic_addr_const (FILE *, rtx, int);
1073 static void put_condition_code (enum rtx_code, enum machine_mode,
1075 static const char *get_some_local_dynamic_name (void);
1076 static int get_some_local_dynamic_name_1 (rtx *, void *);
1077 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1078 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1080 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1081 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1083 static rtx get_thread_pointer (int);
1084 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1085 static void get_pc_thunk_name (char [32], unsigned int);
1086 static rtx gen_push (rtx);
1087 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1088 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1089 static struct machine_function * ix86_init_machine_status (void);
1090 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1091 static int ix86_nsaved_regs (void);
1092 static void ix86_emit_save_regs (void);
1093 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1094 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1095 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1096 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1097 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1098 static rtx ix86_expand_aligntest (rtx, int);
1099 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1100 static int ix86_issue_rate (void);
1101 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1102 static int ia32_multipass_dfa_lookahead (void);
1103 static void ix86_init_mmx_sse_builtins (void);
1104 static rtx x86_this_parameter (tree);
1105 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1106 HOST_WIDE_INT, tree);
1107 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1108 static void x86_file_start (void);
1109 static void ix86_reorg (void);
1110 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1111 static tree ix86_build_builtin_va_list (void);
1112 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1114 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1115 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1116 static bool ix86_vector_mode_supported_p (enum machine_mode);
1118 static int ix86_address_cost (rtx);
1119 static bool ix86_cannot_force_const_mem (rtx);
1120 static rtx ix86_delegitimize_address (rtx);
1122 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1124 struct builtin_description;
1125 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1127 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1129 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1130 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1131 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1132 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1133 static rtx safe_vector_operand (rtx, enum machine_mode);
1134 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1135 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1136 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1137 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1138 static int ix86_fp_comparison_cost (enum rtx_code code);
1139 static unsigned int ix86_select_alt_pic_regnum (void);
1140 static int ix86_save_reg (unsigned int, int);
1141 static void ix86_compute_frame_layout (struct ix86_frame *);
1142 static int ix86_comp_type_attributes (tree, tree);
1143 static int ix86_function_regparm (tree, tree);
1144 const struct attribute_spec ix86_attribute_table[];
1145 static bool ix86_function_ok_for_sibcall (tree, tree);
1146 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1147 static int ix86_value_regno (enum machine_mode, tree, tree);
1148 static bool contains_128bit_aligned_vector_p (tree);
1149 static rtx ix86_struct_value_rtx (tree, int);
1150 static bool ix86_ms_bitfield_layout_p (tree);
1151 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1152 static int extended_reg_mentioned_1 (rtx *, void *);
1153 static bool ix86_rtx_costs (rtx, int, int, int *);
1154 static int min_insn_size (rtx);
1155 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1156 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1157 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1159 static void ix86_init_builtins (void);
1160 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1161 static const char *ix86_mangle_fundamental_type (tree);
1162 static tree ix86_stack_protect_fail (void);
1163 static rtx ix86_internal_arg_pointer (void);
1164 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1166 /* This function is only used on Solaris. */
1167 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1170 /* Register class used for passing given 64bit part of the argument.
1171 These represent classes as documented by the PS ABI, with the exception
1172 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1173 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1175 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1176 whenever possible (upper half does contain padding).
1178 enum x86_64_reg_class
1181 X86_64_INTEGER_CLASS,
1182 X86_64_INTEGERSI_CLASS,
1189 X86_64_COMPLEX_X87_CLASS,
1192 static const char * const x86_64_reg_class_name[] = {
1193 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1194 "sseup", "x87", "x87up", "cplx87", "no"
1197 #define MAX_CLASSES 4
1199 /* Table of constants used by fldpi, fldln2, etc.... */
1200 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1201 static bool ext_80387_constants_init = 0;
1202 static void init_ext_80387_constants (void);
1203 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1204 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1205 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1206 static section *x86_64_elf_select_section (tree decl, int reloc,
1207 unsigned HOST_WIDE_INT align)
1210 /* Initialize the GCC target structure. */
1211 #undef TARGET_ATTRIBUTE_TABLE
1212 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1213 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1214 # undef TARGET_MERGE_DECL_ATTRIBUTES
1215 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1218 #undef TARGET_COMP_TYPE_ATTRIBUTES
1219 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1221 #undef TARGET_INIT_BUILTINS
1222 #define TARGET_INIT_BUILTINS ix86_init_builtins
1223 #undef TARGET_EXPAND_BUILTIN
1224 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1226 #undef TARGET_ASM_FUNCTION_EPILOGUE
1227 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1229 #undef TARGET_ENCODE_SECTION_INFO
1230 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1231 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1233 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1236 #undef TARGET_ASM_OPEN_PAREN
1237 #define TARGET_ASM_OPEN_PAREN ""
1238 #undef TARGET_ASM_CLOSE_PAREN
1239 #define TARGET_ASM_CLOSE_PAREN ""
1241 #undef TARGET_ASM_ALIGNED_HI_OP
1242 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1243 #undef TARGET_ASM_ALIGNED_SI_OP
1244 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1246 #undef TARGET_ASM_ALIGNED_DI_OP
1247 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1250 #undef TARGET_ASM_UNALIGNED_HI_OP
1251 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1252 #undef TARGET_ASM_UNALIGNED_SI_OP
1253 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1254 #undef TARGET_ASM_UNALIGNED_DI_OP
1255 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1257 #undef TARGET_SCHED_ADJUST_COST
1258 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1259 #undef TARGET_SCHED_ISSUE_RATE
1260 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1261 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1262 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1263 ia32_multipass_dfa_lookahead
1265 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1266 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1269 #undef TARGET_HAVE_TLS
1270 #define TARGET_HAVE_TLS true
1272 #undef TARGET_CANNOT_FORCE_CONST_MEM
1273 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1274 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1275 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1277 #undef TARGET_DELEGITIMIZE_ADDRESS
1278 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1280 #undef TARGET_MS_BITFIELD_LAYOUT_P
1281 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1284 #undef TARGET_BINDS_LOCAL_P
1285 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1288 #undef TARGET_ASM_OUTPUT_MI_THUNK
1289 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1290 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1291 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1293 #undef TARGET_ASM_FILE_START
1294 #define TARGET_ASM_FILE_START x86_file_start
1296 #undef TARGET_DEFAULT_TARGET_FLAGS
1297 #define TARGET_DEFAULT_TARGET_FLAGS \
1299 | TARGET_64BIT_DEFAULT \
1300 | TARGET_SUBTARGET_DEFAULT \
1301 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1303 #undef TARGET_HANDLE_OPTION
1304 #define TARGET_HANDLE_OPTION ix86_handle_option
1306 #undef TARGET_RTX_COSTS
1307 #define TARGET_RTX_COSTS ix86_rtx_costs
1308 #undef TARGET_ADDRESS_COST
1309 #define TARGET_ADDRESS_COST ix86_address_cost
1311 #undef TARGET_FIXED_CONDITION_CODE_REGS
1312 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1313 #undef TARGET_CC_MODES_COMPATIBLE
1314 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1316 #undef TARGET_MACHINE_DEPENDENT_REORG
1317 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1319 #undef TARGET_BUILD_BUILTIN_VA_LIST
1320 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1322 #undef TARGET_MD_ASM_CLOBBERS
1323 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1325 #undef TARGET_PROMOTE_PROTOTYPES
1326 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1327 #undef TARGET_STRUCT_VALUE_RTX
1328 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1329 #undef TARGET_SETUP_INCOMING_VARARGS
1330 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1331 #undef TARGET_MUST_PASS_IN_STACK
1332 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1333 #undef TARGET_PASS_BY_REFERENCE
1334 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1335 #undef TARGET_INTERNAL_ARG_POINTER
1336 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1337 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1338 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1340 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1341 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1343 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1344 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1346 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1347 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1350 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1351 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1354 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1355 #undef TARGET_INSERT_ATTRIBUTES
1356 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1359 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1360 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1362 #undef TARGET_STACK_PROTECT_FAIL
1363 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1365 #undef TARGET_FUNCTION_VALUE
1366 #define TARGET_FUNCTION_VALUE ix86_function_value
1368 struct gcc_target targetm = TARGET_INITIALIZER;
1371 /* The svr4 ABI for the i386 says that records and unions are returned
1373 #ifndef DEFAULT_PCC_STRUCT_RETURN
1374 #define DEFAULT_PCC_STRUCT_RETURN 1
1377 /* Implement TARGET_HANDLE_OPTION. */
1380 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1387 target_flags &= ~MASK_3DNOW_A;
1388 target_flags_explicit |= MASK_3DNOW_A;
1395 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1396 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1403 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1404 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1411 target_flags &= ~MASK_SSE3;
1412 target_flags_explicit |= MASK_SSE3;
1421 /* Sometimes certain combinations of command options do not make
1422 sense on a particular target machine. You can define a macro
1423 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1424 defined, is executed once just after all the command options have
1427 Don't use this macro to turn on various extra optimizations for
1428 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1431 override_options (void)
1434 int ix86_tune_defaulted = 0;
1436 /* Comes from final.c -- no real reason to change it. */
1437 #define MAX_CODE_ALIGN 16
1441 const struct processor_costs *cost; /* Processor costs */
1442 const int target_enable; /* Target flags to enable. */
1443 const int target_disable; /* Target flags to disable. */
1444 const int align_loop; /* Default alignments. */
1445 const int align_loop_max_skip;
1446 const int align_jump;
1447 const int align_jump_max_skip;
1448 const int align_func;
1450 const processor_target_table[PROCESSOR_max] =
1452 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1453 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1454 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1455 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1456 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1457 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1458 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1459 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1460 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1461 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1462 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1465 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1468 const char *const name; /* processor name or nickname. */
1469 const enum processor_type processor;
1470 const enum pta_flags
1476 PTA_PREFETCH_SSE = 16,
1482 const processor_alias_table[] =
1484 {"i386", PROCESSOR_I386, 0},
1485 {"i486", PROCESSOR_I486, 0},
1486 {"i586", PROCESSOR_PENTIUM, 0},
1487 {"pentium", PROCESSOR_PENTIUM, 0},
1488 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1489 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1490 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1491 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1492 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1493 {"i686", PROCESSOR_PENTIUMPRO, 0},
1494 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1495 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1496 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1497 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1498 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1499 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1500 | PTA_MMX | PTA_PREFETCH_SSE},
1501 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1502 | PTA_MMX | PTA_PREFETCH_SSE},
1503 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1504 | PTA_MMX | PTA_PREFETCH_SSE},
1505 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1506 | PTA_MMX | PTA_PREFETCH_SSE},
1507 {"k6", PROCESSOR_K6, PTA_MMX},
1508 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1509 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1510 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1512 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1513 | PTA_3DNOW | PTA_3DNOW_A},
1514 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1515 | PTA_3DNOW_A | PTA_SSE},
1516 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1517 | PTA_3DNOW_A | PTA_SSE},
1518 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1519 | PTA_3DNOW_A | PTA_SSE},
1520 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1521 | PTA_SSE | PTA_SSE2 },
1522 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1523 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1524 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1525 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1526 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1527 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1528 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1529 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1530 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1531 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1534 int const pta_size = ARRAY_SIZE (processor_alias_table);
1536 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1537 SUBTARGET_OVERRIDE_OPTIONS;
1540 /* Set the default values for switches whose default depends on TARGET_64BIT
1541 in case they weren't overwritten by command line options. */
1544 if (flag_omit_frame_pointer == 2)
1545 flag_omit_frame_pointer = 1;
1546 if (flag_asynchronous_unwind_tables == 2)
1547 flag_asynchronous_unwind_tables = 1;
1548 if (flag_pcc_struct_return == 2)
1549 flag_pcc_struct_return = 0;
1553 if (flag_omit_frame_pointer == 2)
1554 flag_omit_frame_pointer = 0;
1555 if (flag_asynchronous_unwind_tables == 2)
1556 flag_asynchronous_unwind_tables = 0;
1557 if (flag_pcc_struct_return == 2)
1558 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1561 /* Need to check -mtune=generic first. */
1562 if (ix86_tune_string)
1564 if (!strcmp (ix86_tune_string, "generic")
1565 || !strcmp (ix86_tune_string, "i686")
1566 /* As special support for cross compilers we read -mtune=native
1567 as -mtune=generic. With native compilers we won't see the
1568 -mtune=native, as it was changed by the driver. */
1569 || !strcmp (ix86_tune_string, "native"))
1572 ix86_tune_string = "generic64";
1574 ix86_tune_string = "generic32";
1576 else if (!strncmp (ix86_tune_string, "generic", 7))
1577 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1581 if (ix86_arch_string)
1582 ix86_tune_string = ix86_arch_string;
1583 if (!ix86_tune_string)
1585 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1586 ix86_tune_defaulted = 1;
1589 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1590 need to use a sensible tune option. */
1591 if (!strcmp (ix86_tune_string, "generic")
1592 || !strcmp (ix86_tune_string, "x86-64")
1593 || !strcmp (ix86_tune_string, "i686"))
1596 ix86_tune_string = "generic64";
1598 ix86_tune_string = "generic32";
1601 if (!strcmp (ix86_tune_string, "x86-64"))
1602 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1603 "-mtune=generic instead as appropriate.");
1605 if (!ix86_arch_string)
1606 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1607 if (!strcmp (ix86_arch_string, "generic"))
1608 error ("generic CPU can be used only for -mtune= switch");
1609 if (!strncmp (ix86_arch_string, "generic", 7))
1610 error ("bad value (%s) for -march= switch", ix86_arch_string);
1612 if (ix86_cmodel_string != 0)
1614 if (!strcmp (ix86_cmodel_string, "small"))
1615 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1616 else if (!strcmp (ix86_cmodel_string, "medium"))
1617 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1619 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1620 else if (!strcmp (ix86_cmodel_string, "32"))
1621 ix86_cmodel = CM_32;
1622 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1623 ix86_cmodel = CM_KERNEL;
1624 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1625 ix86_cmodel = CM_LARGE;
1627 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1631 ix86_cmodel = CM_32;
1633 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1635 if (ix86_asm_string != 0)
1638 && !strcmp (ix86_asm_string, "intel"))
1639 ix86_asm_dialect = ASM_INTEL;
1640 else if (!strcmp (ix86_asm_string, "att"))
1641 ix86_asm_dialect = ASM_ATT;
1643 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1645 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1646 error ("code model %qs not supported in the %s bit mode",
1647 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1648 if (ix86_cmodel == CM_LARGE)
1649 sorry ("code model %<large%> not supported yet");
1650 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1651 sorry ("%i-bit mode not compiled in",
1652 (target_flags & MASK_64BIT) ? 64 : 32);
1654 for (i = 0; i < pta_size; i++)
1655 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1657 ix86_arch = processor_alias_table[i].processor;
1658 /* Default cpu tuning to the architecture. */
1659 ix86_tune = ix86_arch;
1660 if (processor_alias_table[i].flags & PTA_MMX
1661 && !(target_flags_explicit & MASK_MMX))
1662 target_flags |= MASK_MMX;
1663 if (processor_alias_table[i].flags & PTA_3DNOW
1664 && !(target_flags_explicit & MASK_3DNOW))
1665 target_flags |= MASK_3DNOW;
1666 if (processor_alias_table[i].flags & PTA_3DNOW_A
1667 && !(target_flags_explicit & MASK_3DNOW_A))
1668 target_flags |= MASK_3DNOW_A;
1669 if (processor_alias_table[i].flags & PTA_SSE
1670 && !(target_flags_explicit & MASK_SSE))
1671 target_flags |= MASK_SSE;
1672 if (processor_alias_table[i].flags & PTA_SSE2
1673 && !(target_flags_explicit & MASK_SSE2))
1674 target_flags |= MASK_SSE2;
1675 if (processor_alias_table[i].flags & PTA_SSE3
1676 && !(target_flags_explicit & MASK_SSE3))
1677 target_flags |= MASK_SSE3;
1678 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1679 x86_prefetch_sse = true;
1680 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1681 error ("CPU you selected does not support x86-64 "
1687 error ("bad value (%s) for -march= switch", ix86_arch_string);
1689 for (i = 0; i < pta_size; i++)
1690 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1692 ix86_tune = processor_alias_table[i].processor;
1693 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1695 if (ix86_tune_defaulted)
1697 ix86_tune_string = "x86-64";
1698 for (i = 0; i < pta_size; i++)
1699 if (! strcmp (ix86_tune_string,
1700 processor_alias_table[i].name))
1702 ix86_tune = processor_alias_table[i].processor;
1705 error ("CPU you selected does not support x86-64 "
1708 /* Intel CPUs have always interpreted SSE prefetch instructions as
1709 NOPs; so, we can enable SSE prefetch instructions even when
1710 -mtune (rather than -march) points us to a processor that has them.
1711 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1712 higher processors. */
1713 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1714 x86_prefetch_sse = true;
1718 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1721 ix86_cost = &size_cost;
1723 ix86_cost = processor_target_table[ix86_tune].cost;
1724 target_flags |= processor_target_table[ix86_tune].target_enable;
1725 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1727 /* Arrange to set up i386_stack_locals for all functions. */
1728 init_machine_status = ix86_init_machine_status;
1730 /* Validate -mregparm= value. */
1731 if (ix86_regparm_string)
1733 i = atoi (ix86_regparm_string);
1734 if (i < 0 || i > REGPARM_MAX)
1735 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1741 ix86_regparm = REGPARM_MAX;
1743 /* If the user has provided any of the -malign-* options,
1744 warn and use that value only if -falign-* is not set.
1745 Remove this code in GCC 3.2 or later. */
1746 if (ix86_align_loops_string)
1748 warning (0, "-malign-loops is obsolete, use -falign-loops");
1749 if (align_loops == 0)
1751 i = atoi (ix86_align_loops_string);
1752 if (i < 0 || i > MAX_CODE_ALIGN)
1753 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1755 align_loops = 1 << i;
1759 if (ix86_align_jumps_string)
1761 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1762 if (align_jumps == 0)
1764 i = atoi (ix86_align_jumps_string);
1765 if (i < 0 || i > MAX_CODE_ALIGN)
1766 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1768 align_jumps = 1 << i;
1772 if (ix86_align_funcs_string)
1774 warning (0, "-malign-functions is obsolete, use -falign-functions");
1775 if (align_functions == 0)
1777 i = atoi (ix86_align_funcs_string);
1778 if (i < 0 || i > MAX_CODE_ALIGN)
1779 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1781 align_functions = 1 << i;
1785 /* Default align_* from the processor table. */
1786 if (align_loops == 0)
1788 align_loops = processor_target_table[ix86_tune].align_loop;
1789 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1791 if (align_jumps == 0)
1793 align_jumps = processor_target_table[ix86_tune].align_jump;
1794 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1796 if (align_functions == 0)
1798 align_functions = processor_target_table[ix86_tune].align_func;
1801 /* Validate -mpreferred-stack-boundary= value, or provide default.
1802 The default of 128 bits is for Pentium III's SSE __m128, but we
1803 don't want additional code to keep the stack aligned when
1804 optimizing for code size. */
1805 ix86_preferred_stack_boundary = ((TARGET_64BIT || TARGET_MACHO || !optimize_size)
1807 if (ix86_preferred_stack_boundary_string)
1809 i = atoi (ix86_preferred_stack_boundary_string);
1810 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1811 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1812 TARGET_64BIT ? 4 : 2);
1814 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1817 /* Validate -mbranch-cost= value, or provide default. */
1818 ix86_branch_cost = ix86_cost->branch_cost;
1819 if (ix86_branch_cost_string)
1821 i = atoi (ix86_branch_cost_string);
1823 error ("-mbranch-cost=%d is not between 0 and 5", i);
1825 ix86_branch_cost = i;
1827 if (ix86_section_threshold_string)
1829 i = atoi (ix86_section_threshold_string);
1831 error ("-mlarge-data-threshold=%d is negative", i);
1833 ix86_section_threshold = i;
1836 if (ix86_tls_dialect_string)
1838 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1839 ix86_tls_dialect = TLS_DIALECT_GNU;
1840 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1841 ix86_tls_dialect = TLS_DIALECT_GNU2;
1842 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1843 ix86_tls_dialect = TLS_DIALECT_SUN;
1845 error ("bad value (%s) for -mtls-dialect= switch",
1846 ix86_tls_dialect_string);
1849 /* Keep nonleaf frame pointers. */
1850 if (flag_omit_frame_pointer)
1851 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1852 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1853 flag_omit_frame_pointer = 1;
1855 /* If we're doing fast math, we don't care about comparison order
1856 wrt NaNs. This lets us use a shorter comparison sequence. */
1857 if (flag_unsafe_math_optimizations)
1858 target_flags &= ~MASK_IEEE_FP;
1860 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1861 since the insns won't need emulation. */
1862 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1863 target_flags &= ~MASK_NO_FANCY_MATH_387;
1865 /* Likewise, if the target doesn't have a 387, or we've specified
1866 software floating point, don't use 387 inline intrinsics. */
1868 target_flags |= MASK_NO_FANCY_MATH_387;
1870 /* Turn on SSE2 builtins for -msse3. */
1872 target_flags |= MASK_SSE2;
1874 /* Turn on SSE builtins for -msse2. */
1876 target_flags |= MASK_SSE;
1878 /* Turn on MMX builtins for -msse. */
1881 target_flags |= MASK_MMX & ~target_flags_explicit;
1882 x86_prefetch_sse = true;
1885 /* Turn on MMX builtins for 3Dnow. */
1887 target_flags |= MASK_MMX;
1891 if (TARGET_ALIGN_DOUBLE)
1892 error ("-malign-double makes no sense in the 64bit mode");
1894 error ("-mrtd calling convention not supported in the 64bit mode");
1896 /* Enable by default the SSE and MMX builtins. Do allow the user to
1897 explicitly disable any of these. In particular, disabling SSE and
1898 MMX for kernel code is extremely useful. */
1900 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1901 & ~target_flags_explicit);
1905 /* i386 ABI does not specify red zone. It still makes sense to use it
1906 when programmer takes care to stack from being destroyed. */
1907 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1908 target_flags |= MASK_NO_RED_ZONE;
1911 /* Accept -msseregparm only if at least SSE support is enabled. */
1912 if (TARGET_SSEREGPARM
1914 error ("-msseregparm used without SSE enabled");
1916 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1918 if (ix86_fpmath_string != 0)
1920 if (! strcmp (ix86_fpmath_string, "387"))
1921 ix86_fpmath = FPMATH_387;
1922 else if (! strcmp (ix86_fpmath_string, "sse"))
1926 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1927 ix86_fpmath = FPMATH_387;
1930 ix86_fpmath = FPMATH_SSE;
1932 else if (! strcmp (ix86_fpmath_string, "387,sse")
1933 || ! strcmp (ix86_fpmath_string, "sse,387"))
1937 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1938 ix86_fpmath = FPMATH_387;
1940 else if (!TARGET_80387)
1942 warning (0, "387 instruction set disabled, using SSE arithmetics");
1943 ix86_fpmath = FPMATH_SSE;
1946 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1949 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1952 /* If the i387 is disabled, then do not return values in it. */
1954 target_flags &= ~MASK_FLOAT_RETURNS;
1956 if ((x86_accumulate_outgoing_args & TUNEMASK)
1957 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1959 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1961 /* ??? Unwind info is not correct around the CFG unless either a frame
1962 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1963 unwind info generation to be aware of the CFG and propagating states
1965 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1966 || flag_exceptions || flag_non_call_exceptions)
1967 && flag_omit_frame_pointer
1968 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1970 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1971 warning (0, "unwind tables currently require either a frame pointer "
1972 "or -maccumulate-outgoing-args for correctness");
1973 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1976 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1979 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1980 p = strchr (internal_label_prefix, 'X');
1981 internal_label_prefix_len = p - internal_label_prefix;
1985 /* When scheduling description is not available, disable scheduler pass
1986 so it won't slow down the compilation and make x87 code slower. */
1987 if (!TARGET_SCHEDULE)
1988 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1991 /* switch to the appropriate section for output of DECL.
1992 DECL is either a `VAR_DECL' node or a constant of some sort.
1993 RELOC indicates whether forming the initial value of DECL requires
1994 link-time relocations. */
1997 x86_64_elf_select_section (tree decl, int reloc,
1998 unsigned HOST_WIDE_INT align)
2000 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2001 && ix86_in_large_data_p (decl))
2003 const char *sname = NULL;
2004 unsigned int flags = SECTION_WRITE;
2005 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2010 case SECCAT_DATA_REL:
2011 sname = ".ldata.rel";
2013 case SECCAT_DATA_REL_LOCAL:
2014 sname = ".ldata.rel.local";
2016 case SECCAT_DATA_REL_RO:
2017 sname = ".ldata.rel.ro";
2019 case SECCAT_DATA_REL_RO_LOCAL:
2020 sname = ".ldata.rel.ro.local";
2024 flags |= SECTION_BSS;
2027 case SECCAT_RODATA_MERGE_STR:
2028 case SECCAT_RODATA_MERGE_STR_INIT:
2029 case SECCAT_RODATA_MERGE_CONST:
2033 case SECCAT_SRODATA:
2040 /* We don't split these for medium model. Place them into
2041 default sections and hope for best. */
2046 /* We might get called with string constants, but get_named_section
2047 doesn't like them as they are not DECLs. Also, we need to set
2048 flags in that case. */
2050 return get_section (sname, flags, NULL);
2051 return get_named_section (decl, sname, reloc);
2054 return default_elf_select_section (decl, reloc, align);
2057 /* Build up a unique section name, expressed as a
2058 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2059 RELOC indicates whether the initial value of EXP requires
2060 link-time relocations. */
2063 x86_64_elf_unique_section (tree decl, int reloc)
2065 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2066 && ix86_in_large_data_p (decl))
2068 const char *prefix = NULL;
2069 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2070 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2072 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2075 case SECCAT_DATA_REL:
2076 case SECCAT_DATA_REL_LOCAL:
2077 case SECCAT_DATA_REL_RO:
2078 case SECCAT_DATA_REL_RO_LOCAL:
2079 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2082 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2085 case SECCAT_RODATA_MERGE_STR:
2086 case SECCAT_RODATA_MERGE_STR_INIT:
2087 case SECCAT_RODATA_MERGE_CONST:
2088 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2090 case SECCAT_SRODATA:
2097 /* We don't split these for medium model. Place them into
2098 default sections and hope for best. */
2106 plen = strlen (prefix);
2108 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2109 name = targetm.strip_name_encoding (name);
2110 nlen = strlen (name);
2112 string = alloca (nlen + plen + 1);
2113 memcpy (string, prefix, plen);
2114 memcpy (string + plen, name, nlen + 1);
2116 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2120 default_unique_section (decl, reloc);
2123 #ifdef COMMON_ASM_OP
2124 /* This says how to output assembler code to declare an
2125 uninitialized external linkage data object.
2127 For medium model x86-64 we need to use .largecomm opcode for
2130 x86_elf_aligned_common (FILE *file,
2131 const char *name, unsigned HOST_WIDE_INT size,
2134 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2135 && size > (unsigned int)ix86_section_threshold)
2136 fprintf (file, ".largecomm\t");
2138 fprintf (file, "%s", COMMON_ASM_OP);
2139 assemble_name (file, name);
2140 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2141 size, align / BITS_PER_UNIT);
2144 /* Utility function for targets to use in implementing
2145 ASM_OUTPUT_ALIGNED_BSS. */
2148 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2149 const char *name, unsigned HOST_WIDE_INT size,
2152 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2153 && size > (unsigned int)ix86_section_threshold)
2154 switch_to_section (get_named_section (decl, ".lbss", 0));
2156 switch_to_section (bss_section);
2157 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2158 #ifdef ASM_DECLARE_OBJECT_NAME
2159 last_assemble_variable_decl = decl;
2160 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2162 /* Standard thing is just output label for the object. */
2163 ASM_OUTPUT_LABEL (file, name);
2164 #endif /* ASM_DECLARE_OBJECT_NAME */
2165 ASM_OUTPUT_SKIP (file, size ? size : 1);
2170 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2172 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2173 make the problem with not enough registers even worse. */
2174 #ifdef INSN_SCHEDULING
2176 flag_schedule_insns = 0;
2180 /* The Darwin libraries never set errno, so we might as well
2181 avoid calling them when that's the only reason we would. */
2182 flag_errno_math = 0;
2184 /* The default values of these switches depend on the TARGET_64BIT
2185 that is not known at this moment. Mark these values with 2 and
2186 let user the to override these. In case there is no command line option
2187 specifying them, we will set the defaults in override_options. */
2189 flag_omit_frame_pointer = 2;
2190 flag_pcc_struct_return = 2;
2191 flag_asynchronous_unwind_tables = 2;
2192 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2193 SUBTARGET_OPTIMIZATION_OPTIONS;
2197 /* Table of valid machine attributes. */
2198 const struct attribute_spec ix86_attribute_table[] =
2200 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2201 /* Stdcall attribute says callee is responsible for popping arguments
2202 if they are not variable. */
2203 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2204 /* Fastcall attribute says callee is responsible for popping arguments
2205 if they are not variable. */
2206 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2207 /* Cdecl attribute says the callee is a normal C declaration */
2208 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2209 /* Regparm attribute specifies how many integer arguments are to be
2210 passed in registers. */
2211 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2212 /* Sseregparm attribute says we are using x86_64 calling conventions
2213 for FP arguments. */
2214 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2215 /* force_align_arg_pointer says this function realigns the stack at entry. */
2216 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2217 false, true, true, ix86_handle_cconv_attribute },
2218 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2219 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2220 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2221 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2223 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2224 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2225 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2226 SUBTARGET_ATTRIBUTE_TABLE,
2228 { NULL, 0, 0, false, false, false, NULL }
2231 /* Decide whether we can make a sibling call to a function. DECL is the
2232 declaration of the function being targeted by the call and EXP is the
2233 CALL_EXPR representing the call. */
2236 ix86_function_ok_for_sibcall (tree decl, tree exp)
2241 /* If we are generating position-independent code, we cannot sibcall
2242 optimize any indirect call, or a direct call to a global function,
2243 as the PLT requires %ebx be live. */
2244 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2251 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2252 if (POINTER_TYPE_P (func))
2253 func = TREE_TYPE (func);
2256 /* Check that the return value locations are the same. Like
2257 if we are returning floats on the 80387 register stack, we cannot
2258 make a sibcall from a function that doesn't return a float to a
2259 function that does or, conversely, from a function that does return
2260 a float to a function that doesn't; the necessary stack adjustment
2261 would not be executed. This is also the place we notice
2262 differences in the return value ABI. Note that it is ok for one
2263 of the functions to have void return type as long as the return
2264 value of the other is passed in a register. */
2265 a = ix86_function_value (TREE_TYPE (exp), func, false);
2266 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2268 if (STACK_REG_P (a) || STACK_REG_P (b))
2270 if (!rtx_equal_p (a, b))
2273 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2275 else if (!rtx_equal_p (a, b))
2278 /* If this call is indirect, we'll need to be able to use a call-clobbered
2279 register for the address of the target function. Make sure that all
2280 such registers are not used for passing parameters. */
2281 if (!decl && !TARGET_64BIT)
2285 /* We're looking at the CALL_EXPR, we need the type of the function. */
2286 type = TREE_OPERAND (exp, 0); /* pointer expression */
2287 type = TREE_TYPE (type); /* pointer type */
2288 type = TREE_TYPE (type); /* function type */
2290 if (ix86_function_regparm (type, NULL) >= 3)
2292 /* ??? Need to count the actual number of registers to be used,
2293 not the possible number of registers. Fix later. */
2298 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2299 /* Dllimport'd functions are also called indirectly. */
2300 if (decl && DECL_DLLIMPORT_P (decl)
2301 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2305 /* If we forced aligned the stack, then sibcalling would unalign the
2306 stack, which may break the called function. */
2307 if (cfun->machine->force_align_arg_pointer)
2310 /* Otherwise okay. That also includes certain types of indirect calls. */
2314 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2315 calling convention attributes;
2316 arguments as in struct attribute_spec.handler. */
2319 ix86_handle_cconv_attribute (tree *node, tree name,
2321 int flags ATTRIBUTE_UNUSED,
2324 if (TREE_CODE (*node) != FUNCTION_TYPE
2325 && TREE_CODE (*node) != METHOD_TYPE
2326 && TREE_CODE (*node) != FIELD_DECL
2327 && TREE_CODE (*node) != TYPE_DECL)
2329 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2330 IDENTIFIER_POINTER (name));
2331 *no_add_attrs = true;
2335 /* Can combine regparm with all attributes but fastcall. */
2336 if (is_attribute_p ("regparm", name))
2340 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2342 error ("fastcall and regparm attributes are not compatible");
2345 cst = TREE_VALUE (args);
2346 if (TREE_CODE (cst) != INTEGER_CST)
2348 warning (OPT_Wattributes,
2349 "%qs attribute requires an integer constant argument",
2350 IDENTIFIER_POINTER (name));
2351 *no_add_attrs = true;
2353 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2355 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2356 IDENTIFIER_POINTER (name), REGPARM_MAX);
2357 *no_add_attrs = true;
2361 && lookup_attribute (ix86_force_align_arg_pointer_string,
2362 TYPE_ATTRIBUTES (*node))
2363 && compare_tree_int (cst, REGPARM_MAX-1))
2365 error ("%s functions limited to %d register parameters",
2366 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2374 warning (OPT_Wattributes, "%qs attribute ignored",
2375 IDENTIFIER_POINTER (name));
2376 *no_add_attrs = true;
2380 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2381 if (is_attribute_p ("fastcall", name))
2383 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2385 error ("fastcall and cdecl attributes are not compatible");
2387 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2389 error ("fastcall and stdcall attributes are not compatible");
2391 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2393 error ("fastcall and regparm attributes are not compatible");
2397 /* Can combine stdcall with fastcall (redundant), regparm and
2399 else if (is_attribute_p ("stdcall", name))
2401 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2403 error ("stdcall and cdecl attributes are not compatible");
2405 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2407 error ("stdcall and fastcall attributes are not compatible");
2411 /* Can combine cdecl with regparm and sseregparm. */
2412 else if (is_attribute_p ("cdecl", name))
2414 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2416 error ("stdcall and cdecl attributes are not compatible");
2418 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2420 error ("fastcall and cdecl attributes are not compatible");
2424 /* Can combine sseregparm with all attributes. */
2429 /* Return 0 if the attributes for two types are incompatible, 1 if they
2430 are compatible, and 2 if they are nearly compatible (which causes a
2431 warning to be generated). */
2434 ix86_comp_type_attributes (tree type1, tree type2)
2436 /* Check for mismatch of non-default calling convention. */
2437 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2439 if (TREE_CODE (type1) != FUNCTION_TYPE)
2442 /* Check for mismatched fastcall/regparm types. */
2443 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2444 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2445 || (ix86_function_regparm (type1, NULL)
2446 != ix86_function_regparm (type2, NULL)))
2449 /* Check for mismatched sseregparm types. */
2450 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2451 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2454 /* Check for mismatched return types (cdecl vs stdcall). */
2455 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2456 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2462 /* Return the regparm value for a function with the indicated TYPE and DECL.
2463 DECL may be NULL when calling function indirectly
2464 or considering a libcall. */
2467 ix86_function_regparm (tree type, tree decl)
2470 int regparm = ix86_regparm;
2471 bool user_convention = false;
2475 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2478 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2479 user_convention = true;
2482 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2485 user_convention = true;
2488 /* Use register calling convention for local functions when possible. */
2489 if (!TARGET_64BIT && !user_convention && decl
2490 && flag_unit_at_a_time && !profile_flag)
2492 struct cgraph_local_info *i = cgraph_local_info (decl);
2495 int local_regparm, globals = 0, regno;
2497 /* Make sure no regparm register is taken by a global register
2499 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2500 if (global_regs[local_regparm])
2502 /* We can't use regparm(3) for nested functions as these use
2503 static chain pointer in third argument. */
2504 if (local_regparm == 3
2505 && decl_function_context (decl)
2506 && !DECL_NO_STATIC_CHAIN (decl))
2508 /* If the function realigns its stackpointer, the
2509 prologue will clobber %ecx. If we've already
2510 generated code for the callee, the callee
2511 DECL_STRUCT_FUNCTION is gone, so we fall back to
2512 scanning the attributes for the self-realigning
2514 if ((DECL_STRUCT_FUNCTION (decl)
2515 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2516 || (!DECL_STRUCT_FUNCTION (decl)
2517 && lookup_attribute (ix86_force_align_arg_pointer_string,
2518 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2520 /* Each global register variable increases register preassure,
2521 so the more global reg vars there are, the smaller regparm
2522 optimization use, unless requested by the user explicitly. */
2523 for (regno = 0; regno < 6; regno++)
2524 if (global_regs[regno])
2527 = globals < local_regparm ? local_regparm - globals : 0;
2529 if (local_regparm > regparm)
2530 regparm = local_regparm;
2537 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2538 in SSE registers for a function with the indicated TYPE and DECL.
2539 DECL may be NULL when calling function indirectly
2540 or considering a libcall. Otherwise return 0. */
2543 ix86_function_sseregparm (tree type, tree decl)
2545 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2546 by the sseregparm attribute. */
2547 if (TARGET_SSEREGPARM
2549 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2554 error ("Calling %qD with attribute sseregparm without "
2555 "SSE/SSE2 enabled", decl);
2557 error ("Calling %qT with attribute sseregparm without "
2558 "SSE/SSE2 enabled", type);
2565 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2566 in SSE registers even for 32-bit mode and not just 3, but up to
2567 8 SSE arguments in registers. */
2568 if (!TARGET_64BIT && decl
2569 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2571 struct cgraph_local_info *i = cgraph_local_info (decl);
2573 return TARGET_SSE2 ? 2 : 1;
2579 /* Return true if EAX is live at the start of the function. Used by
2580 ix86_expand_prologue to determine if we need special help before
2581 calling allocate_stack_worker. */
2584 ix86_eax_live_at_start_p (void)
2586 /* Cheat. Don't bother working forward from ix86_function_regparm
2587 to the function type to whether an actual argument is located in
2588 eax. Instead just look at cfg info, which is still close enough
2589 to correct at this point. This gives false positives for broken
2590 functions that might use uninitialized data that happens to be
2591 allocated in eax, but who cares? */
2592 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2595 /* Value is the number of bytes of arguments automatically
2596 popped when returning from a subroutine call.
2597 FUNDECL is the declaration node of the function (as a tree),
2598 FUNTYPE is the data type of the function (as a tree),
2599 or for a library call it is an identifier node for the subroutine name.
2600 SIZE is the number of bytes of arguments passed on the stack.
2602 On the 80386, the RTD insn may be used to pop them if the number
2603 of args is fixed, but if the number is variable then the caller
2604 must pop them all. RTD can't be used for library calls now
2605 because the library is compiled with the Unix compiler.
2606 Use of RTD is a selectable option, since it is incompatible with
2607 standard Unix calling sequences. If the option is not selected,
2608 the caller must always pop the args.
2610 The attribute stdcall is equivalent to RTD on a per module basis. */
2613 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2615 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2617 /* Cdecl functions override -mrtd, and never pop the stack. */
2618 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2620 /* Stdcall and fastcall functions will pop the stack if not
2622 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2623 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2627 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2628 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2629 == void_type_node)))
2633 /* Lose any fake structure return argument if it is passed on the stack. */
2634 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2636 && !KEEP_AGGREGATE_RETURN_POINTER)
2638 int nregs = ix86_function_regparm (funtype, fundecl);
2641 return GET_MODE_SIZE (Pmode);
2647 /* Argument support functions. */
2649 /* Return true when register may be used to pass function parameters. */
2651 ix86_function_arg_regno_p (int regno)
2655 return (regno < REGPARM_MAX
2656 || (TARGET_MMX && MMX_REGNO_P (regno)
2657 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2658 || (TARGET_SSE && SSE_REGNO_P (regno)
2659 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2661 if (TARGET_SSE && SSE_REGNO_P (regno)
2662 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2664 /* RAX is used as hidden argument to va_arg functions. */
2667 for (i = 0; i < REGPARM_MAX; i++)
2668 if (regno == x86_64_int_parameter_registers[i])
2673 /* Return if we do not know how to pass TYPE solely in registers. */
2676 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2678 if (must_pass_in_stack_var_size_or_pad (mode, type))
2681 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2682 The layout_type routine is crafty and tries to trick us into passing
2683 currently unsupported vector types on the stack by using TImode. */
2684 return (!TARGET_64BIT && mode == TImode
2685 && type && TREE_CODE (type) != VECTOR_TYPE);
2688 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2689 for a call to a function whose data type is FNTYPE.
2690 For a library call, FNTYPE is 0. */
2693 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2694 tree fntype, /* tree ptr for function decl */
2695 rtx libname, /* SYMBOL_REF of library name or 0 */
2698 static CUMULATIVE_ARGS zero_cum;
2699 tree param, next_param;
2701 if (TARGET_DEBUG_ARG)
2703 fprintf (stderr, "\ninit_cumulative_args (");
2705 fprintf (stderr, "fntype code = %s, ret code = %s",
2706 tree_code_name[(int) TREE_CODE (fntype)],
2707 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2709 fprintf (stderr, "no fntype");
2712 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2717 /* Set up the number of registers to use for passing arguments. */
2718 cum->nregs = ix86_regparm;
2720 cum->sse_nregs = SSE_REGPARM_MAX;
2722 cum->mmx_nregs = MMX_REGPARM_MAX;
2723 cum->warn_sse = true;
2724 cum->warn_mmx = true;
2725 cum->maybe_vaarg = false;
2727 /* Use ecx and edx registers if function has fastcall attribute,
2728 else look for regparm information. */
2729 if (fntype && !TARGET_64BIT)
2731 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2737 cum->nregs = ix86_function_regparm (fntype, fndecl);
2740 /* Set up the number of SSE registers used for passing SFmode
2741 and DFmode arguments. Warn for mismatching ABI. */
2742 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2744 /* Determine if this function has variable arguments. This is
2745 indicated by the last argument being 'void_type_mode' if there
2746 are no variable arguments. If there are variable arguments, then
2747 we won't pass anything in registers in 32-bit mode. */
2749 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2751 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2752 param != 0; param = next_param)
2754 next_param = TREE_CHAIN (param);
2755 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2765 cum->float_in_sse = 0;
2767 cum->maybe_vaarg = true;
2771 if ((!fntype && !libname)
2772 || (fntype && !TYPE_ARG_TYPES (fntype)))
2773 cum->maybe_vaarg = true;
2775 if (TARGET_DEBUG_ARG)
2776 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2781 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2782 But in the case of vector types, it is some vector mode.
2784 When we have only some of our vector isa extensions enabled, then there
2785 are some modes for which vector_mode_supported_p is false. For these
2786 modes, the generic vector support in gcc will choose some non-vector mode
2787 in order to implement the type. By computing the natural mode, we'll
2788 select the proper ABI location for the operand and not depend on whatever
2789 the middle-end decides to do with these vector types. */
2791 static enum machine_mode
2792 type_natural_mode (tree type)
2794 enum machine_mode mode = TYPE_MODE (type);
2796 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2798 HOST_WIDE_INT size = int_size_in_bytes (type);
2799 if ((size == 8 || size == 16)
2800 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2801 && TYPE_VECTOR_SUBPARTS (type) > 1)
2803 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2805 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2806 mode = MIN_MODE_VECTOR_FLOAT;
2808 mode = MIN_MODE_VECTOR_INT;
2810 /* Get the mode which has this inner mode and number of units. */
2811 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2812 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2813 && GET_MODE_INNER (mode) == innermode)
2823 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2824 this may not agree with the mode that the type system has chosen for the
2825 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2826 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2829 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2834 if (orig_mode != BLKmode)
2835 tmp = gen_rtx_REG (orig_mode, regno);
2838 tmp = gen_rtx_REG (mode, regno);
2839 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2840 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2846 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2847 of this code is to classify each 8bytes of incoming argument by the register
2848 class and assign registers accordingly. */
2850 /* Return the union class of CLASS1 and CLASS2.
2851 See the x86-64 PS ABI for details. */
2853 static enum x86_64_reg_class
2854 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2856 /* Rule #1: If both classes are equal, this is the resulting class. */
2857 if (class1 == class2)
2860 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2862 if (class1 == X86_64_NO_CLASS)
2864 if (class2 == X86_64_NO_CLASS)
2867 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2868 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2869 return X86_64_MEMORY_CLASS;
2871 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2872 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2873 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2874 return X86_64_INTEGERSI_CLASS;
2875 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2876 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2877 return X86_64_INTEGER_CLASS;
2879 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2881 if (class1 == X86_64_X87_CLASS
2882 || class1 == X86_64_X87UP_CLASS
2883 || class1 == X86_64_COMPLEX_X87_CLASS
2884 || class2 == X86_64_X87_CLASS
2885 || class2 == X86_64_X87UP_CLASS
2886 || class2 == X86_64_COMPLEX_X87_CLASS)
2887 return X86_64_MEMORY_CLASS;
2889 /* Rule #6: Otherwise class SSE is used. */
2890 return X86_64_SSE_CLASS;
2893 /* Classify the argument of type TYPE and mode MODE.
2894 CLASSES will be filled by the register class used to pass each word
2895 of the operand. The number of words is returned. In case the parameter
2896 should be passed in memory, 0 is returned. As a special case for zero
2897 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2899 BIT_OFFSET is used internally for handling records and specifies offset
2900 of the offset in bits modulo 256 to avoid overflow cases.
2902 See the x86-64 PS ABI for details.
2906 classify_argument (enum machine_mode mode, tree type,
2907 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2909 HOST_WIDE_INT bytes =
2910 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2911 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2913 /* Variable sized entities are always passed/returned in memory. */
2917 if (mode != VOIDmode
2918 && targetm.calls.must_pass_in_stack (mode, type))
2921 if (type && AGGREGATE_TYPE_P (type))
2925 enum x86_64_reg_class subclasses[MAX_CLASSES];
2927 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2931 for (i = 0; i < words; i++)
2932 classes[i] = X86_64_NO_CLASS;
2934 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2935 signalize memory class, so handle it as special case. */
2938 classes[0] = X86_64_NO_CLASS;
2942 /* Classify each field of record and merge classes. */
2943 switch (TREE_CODE (type))
2946 /* For classes first merge in the field of the subclasses. */
2947 if (TYPE_BINFO (type))
2949 tree binfo, base_binfo;
2952 for (binfo = TYPE_BINFO (type), basenum = 0;
2953 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2956 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2957 tree type = BINFO_TYPE (base_binfo);
2959 num = classify_argument (TYPE_MODE (type),
2961 (offset + bit_offset) % 256);
2964 for (i = 0; i < num; i++)
2966 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2968 merge_classes (subclasses[i], classes[i + pos]);
2972 /* And now merge the fields of structure. */
2973 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2975 if (TREE_CODE (field) == FIELD_DECL)
2979 if (TREE_TYPE (field) == error_mark_node)
2982 /* Bitfields are always classified as integer. Handle them
2983 early, since later code would consider them to be
2984 misaligned integers. */
2985 if (DECL_BIT_FIELD (field))
2987 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2988 i < ((int_bit_position (field) + (bit_offset % 64))
2989 + tree_low_cst (DECL_SIZE (field), 0)
2992 merge_classes (X86_64_INTEGER_CLASS,
2997 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2998 TREE_TYPE (field), subclasses,
2999 (int_bit_position (field)
3000 + bit_offset) % 256);
3003 for (i = 0; i < num; i++)
3006 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3008 merge_classes (subclasses[i], classes[i + pos]);
3016 /* Arrays are handled as small records. */
3019 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3020 TREE_TYPE (type), subclasses, bit_offset);
3024 /* The partial classes are now full classes. */
3025 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3026 subclasses[0] = X86_64_SSE_CLASS;
3027 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3028 subclasses[0] = X86_64_INTEGER_CLASS;
3030 for (i = 0; i < words; i++)
3031 classes[i] = subclasses[i % num];
3036 case QUAL_UNION_TYPE:
3037 /* Unions are similar to RECORD_TYPE but offset is always 0.
3040 /* Unions are not derived. */
3041 gcc_assert (!TYPE_BINFO (type)
3042 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3043 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3045 if (TREE_CODE (field) == FIELD_DECL)
3049 if (TREE_TYPE (field) == error_mark_node)
3052 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3053 TREE_TYPE (field), subclasses,
3057 for (i = 0; i < num; i++)
3058 classes[i] = merge_classes (subclasses[i], classes[i]);
3067 /* Final merger cleanup. */
3068 for (i = 0; i < words; i++)
3070 /* If one class is MEMORY, everything should be passed in
3072 if (classes[i] == X86_64_MEMORY_CLASS)
3075 /* The X86_64_SSEUP_CLASS should be always preceded by
3076 X86_64_SSE_CLASS. */
3077 if (classes[i] == X86_64_SSEUP_CLASS
3078 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3079 classes[i] = X86_64_SSE_CLASS;
3081 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3082 if (classes[i] == X86_64_X87UP_CLASS
3083 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3084 classes[i] = X86_64_SSE_CLASS;
3089 /* Compute alignment needed. We align all types to natural boundaries with
3090 exception of XFmode that is aligned to 64bits. */
3091 if (mode != VOIDmode && mode != BLKmode)
3093 int mode_alignment = GET_MODE_BITSIZE (mode);
3096 mode_alignment = 128;
3097 else if (mode == XCmode)
3098 mode_alignment = 256;
3099 if (COMPLEX_MODE_P (mode))
3100 mode_alignment /= 2;
3101 /* Misaligned fields are always returned in memory. */
3102 if (bit_offset % mode_alignment)
3106 /* for V1xx modes, just use the base mode */
3107 if (VECTOR_MODE_P (mode)
3108 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3109 mode = GET_MODE_INNER (mode);
3111 /* Classification of atomic types. */
3116 classes[0] = X86_64_SSE_CLASS;
3119 classes[0] = X86_64_SSE_CLASS;
3120 classes[1] = X86_64_SSEUP_CLASS;
3129 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3130 classes[0] = X86_64_INTEGERSI_CLASS;
3132 classes[0] = X86_64_INTEGER_CLASS;
3136 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3141 if (!(bit_offset % 64))
3142 classes[0] = X86_64_SSESF_CLASS;
3144 classes[0] = X86_64_SSE_CLASS;
3147 classes[0] = X86_64_SSEDF_CLASS;
3150 classes[0] = X86_64_X87_CLASS;
3151 classes[1] = X86_64_X87UP_CLASS;
3154 classes[0] = X86_64_SSE_CLASS;
3155 classes[1] = X86_64_SSEUP_CLASS;
3158 classes[0] = X86_64_SSE_CLASS;
3161 classes[0] = X86_64_SSEDF_CLASS;
3162 classes[1] = X86_64_SSEDF_CLASS;
3165 classes[0] = X86_64_COMPLEX_X87_CLASS;
3168 /* This modes is larger than 16 bytes. */
3176 classes[0] = X86_64_SSE_CLASS;
3177 classes[1] = X86_64_SSEUP_CLASS;
3183 classes[0] = X86_64_SSE_CLASS;
3189 gcc_assert (VECTOR_MODE_P (mode));
3194 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3196 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3197 classes[0] = X86_64_INTEGERSI_CLASS;
3199 classes[0] = X86_64_INTEGER_CLASS;
3200 classes[1] = X86_64_INTEGER_CLASS;
3201 return 1 + (bytes > 8);
3205 /* Examine the argument and return set number of register required in each
3206 class. Return 0 iff parameter should be passed in memory. */
3208 examine_argument (enum machine_mode mode, tree type, int in_return,
3209 int *int_nregs, int *sse_nregs)
3211 enum x86_64_reg_class class[MAX_CLASSES];
3212 int n = classify_argument (mode, type, class, 0);
3218 for (n--; n >= 0; n--)
3221 case X86_64_INTEGER_CLASS:
3222 case X86_64_INTEGERSI_CLASS:
3225 case X86_64_SSE_CLASS:
3226 case X86_64_SSESF_CLASS:
3227 case X86_64_SSEDF_CLASS:
3230 case X86_64_NO_CLASS:
3231 case X86_64_SSEUP_CLASS:
3233 case X86_64_X87_CLASS:
3234 case X86_64_X87UP_CLASS:
3238 case X86_64_COMPLEX_X87_CLASS:
3239 return in_return ? 2 : 0;
3240 case X86_64_MEMORY_CLASS:
3246 /* Construct container for the argument used by GCC interface. See
3247 FUNCTION_ARG for the detailed description. */
3250 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3251 tree type, int in_return, int nintregs, int nsseregs,
3252 const int *intreg, int sse_regno)
3254 /* The following variables hold the static issued_error state. */
3255 static bool issued_sse_arg_error;
3256 static bool issued_sse_ret_error;
3257 static bool issued_x87_ret_error;
3259 enum machine_mode tmpmode;
3261 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3262 enum x86_64_reg_class class[MAX_CLASSES];
3266 int needed_sseregs, needed_intregs;
3267 rtx exp[MAX_CLASSES];
3270 n = classify_argument (mode, type, class, 0);
3271 if (TARGET_DEBUG_ARG)
3274 fprintf (stderr, "Memory class\n");
3277 fprintf (stderr, "Classes:");
3278 for (i = 0; i < n; i++)
3280 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3282 fprintf (stderr, "\n");
3287 if (!examine_argument (mode, type, in_return, &needed_intregs,
3290 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3293 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3294 some less clueful developer tries to use floating-point anyway. */
3295 if (needed_sseregs && !TARGET_SSE)
3299 if (!issued_sse_ret_error)
3301 error ("SSE register return with SSE disabled");
3302 issued_sse_ret_error = true;
3305 else if (!issued_sse_arg_error)
3307 error ("SSE register argument with SSE disabled");
3308 issued_sse_arg_error = true;
3313 /* Likewise, error if the ABI requires us to return values in the
3314 x87 registers and the user specified -mno-80387. */
3315 if (!TARGET_80387 && in_return)
3316 for (i = 0; i < n; i++)
3317 if (class[i] == X86_64_X87_CLASS
3318 || class[i] == X86_64_X87UP_CLASS
3319 || class[i] == X86_64_COMPLEX_X87_CLASS)
3321 if (!issued_x87_ret_error)
3323 error ("x87 register return with x87 disabled");
3324 issued_x87_ret_error = true;
3329 /* First construct simple cases. Avoid SCmode, since we want to use
3330 single register to pass this type. */
3331 if (n == 1 && mode != SCmode)
3334 case X86_64_INTEGER_CLASS:
3335 case X86_64_INTEGERSI_CLASS:
3336 return gen_rtx_REG (mode, intreg[0]);
3337 case X86_64_SSE_CLASS:
3338 case X86_64_SSESF_CLASS:
3339 case X86_64_SSEDF_CLASS:
3340 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3341 case X86_64_X87_CLASS:
3342 case X86_64_COMPLEX_X87_CLASS:
3343 return gen_rtx_REG (mode, FIRST_STACK_REG);
3344 case X86_64_NO_CLASS:
3345 /* Zero sized array, struct or class. */
3350 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3352 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3354 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3355 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3356 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3357 && class[1] == X86_64_INTEGER_CLASS
3358 && (mode == CDImode || mode == TImode || mode == TFmode)
3359 && intreg[0] + 1 == intreg[1])
3360 return gen_rtx_REG (mode, intreg[0]);
3362 /* Otherwise figure out the entries of the PARALLEL. */
3363 for (i = 0; i < n; i++)
3367 case X86_64_NO_CLASS:
3369 case X86_64_INTEGER_CLASS:
3370 case X86_64_INTEGERSI_CLASS:
3371 /* Merge TImodes on aligned occasions here too. */
3372 if (i * 8 + 8 > bytes)
3373 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3374 else if (class[i] == X86_64_INTEGERSI_CLASS)
3378 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3379 if (tmpmode == BLKmode)
3381 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3382 gen_rtx_REG (tmpmode, *intreg),
3386 case X86_64_SSESF_CLASS:
3387 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3388 gen_rtx_REG (SFmode,
3389 SSE_REGNO (sse_regno)),
3393 case X86_64_SSEDF_CLASS:
3394 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3395 gen_rtx_REG (DFmode,
3396 SSE_REGNO (sse_regno)),
3400 case X86_64_SSE_CLASS:
3401 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3405 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3406 gen_rtx_REG (tmpmode,
3407 SSE_REGNO (sse_regno)),
3409 if (tmpmode == TImode)
3418 /* Empty aligned struct, union or class. */
3422 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3423 for (i = 0; i < nexps; i++)
3424 XVECEXP (ret, 0, i) = exp [i];
3428 /* Update the data in CUM to advance over an argument
3429 of mode MODE and data type TYPE.
3430 (TYPE is null for libcalls where that information may not be available.) */
3433 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3434 tree type, int named)
3437 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3438 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3441 mode = type_natural_mode (type);
3443 if (TARGET_DEBUG_ARG)
3444 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3445 "mode=%s, named=%d)\n\n",
3446 words, cum->words, cum->nregs, cum->sse_nregs,
3447 GET_MODE_NAME (mode), named);
3451 int int_nregs, sse_nregs;
3452 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3453 cum->words += words;
3454 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3456 cum->nregs -= int_nregs;
3457 cum->sse_nregs -= sse_nregs;
3458 cum->regno += int_nregs;
3459 cum->sse_regno += sse_nregs;
3462 cum->words += words;
3480 cum->words += words;
3481 cum->nregs -= words;
3482 cum->regno += words;
3484 if (cum->nregs <= 0)
3492 if (cum->float_in_sse < 2)
3495 if (cum->float_in_sse < 1)
3506 if (!type || !AGGREGATE_TYPE_P (type))
3508 cum->sse_words += words;
3509 cum->sse_nregs -= 1;
3510 cum->sse_regno += 1;
3511 if (cum->sse_nregs <= 0)
3523 if (!type || !AGGREGATE_TYPE_P (type))
3525 cum->mmx_words += words;
3526 cum->mmx_nregs -= 1;
3527 cum->mmx_regno += 1;
3528 if (cum->mmx_nregs <= 0)
3539 /* Define where to put the arguments to a function.
3540 Value is zero to push the argument on the stack,
3541 or a hard register in which to store the argument.
3543 MODE is the argument's machine mode.
3544 TYPE is the data type of the argument (as a tree).
3545 This is null for libcalls where that information may
3547 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3548 the preceding args and about the function being called.
3549 NAMED is nonzero if this argument is a named parameter
3550 (otherwise it is an extra parameter matching an ellipsis). */
3553 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3554 tree type, int named)
3556 enum machine_mode mode = orig_mode;
3559 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3560 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3561 static bool warnedsse, warnedmmx;
3563 /* To simplify the code below, represent vector types with a vector mode
3564 even if MMX/SSE are not active. */
3565 if (type && TREE_CODE (type) == VECTOR_TYPE)
3566 mode = type_natural_mode (type);
3568 /* Handle a hidden AL argument containing number of registers for varargs
3569 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3571 if (mode == VOIDmode)
3574 return GEN_INT (cum->maybe_vaarg
3575 ? (cum->sse_nregs < 0
3583 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3585 &x86_64_int_parameter_registers [cum->regno],
3590 /* For now, pass fp/complex values on the stack. */
3602 if (words <= cum->nregs)
3604 int regno = cum->regno;
3606 /* Fastcall allocates the first two DWORD (SImode) or
3607 smaller arguments to ECX and EDX. */
3610 if (mode == BLKmode || mode == DImode)
3613 /* ECX not EAX is the first allocated register. */
3617 ret = gen_rtx_REG (mode, regno);
3621 if (cum->float_in_sse < 2)
3624 if (cum->float_in_sse < 1)
3634 if (!type || !AGGREGATE_TYPE_P (type))
3636 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3639 warning (0, "SSE vector argument without SSE enabled "
3643 ret = gen_reg_or_parallel (mode, orig_mode,
3644 cum->sse_regno + FIRST_SSE_REG);
3651 if (!type || !AGGREGATE_TYPE_P (type))
3653 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3656 warning (0, "MMX vector argument without MMX enabled "
3660 ret = gen_reg_or_parallel (mode, orig_mode,
3661 cum->mmx_regno + FIRST_MMX_REG);
3666 if (TARGET_DEBUG_ARG)
3669 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3670 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3673 print_simple_rtl (stderr, ret);
3675 fprintf (stderr, ", stack");
3677 fprintf (stderr, " )\n");
3683 /* A C expression that indicates when an argument must be passed by
3684 reference. If nonzero for an argument, a copy of that argument is
3685 made in memory and a pointer to the argument is passed instead of
3686 the argument itself. The pointer is passed in whatever way is
3687 appropriate for passing a pointer to that type. */
3690 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3691 enum machine_mode mode ATTRIBUTE_UNUSED,
3692 tree type, bool named ATTRIBUTE_UNUSED)
3697 if (type && int_size_in_bytes (type) == -1)
3699 if (TARGET_DEBUG_ARG)
3700 fprintf (stderr, "function_arg_pass_by_reference\n");
3707 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3708 ABI. Only called if TARGET_SSE. */
3710 contains_128bit_aligned_vector_p (tree type)
3712 enum machine_mode mode = TYPE_MODE (type);
3713 if (SSE_REG_MODE_P (mode)
3714 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3716 if (TYPE_ALIGN (type) < 128)
3719 if (AGGREGATE_TYPE_P (type))
3721 /* Walk the aggregates recursively. */
3722 switch (TREE_CODE (type))
3726 case QUAL_UNION_TYPE:
3730 if (TYPE_BINFO (type))
3732 tree binfo, base_binfo;
3735 for (binfo = TYPE_BINFO (type), i = 0;
3736 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3737 if (contains_128bit_aligned_vector_p
3738 (BINFO_TYPE (base_binfo)))
3741 /* And now merge the fields of structure. */
3742 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3744 if (TREE_CODE (field) == FIELD_DECL
3745 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3752 /* Just for use if some languages passes arrays by value. */
3753 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3764 /* Gives the alignment boundary, in bits, of an argument with the
3765 specified mode and type. */
3768 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3772 align = TYPE_ALIGN (type);
3774 align = GET_MODE_ALIGNMENT (mode);
3775 if (align < PARM_BOUNDARY)
3776 align = PARM_BOUNDARY;
3779 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3780 make an exception for SSE modes since these require 128bit
3783 The handling here differs from field_alignment. ICC aligns MMX
3784 arguments to 4 byte boundaries, while structure fields are aligned
3785 to 8 byte boundaries. */
3787 align = PARM_BOUNDARY;
3790 if (!SSE_REG_MODE_P (mode))
3791 align = PARM_BOUNDARY;
3795 if (!contains_128bit_aligned_vector_p (type))
3796 align = PARM_BOUNDARY;
3804 /* Return true if N is a possible register number of function value. */
3806 ix86_function_value_regno_p (int regno)
3809 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3810 || (regno == FIRST_SSE_REG && TARGET_SSE))
3814 && (regno == FIRST_MMX_REG && TARGET_MMX))
3820 /* Define how to find the value returned by a function.
3821 VALTYPE is the data type of the value (as a tree).
3822 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3823 otherwise, FUNC is 0. */
3825 ix86_function_value (tree valtype, tree fntype_or_decl,
3826 bool outgoing ATTRIBUTE_UNUSED)
3828 enum machine_mode natmode = type_natural_mode (valtype);
3832 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3833 1, REGPARM_MAX, SSE_REGPARM_MAX,
3834 x86_64_int_return_registers, 0);
3835 /* For zero sized structures, construct_container return NULL, but we
3836 need to keep rest of compiler happy by returning meaningful value. */
3838 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3843 tree fn = NULL_TREE, fntype;
3845 && DECL_P (fntype_or_decl))
3846 fn = fntype_or_decl;
3847 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3848 return gen_rtx_REG (TYPE_MODE (valtype),
3849 ix86_value_regno (natmode, fn, fntype));
3853 /* Return true iff type is returned in memory. */
3855 ix86_return_in_memory (tree type)
3857 int needed_intregs, needed_sseregs, size;
3858 enum machine_mode mode = type_natural_mode (type);
3861 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3863 if (mode == BLKmode)
3866 size = int_size_in_bytes (type);
3868 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3871 if (VECTOR_MODE_P (mode) || mode == TImode)
3873 /* User-created vectors small enough to fit in EAX. */
3877 /* MMX/3dNow values are returned in MM0,
3878 except when it doesn't exits. */
3880 return (TARGET_MMX ? 0 : 1);
3882 /* SSE values are returned in XMM0, except when it doesn't exist. */
3884 return (TARGET_SSE ? 0 : 1);
3898 /* When returning SSE vector types, we have a choice of either
3899 (1) being abi incompatible with a -march switch, or
3900 (2) generating an error.
3901 Given no good solution, I think the safest thing is one warning.
3902 The user won't be able to use -Werror, but....
3904 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3905 called in response to actually generating a caller or callee that
3906 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3907 via aggregate_value_p for general type probing from tree-ssa. */
3910 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3912 static bool warnedsse, warnedmmx;
3916 /* Look at the return type of the function, not the function type. */
3917 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3919 if (!TARGET_SSE && !warnedsse)
3922 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3925 warning (0, "SSE vector return without SSE enabled "
3930 if (!TARGET_MMX && !warnedmmx)
3932 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3935 warning (0, "MMX vector return without MMX enabled "
3944 /* Define how to find the value returned by a library function
3945 assuming the value has mode MODE. */
3947 ix86_libcall_value (enum machine_mode mode)
3961 return gen_rtx_REG (mode, FIRST_SSE_REG);
3964 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3968 return gen_rtx_REG (mode, 0);
3972 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3975 /* Given a mode, return the register to use for a return value. */
3978 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3980 gcc_assert (!TARGET_64BIT);
3982 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3983 we normally prevent this case when mmx is not available. However
3984 some ABIs may require the result to be returned like DImode. */
3985 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3986 return TARGET_MMX ? FIRST_MMX_REG : 0;
3988 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3989 we prevent this case when sse is not available. However some ABIs
3990 may require the result to be returned like integer TImode. */
3991 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3992 return TARGET_SSE ? FIRST_SSE_REG : 0;
3994 /* Decimal floating point values can go in %eax, unlike other float modes. */
3995 if (DECIMAL_FLOAT_MODE_P (mode))
3998 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3999 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4002 /* Floating point return values in %st(0), except for local functions when
4003 SSE math is enabled or for functions with sseregparm attribute. */
4004 if ((func || fntype)
4005 && (mode == SFmode || mode == DFmode))
4007 int sse_level = ix86_function_sseregparm (fntype, func);
4008 if ((sse_level >= 1 && mode == SFmode)
4009 || (sse_level == 2 && mode == DFmode))
4010 return FIRST_SSE_REG;
4013 return FIRST_FLOAT_REG;
4016 /* Create the va_list data type. */
4019 ix86_build_builtin_va_list (void)
4021 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4023 /* For i386 we use plain pointer to argument area. */
4025 return build_pointer_type (char_type_node);
4027 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4028 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4030 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4031 unsigned_type_node);
4032 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4033 unsigned_type_node);
4034 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4036 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4039 va_list_gpr_counter_field = f_gpr;
4040 va_list_fpr_counter_field = f_fpr;
4042 DECL_FIELD_CONTEXT (f_gpr) = record;
4043 DECL_FIELD_CONTEXT (f_fpr) = record;
4044 DECL_FIELD_CONTEXT (f_ovf) = record;
4045 DECL_FIELD_CONTEXT (f_sav) = record;
4047 TREE_CHAIN (record) = type_decl;
4048 TYPE_NAME (record) = type_decl;
4049 TYPE_FIELDS (record) = f_gpr;
4050 TREE_CHAIN (f_gpr) = f_fpr;
4051 TREE_CHAIN (f_fpr) = f_ovf;
4052 TREE_CHAIN (f_ovf) = f_sav;
4054 layout_type (record);
4056 /* The correct type is an array type of one element. */
4057 return build_array_type (record, build_index_type (size_zero_node));
4060 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4063 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4064 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4067 CUMULATIVE_ARGS next_cum;
4068 rtx save_area = NULL_RTX, mem;
4081 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4084 /* Indicate to allocate space on the stack for varargs save area. */
4085 ix86_save_varrargs_registers = 1;
4087 cfun->stack_alignment_needed = 128;
4089 fntype = TREE_TYPE (current_function_decl);
4090 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4091 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4092 != void_type_node));
4094 /* For varargs, we do not want to skip the dummy va_dcl argument.
4095 For stdargs, we do want to skip the last named argument. */
4098 function_arg_advance (&next_cum, mode, type, 1);
4101 save_area = frame_pointer_rtx;
4103 set = get_varargs_alias_set ();
4105 for (i = next_cum.regno;
4107 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4110 mem = gen_rtx_MEM (Pmode,
4111 plus_constant (save_area, i * UNITS_PER_WORD));
4112 MEM_NOTRAP_P (mem) = 1;
4113 set_mem_alias_set (mem, set);
4114 emit_move_insn (mem, gen_rtx_REG (Pmode,
4115 x86_64_int_parameter_registers[i]));
4118 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4120 /* Now emit code to save SSE registers. The AX parameter contains number
4121 of SSE parameter registers used to call this function. We use
4122 sse_prologue_save insn template that produces computed jump across
4123 SSE saves. We need some preparation work to get this working. */
4125 label = gen_label_rtx ();
4126 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4128 /* Compute address to jump to :
4129 label - 5*eax + nnamed_sse_arguments*5 */
4130 tmp_reg = gen_reg_rtx (Pmode);
4131 nsse_reg = gen_reg_rtx (Pmode);
4132 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4133 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4134 gen_rtx_MULT (Pmode, nsse_reg,
4136 if (next_cum.sse_regno)
4139 gen_rtx_CONST (DImode,
4140 gen_rtx_PLUS (DImode,
4142 GEN_INT (next_cum.sse_regno * 4))));
4144 emit_move_insn (nsse_reg, label_ref);
4145 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4147 /* Compute address of memory block we save into. We always use pointer
4148 pointing 127 bytes after first byte to store - this is needed to keep
4149 instruction size limited by 4 bytes. */
4150 tmp_reg = gen_reg_rtx (Pmode);
4151 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4152 plus_constant (save_area,
4153 8 * REGPARM_MAX + 127)));
4154 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4155 MEM_NOTRAP_P (mem) = 1;
4156 set_mem_alias_set (mem, set);
4157 set_mem_align (mem, BITS_PER_WORD);
4159 /* And finally do the dirty job! */
4160 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4161 GEN_INT (next_cum.sse_regno), label));
4166 /* Implement va_start. */
4169 ix86_va_start (tree valist, rtx nextarg)
4171 HOST_WIDE_INT words, n_gpr, n_fpr;
4172 tree f_gpr, f_fpr, f_ovf, f_sav;
4173 tree gpr, fpr, ovf, sav, t;
4176 /* Only 64bit target needs something special. */
4179 std_expand_builtin_va_start (valist, nextarg);
4183 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4184 f_fpr = TREE_CHAIN (f_gpr);
4185 f_ovf = TREE_CHAIN (f_fpr);
4186 f_sav = TREE_CHAIN (f_ovf);
4188 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4189 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4190 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4191 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4192 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4194 /* Count number of gp and fp argument registers used. */
4195 words = current_function_args_info.words;
4196 n_gpr = current_function_args_info.regno;
4197 n_fpr = current_function_args_info.sse_regno;
4199 if (TARGET_DEBUG_ARG)
4200 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4201 (int) words, (int) n_gpr, (int) n_fpr);
4203 if (cfun->va_list_gpr_size)
4205 type = TREE_TYPE (gpr);
4206 t = build2 (MODIFY_EXPR, type, gpr,
4207 build_int_cst (type, n_gpr * 8));
4208 TREE_SIDE_EFFECTS (t) = 1;
4209 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4212 if (cfun->va_list_fpr_size)
4214 type = TREE_TYPE (fpr);
4215 t = build2 (MODIFY_EXPR, type, fpr,
4216 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4217 TREE_SIDE_EFFECTS (t) = 1;
4218 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4221 /* Find the overflow area. */
4222 type = TREE_TYPE (ovf);
4223 t = make_tree (type, virtual_incoming_args_rtx);
4225 t = build2 (PLUS_EXPR, type, t,
4226 build_int_cst (type, words * UNITS_PER_WORD));
4227 t = build2 (MODIFY_EXPR, type, ovf, t);
4228 TREE_SIDE_EFFECTS (t) = 1;
4229 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4231 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4233 /* Find the register save area.
4234 Prologue of the function save it right above stack frame. */
4235 type = TREE_TYPE (sav);
4236 t = make_tree (type, frame_pointer_rtx);
4237 t = build2 (MODIFY_EXPR, type, sav, t);
4238 TREE_SIDE_EFFECTS (t) = 1;
4239 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4243 /* Implement va_arg. */
4246 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4248 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4249 tree f_gpr, f_fpr, f_ovf, f_sav;
4250 tree gpr, fpr, ovf, sav, t;
4252 tree lab_false, lab_over = NULL_TREE;
4257 enum machine_mode nat_mode;
4259 /* Only 64bit target needs something special. */
4261 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4263 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4264 f_fpr = TREE_CHAIN (f_gpr);
4265 f_ovf = TREE_CHAIN (f_fpr);
4266 f_sav = TREE_CHAIN (f_ovf);
4268 valist = build_va_arg_indirect_ref (valist);
4269 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4270 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4271 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4272 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4274 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4276 type = build_pointer_type (type);
4277 size = int_size_in_bytes (type);
4278 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4280 nat_mode = type_natural_mode (type);
4281 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4282 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4284 /* Pull the value out of the saved registers. */
4286 addr = create_tmp_var (ptr_type_node, "addr");
4287 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4291 int needed_intregs, needed_sseregs;
4293 tree int_addr, sse_addr;
4295 lab_false = create_artificial_label ();
4296 lab_over = create_artificial_label ();
4298 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4300 need_temp = (!REG_P (container)
4301 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4302 || TYPE_ALIGN (type) > 128));
4304 /* In case we are passing structure, verify that it is consecutive block
4305 on the register save area. If not we need to do moves. */
4306 if (!need_temp && !REG_P (container))
4308 /* Verify that all registers are strictly consecutive */
4309 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4313 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4315 rtx slot = XVECEXP (container, 0, i);
4316 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4317 || INTVAL (XEXP (slot, 1)) != i * 16)
4325 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4327 rtx slot = XVECEXP (container, 0, i);
4328 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4329 || INTVAL (XEXP (slot, 1)) != i * 8)
4341 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4342 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4343 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4344 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4347 /* First ensure that we fit completely in registers. */
4350 t = build_int_cst (TREE_TYPE (gpr),
4351 (REGPARM_MAX - needed_intregs + 1) * 8);
4352 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4353 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4354 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4355 gimplify_and_add (t, pre_p);
4359 t = build_int_cst (TREE_TYPE (fpr),
4360 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4362 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4363 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4364 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4365 gimplify_and_add (t, pre_p);
4368 /* Compute index to start of area used for integer regs. */
4371 /* int_addr = gpr + sav; */
4372 t = fold_convert (ptr_type_node, gpr);
4373 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4374 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4375 gimplify_and_add (t, pre_p);
4379 /* sse_addr = fpr + sav; */
4380 t = fold_convert (ptr_type_node, fpr);
4381 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4382 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4383 gimplify_and_add (t, pre_p);
4388 tree temp = create_tmp_var (type, "va_arg_tmp");
4391 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4392 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4393 gimplify_and_add (t, pre_p);
4395 for (i = 0; i < XVECLEN (container, 0); i++)
4397 rtx slot = XVECEXP (container, 0, i);
4398 rtx reg = XEXP (slot, 0);
4399 enum machine_mode mode = GET_MODE (reg);
4400 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4401 tree addr_type = build_pointer_type (piece_type);
4404 tree dest_addr, dest;
4406 if (SSE_REGNO_P (REGNO (reg)))
4408 src_addr = sse_addr;
4409 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4413 src_addr = int_addr;
4414 src_offset = REGNO (reg) * 8;
4416 src_addr = fold_convert (addr_type, src_addr);
4417 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4418 size_int (src_offset)));
4419 src = build_va_arg_indirect_ref (src_addr);
4421 dest_addr = fold_convert (addr_type, addr);
4422 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4423 size_int (INTVAL (XEXP (slot, 1)))));
4424 dest = build_va_arg_indirect_ref (dest_addr);
4426 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4427 gimplify_and_add (t, pre_p);
4433 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4434 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4435 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4436 gimplify_and_add (t, pre_p);
4440 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4441 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4442 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4443 gimplify_and_add (t, pre_p);
4446 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4447 gimplify_and_add (t, pre_p);
4449 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4450 append_to_statement_list (t, pre_p);
4453 /* ... otherwise out of the overflow area. */
4455 /* Care for on-stack alignment if needed. */
4456 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4457 || integer_zerop (TYPE_SIZE (type)))
4461 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4462 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4463 build_int_cst (TREE_TYPE (ovf), align - 1));
4464 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4465 build_int_cst (TREE_TYPE (t), -align));
4467 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4469 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4470 gimplify_and_add (t2, pre_p);
4472 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4473 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4474 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4475 gimplify_and_add (t, pre_p);
4479 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4480 append_to_statement_list (t, pre_p);
4483 ptrtype = build_pointer_type (type);
4484 addr = fold_convert (ptrtype, addr);
4487 addr = build_va_arg_indirect_ref (addr);
4488 return build_va_arg_indirect_ref (addr);
4491 /* Return nonzero if OPNUM's MEM should be matched
4492 in movabs* patterns. */
4495 ix86_check_movabs (rtx insn, int opnum)
4499 set = PATTERN (insn);
4500 if (GET_CODE (set) == PARALLEL)
4501 set = XVECEXP (set, 0, 0);
4502 gcc_assert (GET_CODE (set) == SET);
4503 mem = XEXP (set, opnum);
4504 while (GET_CODE (mem) == SUBREG)
4505 mem = SUBREG_REG (mem);
4506 gcc_assert (GET_CODE (mem) == MEM);
4507 return (volatile_ok || !MEM_VOLATILE_P (mem));
4510 /* Initialize the table of extra 80387 mathematical constants. */
4513 init_ext_80387_constants (void)
4515 static const char * cst[5] =
4517 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4518 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4519 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4520 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4521 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4525 for (i = 0; i < 5; i++)
4527 real_from_string (&ext_80387_constants_table[i], cst[i]);
4528 /* Ensure each constant is rounded to XFmode precision. */
4529 real_convert (&ext_80387_constants_table[i],
4530 XFmode, &ext_80387_constants_table[i]);
4533 ext_80387_constants_init = 1;
4536 /* Return true if the constant is something that can be loaded with
4537 a special instruction. */
4540 standard_80387_constant_p (rtx x)
4542 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4545 if (x == CONST0_RTX (GET_MODE (x)))
4547 if (x == CONST1_RTX (GET_MODE (x)))
4550 /* For XFmode constants, try to find a special 80387 instruction when
4551 optimizing for size or on those CPUs that benefit from them. */
4552 if (GET_MODE (x) == XFmode
4553 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4558 if (! ext_80387_constants_init)
4559 init_ext_80387_constants ();
4561 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4562 for (i = 0; i < 5; i++)
4563 if (real_identical (&r, &ext_80387_constants_table[i]))
4570 /* Return the opcode of the special instruction to be used to load
4574 standard_80387_constant_opcode (rtx x)
4576 switch (standard_80387_constant_p (x))
4597 /* Return the CONST_DOUBLE representing the 80387 constant that is
4598 loaded by the specified special instruction. The argument IDX
4599 matches the return value from standard_80387_constant_p. */
4602 standard_80387_constant_rtx (int idx)
4606 if (! ext_80387_constants_init)
4607 init_ext_80387_constants ();
4623 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4627 /* Return 1 if mode is a valid mode for sse. */
4629 standard_sse_mode_p (enum machine_mode mode)
4646 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4649 standard_sse_constant_p (rtx x)
4651 enum machine_mode mode = GET_MODE (x);
4653 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4655 if (vector_all_ones_operand (x, mode)
4656 && standard_sse_mode_p (mode))
4657 return TARGET_SSE2 ? 2 : -1;
4662 /* Return the opcode of the special instruction to be used to load
4666 standard_sse_constant_opcode (rtx insn, rtx x)
4668 switch (standard_sse_constant_p (x))
4671 if (get_attr_mode (insn) == MODE_V4SF)
4672 return "xorps\t%0, %0";
4673 else if (get_attr_mode (insn) == MODE_V2DF)
4674 return "xorpd\t%0, %0";
4676 return "pxor\t%0, %0";
4678 return "pcmpeqd\t%0, %0";
4683 /* Returns 1 if OP contains a symbol reference */
4686 symbolic_reference_mentioned_p (rtx op)
4691 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4694 fmt = GET_RTX_FORMAT (GET_CODE (op));
4695 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4701 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4702 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4706 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4713 /* Return 1 if it is appropriate to emit `ret' instructions in the
4714 body of a function. Do this only if the epilogue is simple, needing a
4715 couple of insns. Prior to reloading, we can't tell how many registers
4716 must be saved, so return 0 then. Return 0 if there is no frame
4717 marker to de-allocate. */
4720 ix86_can_use_return_insn_p (void)
4722 struct ix86_frame frame;
4724 if (! reload_completed || frame_pointer_needed)
4727 /* Don't allow more than 32 pop, since that's all we can do
4728 with one instruction. */
4729 if (current_function_pops_args
4730 && current_function_args_size >= 32768)
4733 ix86_compute_frame_layout (&frame);
4734 return frame.to_allocate == 0 && frame.nregs == 0;
4737 /* Value should be nonzero if functions must have frame pointers.
4738 Zero means the frame pointer need not be set up (and parms may
4739 be accessed via the stack pointer) in functions that seem suitable. */
4742 ix86_frame_pointer_required (void)
4744 /* If we accessed previous frames, then the generated code expects
4745 to be able to access the saved ebp value in our frame. */
4746 if (cfun->machine->accesses_prev_frame)
4749 /* Several x86 os'es need a frame pointer for other reasons,
4750 usually pertaining to setjmp. */
4751 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4754 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4755 the frame pointer by default. Turn it back on now if we've not
4756 got a leaf function. */
4757 if (TARGET_OMIT_LEAF_FRAME_POINTER
4758 && (!current_function_is_leaf
4759 || ix86_current_function_calls_tls_descriptor))
4762 if (current_function_profile)
4768 /* Record that the current function accesses previous call frames. */
4771 ix86_setup_frame_addresses (void)
4773 cfun->machine->accesses_prev_frame = 1;
4776 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4777 # define USE_HIDDEN_LINKONCE 1
4779 # define USE_HIDDEN_LINKONCE 0
4782 static int pic_labels_used;
4784 /* Fills in the label name that should be used for a pc thunk for
4785 the given register. */
4788 get_pc_thunk_name (char name[32], unsigned int regno)
4790 if (USE_HIDDEN_LINKONCE)
4791 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4793 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4797 /* This function generates code for -fpic that loads %ebx with
4798 the return address of the caller and then returns. */
4801 ix86_file_end (void)
4806 for (regno = 0; regno < 8; ++regno)
4810 if (! ((pic_labels_used >> regno) & 1))
4813 get_pc_thunk_name (name, regno);
4818 switch_to_section (darwin_sections[text_coal_section]);
4819 fputs ("\t.weak_definition\t", asm_out_file);
4820 assemble_name (asm_out_file, name);
4821 fputs ("\n\t.private_extern\t", asm_out_file);
4822 assemble_name (asm_out_file, name);
4823 fputs ("\n", asm_out_file);
4824 ASM_OUTPUT_LABEL (asm_out_file, name);
4828 if (USE_HIDDEN_LINKONCE)
4832 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4834 TREE_PUBLIC (decl) = 1;
4835 TREE_STATIC (decl) = 1;
4836 DECL_ONE_ONLY (decl) = 1;
4838 (*targetm.asm_out.unique_section) (decl, 0);
4839 switch_to_section (get_named_section (decl, NULL, 0));
4841 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4842 fputs ("\t.hidden\t", asm_out_file);
4843 assemble_name (asm_out_file, name);
4844 fputc ('\n', asm_out_file);
4845 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4849 switch_to_section (text_section);
4850 ASM_OUTPUT_LABEL (asm_out_file, name);
4853 xops[0] = gen_rtx_REG (SImode, regno);
4854 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4855 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4856 output_asm_insn ("ret", xops);
4859 if (NEED_INDICATE_EXEC_STACK)
4860 file_end_indicate_exec_stack ();
4863 /* Emit code for the SET_GOT patterns. */
4866 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4871 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4873 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4875 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4878 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4880 output_asm_insn ("call\t%a2", xops);
4883 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4884 is what will be referenced by the Mach-O PIC subsystem. */
4886 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4889 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4890 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4893 output_asm_insn ("pop{l}\t%0", xops);
4898 get_pc_thunk_name (name, REGNO (dest));
4899 pic_labels_used |= 1 << REGNO (dest);
4901 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4902 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4903 output_asm_insn ("call\t%X2", xops);
4904 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4905 is what will be referenced by the Mach-O PIC subsystem. */
4908 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4910 targetm.asm_out.internal_label (asm_out_file, "L",
4911 CODE_LABEL_NUMBER (label));
4918 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4919 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4921 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4926 /* Generate an "push" pattern for input ARG. */
4931 return gen_rtx_SET (VOIDmode,
4933 gen_rtx_PRE_DEC (Pmode,
4934 stack_pointer_rtx)),
4938 /* Return >= 0 if there is an unused call-clobbered register available
4939 for the entire function. */
4942 ix86_select_alt_pic_regnum (void)
4944 if (current_function_is_leaf && !current_function_profile
4945 && !ix86_current_function_calls_tls_descriptor)
4948 for (i = 2; i >= 0; --i)
4949 if (!regs_ever_live[i])
4953 return INVALID_REGNUM;
4956 /* Return 1 if we need to save REGNO. */
4958 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4960 if (pic_offset_table_rtx
4961 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4962 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4963 || current_function_profile
4964 || current_function_calls_eh_return
4965 || current_function_uses_const_pool))
4967 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4972 if (current_function_calls_eh_return && maybe_eh_return)
4977 unsigned test = EH_RETURN_DATA_REGNO (i);
4978 if (test == INVALID_REGNUM)
4985 if (cfun->machine->force_align_arg_pointer
4986 && regno == REGNO (cfun->machine->force_align_arg_pointer))
4989 return (regs_ever_live[regno]
4990 && !call_used_regs[regno]
4991 && !fixed_regs[regno]
4992 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4995 /* Return number of registers to be saved on the stack. */
4998 ix86_nsaved_regs (void)
5003 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5004 if (ix86_save_reg (regno, true))
5009 /* Return the offset between two registers, one to be eliminated, and the other
5010 its replacement, at the start of a routine. */
5013 ix86_initial_elimination_offset (int from, int to)
5015 struct ix86_frame frame;
5016 ix86_compute_frame_layout (&frame);
5018 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5019 return frame.hard_frame_pointer_offset;
5020 else if (from == FRAME_POINTER_REGNUM
5021 && to == HARD_FRAME_POINTER_REGNUM)
5022 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5025 gcc_assert (to == STACK_POINTER_REGNUM);
5027 if (from == ARG_POINTER_REGNUM)
5028 return frame.stack_pointer_offset;
5030 gcc_assert (from == FRAME_POINTER_REGNUM);
5031 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5035 /* Fill structure ix86_frame about frame of currently computed function. */
5038 ix86_compute_frame_layout (struct ix86_frame *frame)
5040 HOST_WIDE_INT total_size;
5041 unsigned int stack_alignment_needed;
5042 HOST_WIDE_INT offset;
5043 unsigned int preferred_alignment;
5044 HOST_WIDE_INT size = get_frame_size ();
5046 frame->nregs = ix86_nsaved_regs ();
5049 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5050 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5052 /* During reload iteration the amount of registers saved can change.
5053 Recompute the value as needed. Do not recompute when amount of registers
5054 didn't change as reload does multiple calls to the function and does not
5055 expect the decision to change within single iteration. */
5057 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5059 int count = frame->nregs;
5061 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5062 /* The fast prologue uses move instead of push to save registers. This
5063 is significantly longer, but also executes faster as modern hardware
5064 can execute the moves in parallel, but can't do that for push/pop.
5066 Be careful about choosing what prologue to emit: When function takes
5067 many instructions to execute we may use slow version as well as in
5068 case function is known to be outside hot spot (this is known with
5069 feedback only). Weight the size of function by number of registers
5070 to save as it is cheap to use one or two push instructions but very
5071 slow to use many of them. */
5073 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5074 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5075 || (flag_branch_probabilities
5076 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5077 cfun->machine->use_fast_prologue_epilogue = false;
5079 cfun->machine->use_fast_prologue_epilogue
5080 = !expensive_function_p (count);
5082 if (TARGET_PROLOGUE_USING_MOVE
5083 && cfun->machine->use_fast_prologue_epilogue)
5084 frame->save_regs_using_mov = true;
5086 frame->save_regs_using_mov = false;
5089 /* Skip return address and saved base pointer. */
5090 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5092 frame->hard_frame_pointer_offset = offset;
5094 /* Do some sanity checking of stack_alignment_needed and
5095 preferred_alignment, since i386 port is the only using those features
5096 that may break easily. */
5098 gcc_assert (!size || stack_alignment_needed);
5099 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5100 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5101 gcc_assert (stack_alignment_needed
5102 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5104 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5105 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5107 /* Register save area */
5108 offset += frame->nregs * UNITS_PER_WORD;
5111 if (ix86_save_varrargs_registers)
5113 offset += X86_64_VARARGS_SIZE;
5114 frame->va_arg_size = X86_64_VARARGS_SIZE;
5117 frame->va_arg_size = 0;
5119 /* Align start of frame for local function. */
5120 frame->padding1 = ((offset + stack_alignment_needed - 1)
5121 & -stack_alignment_needed) - offset;
5123 offset += frame->padding1;
5125 /* Frame pointer points here. */
5126 frame->frame_pointer_offset = offset;
5130 /* Add outgoing arguments area. Can be skipped if we eliminated
5131 all the function calls as dead code.
5132 Skipping is however impossible when function calls alloca. Alloca
5133 expander assumes that last current_function_outgoing_args_size
5134 of stack frame are unused. */
5135 if (ACCUMULATE_OUTGOING_ARGS
5136 && (!current_function_is_leaf || current_function_calls_alloca
5137 || ix86_current_function_calls_tls_descriptor))
5139 offset += current_function_outgoing_args_size;
5140 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5143 frame->outgoing_arguments_size = 0;
5145 /* Align stack boundary. Only needed if we're calling another function
5147 if (!current_function_is_leaf || current_function_calls_alloca
5148 || ix86_current_function_calls_tls_descriptor)
5149 frame->padding2 = ((offset + preferred_alignment - 1)
5150 & -preferred_alignment) - offset;
5152 frame->padding2 = 0;
5154 offset += frame->padding2;
5156 /* We've reached end of stack frame. */
5157 frame->stack_pointer_offset = offset;
5159 /* Size prologue needs to allocate. */
5160 frame->to_allocate =
5161 (size + frame->padding1 + frame->padding2
5162 + frame->outgoing_arguments_size + frame->va_arg_size);
5164 if ((!frame->to_allocate && frame->nregs <= 1)
5165 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5166 frame->save_regs_using_mov = false;
5168 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5169 && current_function_is_leaf
5170 && !ix86_current_function_calls_tls_descriptor)
5172 frame->red_zone_size = frame->to_allocate;
5173 if (frame->save_regs_using_mov)
5174 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5175 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5176 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5179 frame->red_zone_size = 0;
5180 frame->to_allocate -= frame->red_zone_size;
5181 frame->stack_pointer_offset -= frame->red_zone_size;
5183 fprintf (stderr, "nregs: %i\n", frame->nregs);
5184 fprintf (stderr, "size: %i\n", size);
5185 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5186 fprintf (stderr, "padding1: %i\n", frame->padding1);
5187 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5188 fprintf (stderr, "padding2: %i\n", frame->padding2);
5189 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5190 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5191 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5192 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5193 frame->hard_frame_pointer_offset);
5194 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5198 /* Emit code to save registers in the prologue. */
5201 ix86_emit_save_regs (void)
5206 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5207 if (ix86_save_reg (regno, true))
5209 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5210 RTX_FRAME_RELATED_P (insn) = 1;
5214 /* Emit code to save registers using MOV insns. First register
5215 is restored from POINTER + OFFSET. */
5217 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5222 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5223 if (ix86_save_reg (regno, true))
5225 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5227 gen_rtx_REG (Pmode, regno));
5228 RTX_FRAME_RELATED_P (insn) = 1;
5229 offset += UNITS_PER_WORD;
5233 /* Expand prologue or epilogue stack adjustment.
5234 The pattern exist to put a dependency on all ebp-based memory accesses.
5235 STYLE should be negative if instructions should be marked as frame related,
5236 zero if %r11 register is live and cannot be freely used and positive
5240 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5245 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5246 else if (x86_64_immediate_operand (offset, DImode))
5247 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5251 /* r11 is used by indirect sibcall return as well, set before the
5252 epilogue and used after the epilogue. ATM indirect sibcall
5253 shouldn't be used together with huge frame sizes in one
5254 function because of the frame_size check in sibcall.c. */
5256 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5257 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5259 RTX_FRAME_RELATED_P (insn) = 1;
5260 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5264 RTX_FRAME_RELATED_P (insn) = 1;
5267 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5270 ix86_internal_arg_pointer (void)
5272 bool has_force_align_arg_pointer =
5273 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5274 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5275 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5276 && DECL_NAME (current_function_decl)
5277 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5278 && DECL_FILE_SCOPE_P (current_function_decl))
5279 || ix86_force_align_arg_pointer
5280 || has_force_align_arg_pointer)
5282 /* Nested functions can't realign the stack due to a register
5284 if (DECL_CONTEXT (current_function_decl)
5285 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5287 if (ix86_force_align_arg_pointer)
5288 warning (0, "-mstackrealign ignored for nested functions");
5289 if (has_force_align_arg_pointer)
5290 error ("%s not supported for nested functions",
5291 ix86_force_align_arg_pointer_string);
5292 return virtual_incoming_args_rtx;
5294 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5295 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5298 return virtual_incoming_args_rtx;
5301 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5302 This is called from dwarf2out.c to emit call frame instructions
5303 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5305 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5307 rtx unspec = SET_SRC (pattern);
5308 gcc_assert (GET_CODE (unspec) == UNSPEC);
5312 case UNSPEC_REG_SAVE:
5313 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5314 SET_DEST (pattern));
5316 case UNSPEC_DEF_CFA:
5317 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5318 INTVAL (XVECEXP (unspec, 0, 0)));
5325 /* Expand the prologue into a bunch of separate insns. */
5328 ix86_expand_prologue (void)
5332 struct ix86_frame frame;
5333 HOST_WIDE_INT allocate;
5335 ix86_compute_frame_layout (&frame);
5337 if (cfun->machine->force_align_arg_pointer)
5341 /* Grab the argument pointer. */
5342 x = plus_constant (stack_pointer_rtx, 4);
5343 y = cfun->machine->force_align_arg_pointer;
5344 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5345 RTX_FRAME_RELATED_P (insn) = 1;
5347 /* The unwind info consists of two parts: install the fafp as the cfa,
5348 and record the fafp as the "save register" of the stack pointer.
5349 The later is there in order that the unwinder can see where it
5350 should restore the stack pointer across the and insn. */
5351 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5352 x = gen_rtx_SET (VOIDmode, y, x);
5353 RTX_FRAME_RELATED_P (x) = 1;
5354 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5356 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5357 RTX_FRAME_RELATED_P (y) = 1;
5358 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5359 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5360 REG_NOTES (insn) = x;
5362 /* Align the stack. */
5363 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5366 /* And here we cheat like madmen with the unwind info. We force the
5367 cfa register back to sp+4, which is exactly what it was at the
5368 start of the function. Re-pushing the return address results in
5369 the return at the same spot relative to the cfa, and thus is
5370 correct wrt the unwind info. */
5371 x = cfun->machine->force_align_arg_pointer;
5372 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5373 insn = emit_insn (gen_push (x));
5374 RTX_FRAME_RELATED_P (insn) = 1;
5377 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5378 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5379 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5380 REG_NOTES (insn) = x;
5383 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5384 slower on all targets. Also sdb doesn't like it. */
5386 if (frame_pointer_needed)
5388 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5389 RTX_FRAME_RELATED_P (insn) = 1;
5391 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5392 RTX_FRAME_RELATED_P (insn) = 1;
5395 allocate = frame.to_allocate;
5397 if (!frame.save_regs_using_mov)
5398 ix86_emit_save_regs ();
5400 allocate += frame.nregs * UNITS_PER_WORD;
5402 /* When using red zone we may start register saving before allocating
5403 the stack frame saving one cycle of the prologue. */
5404 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5405 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5406 : stack_pointer_rtx,
5407 -frame.nregs * UNITS_PER_WORD);
5411 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5412 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5413 GEN_INT (-allocate), -1);
5416 /* Only valid for Win32. */
5417 rtx eax = gen_rtx_REG (SImode, 0);
5418 bool eax_live = ix86_eax_live_at_start_p ();
5421 gcc_assert (!TARGET_64BIT);
5425 emit_insn (gen_push (eax));
5429 emit_move_insn (eax, GEN_INT (allocate));
5431 insn = emit_insn (gen_allocate_stack_worker (eax));
5432 RTX_FRAME_RELATED_P (insn) = 1;
5433 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5434 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5435 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5436 t, REG_NOTES (insn));
5440 if (frame_pointer_needed)
5441 t = plus_constant (hard_frame_pointer_rtx,
5444 - frame.nregs * UNITS_PER_WORD);
5446 t = plus_constant (stack_pointer_rtx, allocate);
5447 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5451 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5453 if (!frame_pointer_needed || !frame.to_allocate)
5454 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5456 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5457 -frame.nregs * UNITS_PER_WORD);
5460 pic_reg_used = false;
5461 if (pic_offset_table_rtx
5462 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5463 || current_function_profile))
5465 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5467 if (alt_pic_reg_used != INVALID_REGNUM)
5468 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5470 pic_reg_used = true;
5476 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5478 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5480 /* Even with accurate pre-reload life analysis, we can wind up
5481 deleting all references to the pic register after reload.
5482 Consider if cross-jumping unifies two sides of a branch
5483 controlled by a comparison vs the only read from a global.
5484 In which case, allow the set_got to be deleted, though we're
5485 too late to do anything about the ebx save in the prologue. */
5486 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5489 /* Prevent function calls from be scheduled before the call to mcount.
5490 In the pic_reg_used case, make sure that the got load isn't deleted. */
5491 if (current_function_profile)
5492 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5495 /* Emit code to restore saved registers using MOV insns. First register
5496 is restored from POINTER + OFFSET. */
5498 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5499 int maybe_eh_return)
5502 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5504 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5505 if (ix86_save_reg (regno, maybe_eh_return))
5507 /* Ensure that adjust_address won't be forced to produce pointer
5508 out of range allowed by x86-64 instruction set. */
5509 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5513 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5514 emit_move_insn (r11, GEN_INT (offset));
5515 emit_insn (gen_adddi3 (r11, r11, pointer));
5516 base_address = gen_rtx_MEM (Pmode, r11);
5519 emit_move_insn (gen_rtx_REG (Pmode, regno),
5520 adjust_address (base_address, Pmode, offset));
5521 offset += UNITS_PER_WORD;
5525 /* Restore function stack, frame, and registers. */
5528 ix86_expand_epilogue (int style)
5531 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5532 struct ix86_frame frame;
5533 HOST_WIDE_INT offset;
5535 ix86_compute_frame_layout (&frame);
5537 /* Calculate start of saved registers relative to ebp. Special care
5538 must be taken for the normal return case of a function using
5539 eh_return: the eax and edx registers are marked as saved, but not
5540 restored along this path. */
5541 offset = frame.nregs;
5542 if (current_function_calls_eh_return && style != 2)
5544 offset *= -UNITS_PER_WORD;
5546 /* If we're only restoring one register and sp is not valid then
5547 using a move instruction to restore the register since it's
5548 less work than reloading sp and popping the register.
5550 The default code result in stack adjustment using add/lea instruction,
5551 while this code results in LEAVE instruction (or discrete equivalent),
5552 so it is profitable in some other cases as well. Especially when there
5553 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5554 and there is exactly one register to pop. This heuristic may need some
5555 tuning in future. */
5556 if ((!sp_valid && frame.nregs <= 1)
5557 || (TARGET_EPILOGUE_USING_MOVE
5558 && cfun->machine->use_fast_prologue_epilogue
5559 && (frame.nregs > 1 || frame.to_allocate))
5560 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5561 || (frame_pointer_needed && TARGET_USE_LEAVE
5562 && cfun->machine->use_fast_prologue_epilogue
5563 && frame.nregs == 1)
5564 || current_function_calls_eh_return)
5566 /* Restore registers. We can use ebp or esp to address the memory
5567 locations. If both are available, default to ebp, since offsets
5568 are known to be small. Only exception is esp pointing directly to the
5569 end of block of saved registers, where we may simplify addressing
5572 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5573 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5574 frame.to_allocate, style == 2);
5576 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5577 offset, style == 2);
5579 /* eh_return epilogues need %ecx added to the stack pointer. */
5582 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5584 if (frame_pointer_needed)
5586 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5587 tmp = plus_constant (tmp, UNITS_PER_WORD);
5588 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5590 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5591 emit_move_insn (hard_frame_pointer_rtx, tmp);
5593 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5598 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5599 tmp = plus_constant (tmp, (frame.to_allocate
5600 + frame.nregs * UNITS_PER_WORD));
5601 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5604 else if (!frame_pointer_needed)
5605 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5606 GEN_INT (frame.to_allocate
5607 + frame.nregs * UNITS_PER_WORD),
5609 /* If not an i386, mov & pop is faster than "leave". */
5610 else if (TARGET_USE_LEAVE || optimize_size
5611 || !cfun->machine->use_fast_prologue_epilogue)
5612 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5615 pro_epilogue_adjust_stack (stack_pointer_rtx,
5616 hard_frame_pointer_rtx,
5619 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5621 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5626 /* First step is to deallocate the stack frame so that we can
5627 pop the registers. */
5630 gcc_assert (frame_pointer_needed);
5631 pro_epilogue_adjust_stack (stack_pointer_rtx,
5632 hard_frame_pointer_rtx,
5633 GEN_INT (offset), style);
5635 else if (frame.to_allocate)
5636 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5637 GEN_INT (frame.to_allocate), style);
5639 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5640 if (ix86_save_reg (regno, false))
5643 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5645 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5647 if (frame_pointer_needed)
5649 /* Leave results in shorter dependency chains on CPUs that are
5650 able to grok it fast. */
5651 if (TARGET_USE_LEAVE)
5652 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5653 else if (TARGET_64BIT)
5654 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5656 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5660 if (cfun->machine->force_align_arg_pointer)
5662 emit_insn (gen_addsi3 (stack_pointer_rtx,
5663 cfun->machine->force_align_arg_pointer,
5667 /* Sibcall epilogues don't want a return instruction. */
5671 if (current_function_pops_args && current_function_args_size)
5673 rtx popc = GEN_INT (current_function_pops_args);
5675 /* i386 can only pop 64K bytes. If asked to pop more, pop
5676 return address, do explicit add, and jump indirectly to the
5679 if (current_function_pops_args >= 65536)
5681 rtx ecx = gen_rtx_REG (SImode, 2);
5683 /* There is no "pascal" calling convention in 64bit ABI. */
5684 gcc_assert (!TARGET_64BIT);
5686 emit_insn (gen_popsi1 (ecx));
5687 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5688 emit_jump_insn (gen_return_indirect_internal (ecx));
5691 emit_jump_insn (gen_return_pop_internal (popc));
5694 emit_jump_insn (gen_return_internal ());
5697 /* Reset from the function's potential modifications. */
5700 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5701 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5703 if (pic_offset_table_rtx)
5704 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5706 /* Mach-O doesn't support labels at the end of objects, so if
5707 it looks like we might want one, insert a NOP. */
5709 rtx insn = get_last_insn ();
5712 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5713 insn = PREV_INSN (insn);
5717 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5718 fputs ("\tnop\n", file);
5724 /* Extract the parts of an RTL expression that is a valid memory address
5725 for an instruction. Return 0 if the structure of the address is
5726 grossly off. Return -1 if the address contains ASHIFT, so it is not
5727 strictly valid, but still used for computing length of lea instruction. */
5730 ix86_decompose_address (rtx addr, struct ix86_address *out)
5732 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5733 rtx base_reg, index_reg;
5734 HOST_WIDE_INT scale = 1;
5735 rtx scale_rtx = NULL_RTX;
5737 enum ix86_address_seg seg = SEG_DEFAULT;
5739 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5741 else if (GET_CODE (addr) == PLUS)
5751 addends[n++] = XEXP (op, 1);
5754 while (GET_CODE (op) == PLUS);
5759 for (i = n; i >= 0; --i)
5762 switch (GET_CODE (op))
5767 index = XEXP (op, 0);
5768 scale_rtx = XEXP (op, 1);
5772 if (XINT (op, 1) == UNSPEC_TP
5773 && TARGET_TLS_DIRECT_SEG_REFS
5774 && seg == SEG_DEFAULT)
5775 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5804 else if (GET_CODE (addr) == MULT)
5806 index = XEXP (addr, 0); /* index*scale */
5807 scale_rtx = XEXP (addr, 1);
5809 else if (GET_CODE (addr) == ASHIFT)
5813 /* We're called for lea too, which implements ashift on occasion. */
5814 index = XEXP (addr, 0);
5815 tmp = XEXP (addr, 1);
5816 if (GET_CODE (tmp) != CONST_INT)
5818 scale = INTVAL (tmp);
5819 if ((unsigned HOST_WIDE_INT) scale > 3)
5825 disp = addr; /* displacement */
5827 /* Extract the integral value of scale. */
5830 if (GET_CODE (scale_rtx) != CONST_INT)
5832 scale = INTVAL (scale_rtx);
5835 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5836 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5838 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5839 if (base_reg && index_reg && scale == 1
5840 && (index_reg == arg_pointer_rtx
5841 || index_reg == frame_pointer_rtx
5842 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5845 tmp = base, base = index, index = tmp;
5846 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5849 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5850 if ((base_reg == hard_frame_pointer_rtx
5851 || base_reg == frame_pointer_rtx
5852 || base_reg == arg_pointer_rtx) && !disp)
5855 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5856 Avoid this by transforming to [%esi+0]. */
5857 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5858 && base_reg && !index_reg && !disp
5860 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5863 /* Special case: encode reg+reg instead of reg*2. */
5864 if (!base && index && scale && scale == 2)
5865 base = index, base_reg = index_reg, scale = 1;
5867 /* Special case: scaling cannot be encoded without base or displacement. */
5868 if (!base && !disp && index && scale != 1)
5880 /* Return cost of the memory address x.
5881 For i386, it is better to use a complex address than let gcc copy
5882 the address into a reg and make a new pseudo. But not if the address
5883 requires to two regs - that would mean more pseudos with longer
5886 ix86_address_cost (rtx x)
5888 struct ix86_address parts;
5890 int ok = ix86_decompose_address (x, &parts);
5894 if (parts.base && GET_CODE (parts.base) == SUBREG)
5895 parts.base = SUBREG_REG (parts.base);
5896 if (parts.index && GET_CODE (parts.index) == SUBREG)
5897 parts.index = SUBREG_REG (parts.index);
5899 /* More complex memory references are better. */
5900 if (parts.disp && parts.disp != const0_rtx)
5902 if (parts.seg != SEG_DEFAULT)
5905 /* Attempt to minimize number of registers in the address. */
5907 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5909 && (!REG_P (parts.index)
5910 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5914 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5916 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5917 && parts.base != parts.index)
5920 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5921 since it's predecode logic can't detect the length of instructions
5922 and it degenerates to vector decoded. Increase cost of such
5923 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5924 to split such addresses or even refuse such addresses at all.
5926 Following addressing modes are affected:
5931 The first and last case may be avoidable by explicitly coding the zero in
5932 memory address, but I don't have AMD-K6 machine handy to check this
5936 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5937 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5938 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5944 /* If X is a machine specific address (i.e. a symbol or label being
5945 referenced as a displacement from the GOT implemented using an
5946 UNSPEC), then return the base term. Otherwise return X. */
5949 ix86_find_base_term (rtx x)
5955 if (GET_CODE (x) != CONST)
5958 if (GET_CODE (term) == PLUS
5959 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5960 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5961 term = XEXP (term, 0);
5962 if (GET_CODE (term) != UNSPEC
5963 || XINT (term, 1) != UNSPEC_GOTPCREL)
5966 term = XVECEXP (term, 0, 0);
5968 if (GET_CODE (term) != SYMBOL_REF
5969 && GET_CODE (term) != LABEL_REF)
5975 term = ix86_delegitimize_address (x);
5977 if (GET_CODE (term) != SYMBOL_REF
5978 && GET_CODE (term) != LABEL_REF)
5984 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5985 this is used for to form addresses to local data when -fPIC is in
5989 darwin_local_data_pic (rtx disp)
5991 if (GET_CODE (disp) == MINUS)
5993 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5994 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5995 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5997 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5998 if (! strcmp (sym_name, "<pic base>"))
6006 /* Determine if a given RTX is a valid constant. We already know this
6007 satisfies CONSTANT_P. */
6010 legitimate_constant_p (rtx x)
6012 switch (GET_CODE (x))
6017 if (GET_CODE (x) == PLUS)
6019 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6024 if (TARGET_MACHO && darwin_local_data_pic (x))
6027 /* Only some unspecs are valid as "constants". */
6028 if (GET_CODE (x) == UNSPEC)
6029 switch (XINT (x, 1))
6032 return TARGET_64BIT;
6035 x = XVECEXP (x, 0, 0);
6036 return (GET_CODE (x) == SYMBOL_REF
6037 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6039 x = XVECEXP (x, 0, 0);
6040 return (GET_CODE (x) == SYMBOL_REF
6041 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6046 /* We must have drilled down to a symbol. */
6047 if (GET_CODE (x) == LABEL_REF)
6049 if (GET_CODE (x) != SYMBOL_REF)
6054 /* TLS symbols are never valid. */
6055 if (SYMBOL_REF_TLS_MODEL (x))
6060 if (GET_MODE (x) == TImode
6061 && x != CONST0_RTX (TImode)
6067 if (x == CONST0_RTX (GET_MODE (x)))
6075 /* Otherwise we handle everything else in the move patterns. */
6079 /* Determine if it's legal to put X into the constant pool. This
6080 is not possible for the address of thread-local symbols, which
6081 is checked above. */
6084 ix86_cannot_force_const_mem (rtx x)
6086 /* We can always put integral constants and vectors in memory. */
6087 switch (GET_CODE (x))
6097 return !legitimate_constant_p (x);
6100 /* Determine if a given RTX is a valid constant address. */
6103 constant_address_p (rtx x)
6105 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6108 /* Nonzero if the constant value X is a legitimate general operand
6109 when generating PIC code. It is given that flag_pic is on and
6110 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6113 legitimate_pic_operand_p (rtx x)
6117 switch (GET_CODE (x))
6120 inner = XEXP (x, 0);
6121 if (GET_CODE (inner) == PLUS
6122 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6123 inner = XEXP (inner, 0);
6125 /* Only some unspecs are valid as "constants". */
6126 if (GET_CODE (inner) == UNSPEC)
6127 switch (XINT (inner, 1))
6130 return TARGET_64BIT;
6132 x = XVECEXP (inner, 0, 0);
6133 return (GET_CODE (x) == SYMBOL_REF
6134 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6142 return legitimate_pic_address_disp_p (x);
6149 /* Determine if a given CONST RTX is a valid memory displacement
6153 legitimate_pic_address_disp_p (rtx disp)
6157 /* In 64bit mode we can allow direct addresses of symbols and labels
6158 when they are not dynamic symbols. */
6161 rtx op0 = disp, op1;
6163 switch (GET_CODE (disp))
6169 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6171 op0 = XEXP (XEXP (disp, 0), 0);
6172 op1 = XEXP (XEXP (disp, 0), 1);
6173 if (GET_CODE (op1) != CONST_INT
6174 || INTVAL (op1) >= 16*1024*1024
6175 || INTVAL (op1) < -16*1024*1024)
6177 if (GET_CODE (op0) == LABEL_REF)
6179 if (GET_CODE (op0) != SYMBOL_REF)
6184 /* TLS references should always be enclosed in UNSPEC. */
6185 if (SYMBOL_REF_TLS_MODEL (op0))
6187 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6195 if (GET_CODE (disp) != CONST)
6197 disp = XEXP (disp, 0);
6201 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6202 of GOT tables. We should not need these anyway. */
6203 if (GET_CODE (disp) != UNSPEC
6204 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6205 && XINT (disp, 1) != UNSPEC_GOTOFF))
6208 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6209 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6215 if (GET_CODE (disp) == PLUS)
6217 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6219 disp = XEXP (disp, 0);
6223 if (TARGET_MACHO && darwin_local_data_pic (disp))
6226 if (GET_CODE (disp) != UNSPEC)
6229 switch (XINT (disp, 1))
6234 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6236 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6237 While ABI specify also 32bit relocation but we don't produce it in
6238 small PIC model at all. */
6239 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6240 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6242 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6244 case UNSPEC_GOTTPOFF:
6245 case UNSPEC_GOTNTPOFF:
6246 case UNSPEC_INDNTPOFF:
6249 disp = XVECEXP (disp, 0, 0);
6250 return (GET_CODE (disp) == SYMBOL_REF
6251 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6253 disp = XVECEXP (disp, 0, 0);
6254 return (GET_CODE (disp) == SYMBOL_REF
6255 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6257 disp = XVECEXP (disp, 0, 0);
6258 return (GET_CODE (disp) == SYMBOL_REF
6259 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6265 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6266 memory address for an instruction. The MODE argument is the machine mode
6267 for the MEM expression that wants to use this address.
6269 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6270 convert common non-canonical forms to canonical form so that they will
6274 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6276 struct ix86_address parts;
6277 rtx base, index, disp;
6278 HOST_WIDE_INT scale;
6279 const char *reason = NULL;
6280 rtx reason_rtx = NULL_RTX;
6282 if (TARGET_DEBUG_ADDR)
6285 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6286 GET_MODE_NAME (mode), strict);
6290 if (ix86_decompose_address (addr, &parts) <= 0)
6292 reason = "decomposition failed";
6297 index = parts.index;
6299 scale = parts.scale;
6301 /* Validate base register.
6303 Don't allow SUBREG's that span more than a word here. It can lead to spill
6304 failures when the base is one word out of a two word structure, which is
6305 represented internally as a DImode int. */
6314 else if (GET_CODE (base) == SUBREG
6315 && REG_P (SUBREG_REG (base))
6316 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6318 reg = SUBREG_REG (base);
6321 reason = "base is not a register";
6325 if (GET_MODE (base) != Pmode)
6327 reason = "base is not in Pmode";
6331 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6332 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6334 reason = "base is not valid";
6339 /* Validate index register.
6341 Don't allow SUBREG's that span more than a word here -- same as above. */
6350 else if (GET_CODE (index) == SUBREG
6351 && REG_P (SUBREG_REG (index))
6352 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6354 reg = SUBREG_REG (index);
6357 reason = "index is not a register";
6361 if (GET_MODE (index) != Pmode)
6363 reason = "index is not in Pmode";
6367 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6368 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6370 reason = "index is not valid";
6375 /* Validate scale factor. */
6378 reason_rtx = GEN_INT (scale);
6381 reason = "scale without index";
6385 if (scale != 2 && scale != 4 && scale != 8)
6387 reason = "scale is not a valid multiplier";
6392 /* Validate displacement. */
6397 if (GET_CODE (disp) == CONST
6398 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6399 switch (XINT (XEXP (disp, 0), 1))
6401 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6402 used. While ABI specify also 32bit relocations, we don't produce
6403 them at all and use IP relative instead. */
6406 gcc_assert (flag_pic);
6408 goto is_legitimate_pic;
6409 reason = "64bit address unspec";
6412 case UNSPEC_GOTPCREL:
6413 gcc_assert (flag_pic);
6414 goto is_legitimate_pic;
6416 case UNSPEC_GOTTPOFF:
6417 case UNSPEC_GOTNTPOFF:
6418 case UNSPEC_INDNTPOFF:
6424 reason = "invalid address unspec";
6428 else if (flag_pic && (SYMBOLIC_CONST (disp)
6430 && !machopic_operand_p (disp)
6435 if (TARGET_64BIT && (index || base))
6437 /* foo@dtpoff(%rX) is ok. */
6438 if (GET_CODE (disp) != CONST
6439 || GET_CODE (XEXP (disp, 0)) != PLUS
6440 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6441 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6442 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6443 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6445 reason = "non-constant pic memory reference";
6449 else if (! legitimate_pic_address_disp_p (disp))
6451 reason = "displacement is an invalid pic construct";
6455 /* This code used to verify that a symbolic pic displacement
6456 includes the pic_offset_table_rtx register.
6458 While this is good idea, unfortunately these constructs may
6459 be created by "adds using lea" optimization for incorrect
6468 This code is nonsensical, but results in addressing
6469 GOT table with pic_offset_table_rtx base. We can't
6470 just refuse it easily, since it gets matched by
6471 "addsi3" pattern, that later gets split to lea in the
6472 case output register differs from input. While this
6473 can be handled by separate addsi pattern for this case
6474 that never results in lea, this seems to be easier and
6475 correct fix for crash to disable this test. */
6477 else if (GET_CODE (disp) != LABEL_REF
6478 && GET_CODE (disp) != CONST_INT
6479 && (GET_CODE (disp) != CONST
6480 || !legitimate_constant_p (disp))
6481 && (GET_CODE (disp) != SYMBOL_REF
6482 || !legitimate_constant_p (disp)))
6484 reason = "displacement is not constant";
6487 else if (TARGET_64BIT
6488 && !x86_64_immediate_operand (disp, VOIDmode))
6490 reason = "displacement is out of range";
6495 /* Everything looks valid. */
6496 if (TARGET_DEBUG_ADDR)
6497 fprintf (stderr, "Success.\n");
6501 if (TARGET_DEBUG_ADDR)
6503 fprintf (stderr, "Error: %s\n", reason);
6504 debug_rtx (reason_rtx);
6509 /* Return a unique alias set for the GOT. */
6511 static HOST_WIDE_INT
6512 ix86_GOT_alias_set (void)
6514 static HOST_WIDE_INT set = -1;
6516 set = new_alias_set ();
6520 /* Return a legitimate reference for ORIG (an address) using the
6521 register REG. If REG is 0, a new pseudo is generated.
6523 There are two types of references that must be handled:
6525 1. Global data references must load the address from the GOT, via
6526 the PIC reg. An insn is emitted to do this load, and the reg is
6529 2. Static data references, constant pool addresses, and code labels
6530 compute the address as an offset from the GOT, whose base is in
6531 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6532 differentiate them from global data objects. The returned
6533 address is the PIC reg + an unspec constant.
6535 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6536 reg also appears in the address. */
6539 legitimize_pic_address (rtx orig, rtx reg)
6547 reg = gen_reg_rtx (Pmode);
6548 /* Use the generic Mach-O PIC machinery. */
6549 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6552 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6554 else if (TARGET_64BIT
6555 && ix86_cmodel != CM_SMALL_PIC
6556 && local_symbolic_operand (addr, Pmode))
6559 /* This symbol may be referenced via a displacement from the PIC
6560 base address (@GOTOFF). */
6562 if (reload_in_progress)
6563 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6564 if (GET_CODE (addr) == CONST)
6565 addr = XEXP (addr, 0);
6566 if (GET_CODE (addr) == PLUS)
6568 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6569 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6572 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6573 new = gen_rtx_CONST (Pmode, new);
6575 tmpreg = gen_reg_rtx (Pmode);
6578 emit_move_insn (tmpreg, new);
6582 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6583 tmpreg, 1, OPTAB_DIRECT);
6586 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6588 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6590 /* This symbol may be referenced via a displacement from the PIC
6591 base address (@GOTOFF). */
6593 if (reload_in_progress)
6594 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6595 if (GET_CODE (addr) == CONST)
6596 addr = XEXP (addr, 0);
6597 if (GET_CODE (addr) == PLUS)
6599 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6600 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6603 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6604 new = gen_rtx_CONST (Pmode, new);
6605 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6609 emit_move_insn (reg, new);
6613 else if (GET_CODE (addr) == SYMBOL_REF)
6617 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6618 new = gen_rtx_CONST (Pmode, new);
6619 new = gen_const_mem (Pmode, new);
6620 set_mem_alias_set (new, ix86_GOT_alias_set ());
6623 reg = gen_reg_rtx (Pmode);
6624 /* Use directly gen_movsi, otherwise the address is loaded
6625 into register for CSE. We don't want to CSE this addresses,
6626 instead we CSE addresses from the GOT table, so skip this. */
6627 emit_insn (gen_movsi (reg, new));
6632 /* This symbol must be referenced via a load from the
6633 Global Offset Table (@GOT). */
6635 if (reload_in_progress)
6636 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6637 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6638 new = gen_rtx_CONST (Pmode, new);
6639 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6640 new = gen_const_mem (Pmode, new);
6641 set_mem_alias_set (new, ix86_GOT_alias_set ());
6644 reg = gen_reg_rtx (Pmode);
6645 emit_move_insn (reg, new);
6651 if (GET_CODE (addr) == CONST_INT
6652 && !x86_64_immediate_operand (addr, VOIDmode))
6656 emit_move_insn (reg, addr);
6660 new = force_reg (Pmode, addr);
6662 else if (GET_CODE (addr) == CONST)
6664 addr = XEXP (addr, 0);
6666 /* We must match stuff we generate before. Assume the only
6667 unspecs that can get here are ours. Not that we could do
6668 anything with them anyway.... */
6669 if (GET_CODE (addr) == UNSPEC
6670 || (GET_CODE (addr) == PLUS
6671 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6673 gcc_assert (GET_CODE (addr) == PLUS);
6675 if (GET_CODE (addr) == PLUS)
6677 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6679 /* Check first to see if this is a constant offset from a @GOTOFF
6680 symbol reference. */
6681 if (local_symbolic_operand (op0, Pmode)
6682 && GET_CODE (op1) == CONST_INT)
6686 if (reload_in_progress)
6687 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6688 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6690 new = gen_rtx_PLUS (Pmode, new, op1);
6691 new = gen_rtx_CONST (Pmode, new);
6692 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6696 emit_move_insn (reg, new);
6702 if (INTVAL (op1) < -16*1024*1024
6703 || INTVAL (op1) >= 16*1024*1024)
6705 if (!x86_64_immediate_operand (op1, Pmode))
6706 op1 = force_reg (Pmode, op1);
6707 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6713 base = legitimize_pic_address (XEXP (addr, 0), reg);
6714 new = legitimize_pic_address (XEXP (addr, 1),
6715 base == reg ? NULL_RTX : reg);
6717 if (GET_CODE (new) == CONST_INT)
6718 new = plus_constant (base, INTVAL (new));
6721 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6723 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6724 new = XEXP (new, 1);
6726 new = gen_rtx_PLUS (Pmode, base, new);
6734 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6737 get_thread_pointer (int to_reg)
6741 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6745 reg = gen_reg_rtx (Pmode);
6746 insn = gen_rtx_SET (VOIDmode, reg, tp);
6747 insn = emit_insn (insn);
6752 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6753 false if we expect this to be used for a memory address and true if
6754 we expect to load the address into a register. */
6757 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6759 rtx dest, base, off, pic, tp;
6764 case TLS_MODEL_GLOBAL_DYNAMIC:
6765 dest = gen_reg_rtx (Pmode);
6766 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6768 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6770 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6773 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6774 insns = get_insns ();
6777 emit_libcall_block (insns, dest, rax, x);
6779 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6780 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6782 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6784 if (TARGET_GNU2_TLS)
6786 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6788 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6792 case TLS_MODEL_LOCAL_DYNAMIC:
6793 base = gen_reg_rtx (Pmode);
6794 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6796 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6798 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6801 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6802 insns = get_insns ();
6805 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6806 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6807 emit_libcall_block (insns, base, rax, note);
6809 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6810 emit_insn (gen_tls_local_dynamic_base_64 (base));
6812 emit_insn (gen_tls_local_dynamic_base_32 (base));
6814 if (TARGET_GNU2_TLS)
6816 rtx x = ix86_tls_module_base ();
6818 set_unique_reg_note (get_last_insn (), REG_EQUIV,
6819 gen_rtx_MINUS (Pmode, x, tp));
6822 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6823 off = gen_rtx_CONST (Pmode, off);
6825 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6827 if (TARGET_GNU2_TLS)
6829 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6831 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6836 case TLS_MODEL_INITIAL_EXEC:
6840 type = UNSPEC_GOTNTPOFF;
6844 if (reload_in_progress)
6845 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6846 pic = pic_offset_table_rtx;
6847 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6849 else if (!TARGET_ANY_GNU_TLS)
6851 pic = gen_reg_rtx (Pmode);
6852 emit_insn (gen_set_got (pic));
6853 type = UNSPEC_GOTTPOFF;
6858 type = UNSPEC_INDNTPOFF;
6861 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6862 off = gen_rtx_CONST (Pmode, off);
6864 off = gen_rtx_PLUS (Pmode, pic, off);
6865 off = gen_const_mem (Pmode, off);
6866 set_mem_alias_set (off, ix86_GOT_alias_set ());
6868 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6870 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6871 off = force_reg (Pmode, off);
6872 return gen_rtx_PLUS (Pmode, base, off);
6876 base = get_thread_pointer (true);
6877 dest = gen_reg_rtx (Pmode);
6878 emit_insn (gen_subsi3 (dest, base, off));
6882 case TLS_MODEL_LOCAL_EXEC:
6883 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6884 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6885 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6886 off = gen_rtx_CONST (Pmode, off);
6888 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6890 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6891 return gen_rtx_PLUS (Pmode, base, off);
6895 base = get_thread_pointer (true);
6896 dest = gen_reg_rtx (Pmode);
6897 emit_insn (gen_subsi3 (dest, base, off));
6908 /* Try machine-dependent ways of modifying an illegitimate address
6909 to be legitimate. If we find one, return the new, valid address.
6910 This macro is used in only one place: `memory_address' in explow.c.
6912 OLDX is the address as it was before break_out_memory_refs was called.
6913 In some cases it is useful to look at this to decide what needs to be done.
6915 MODE and WIN are passed so that this macro can use
6916 GO_IF_LEGITIMATE_ADDRESS.
6918 It is always safe for this macro to do nothing. It exists to recognize
6919 opportunities to optimize the output.
6921 For the 80386, we handle X+REG by loading X into a register R and
6922 using R+REG. R will go in a general reg and indexing will be used.
6923 However, if REG is a broken-out memory address or multiplication,
6924 nothing needs to be done because REG can certainly go in a general reg.
6926 When -fpic is used, special handling is needed for symbolic references.
6927 See comments by legitimize_pic_address in i386.c for details. */
6930 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6935 if (TARGET_DEBUG_ADDR)
6937 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6938 GET_MODE_NAME (mode));
6942 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6944 return legitimize_tls_address (x, log, false);
6945 if (GET_CODE (x) == CONST
6946 && GET_CODE (XEXP (x, 0)) == PLUS
6947 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6948 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6950 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6951 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6954 if (flag_pic && SYMBOLIC_CONST (x))
6955 return legitimize_pic_address (x, 0);
6957 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6958 if (GET_CODE (x) == ASHIFT
6959 && GET_CODE (XEXP (x, 1)) == CONST_INT
6960 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6963 log = INTVAL (XEXP (x, 1));
6964 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6965 GEN_INT (1 << log));
6968 if (GET_CODE (x) == PLUS)
6970 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6972 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6973 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6974 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6977 log = INTVAL (XEXP (XEXP (x, 0), 1));
6978 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6979 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6980 GEN_INT (1 << log));
6983 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6984 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6985 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
6988 log = INTVAL (XEXP (XEXP (x, 1), 1));
6989 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6990 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6991 GEN_INT (1 << log));
6994 /* Put multiply first if it isn't already. */
6995 if (GET_CODE (XEXP (x, 1)) == MULT)
6997 rtx tmp = XEXP (x, 0);
6998 XEXP (x, 0) = XEXP (x, 1);
7003 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7004 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7005 created by virtual register instantiation, register elimination, and
7006 similar optimizations. */
7007 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7010 x = gen_rtx_PLUS (Pmode,
7011 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7012 XEXP (XEXP (x, 1), 0)),
7013 XEXP (XEXP (x, 1), 1));
7017 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7018 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7019 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7020 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7021 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7022 && CONSTANT_P (XEXP (x, 1)))
7025 rtx other = NULL_RTX;
7027 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7029 constant = XEXP (x, 1);
7030 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7032 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7034 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7035 other = XEXP (x, 1);
7043 x = gen_rtx_PLUS (Pmode,
7044 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7045 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7046 plus_constant (other, INTVAL (constant)));
7050 if (changed && legitimate_address_p (mode, x, FALSE))
7053 if (GET_CODE (XEXP (x, 0)) == MULT)
7056 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7059 if (GET_CODE (XEXP (x, 1)) == MULT)
7062 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7066 && GET_CODE (XEXP (x, 1)) == REG
7067 && GET_CODE (XEXP (x, 0)) == REG)
7070 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7073 x = legitimize_pic_address (x, 0);
7076 if (changed && legitimate_address_p (mode, x, FALSE))
7079 if (GET_CODE (XEXP (x, 0)) == REG)
7081 rtx temp = gen_reg_rtx (Pmode);
7082 rtx val = force_operand (XEXP (x, 1), temp);
7084 emit_move_insn (temp, val);
7090 else if (GET_CODE (XEXP (x, 1)) == REG)
7092 rtx temp = gen_reg_rtx (Pmode);
7093 rtx val = force_operand (XEXP (x, 0), temp);
7095 emit_move_insn (temp, val);
7105 /* Print an integer constant expression in assembler syntax. Addition
7106 and subtraction are the only arithmetic that may appear in these
7107 expressions. FILE is the stdio stream to write to, X is the rtx, and
7108 CODE is the operand print code from the output string. */
7111 output_pic_addr_const (FILE *file, rtx x, int code)
7115 switch (GET_CODE (x))
7118 gcc_assert (flag_pic);
7123 output_addr_const (file, x);
7124 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7125 fputs ("@PLT", file);
7132 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7133 assemble_name (asm_out_file, buf);
7137 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7141 /* This used to output parentheses around the expression,
7142 but that does not work on the 386 (either ATT or BSD assembler). */
7143 output_pic_addr_const (file, XEXP (x, 0), code);
7147 if (GET_MODE (x) == VOIDmode)
7149 /* We can use %d if the number is <32 bits and positive. */
7150 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7151 fprintf (file, "0x%lx%08lx",
7152 (unsigned long) CONST_DOUBLE_HIGH (x),
7153 (unsigned long) CONST_DOUBLE_LOW (x));
7155 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7158 /* We can't handle floating point constants;
7159 PRINT_OPERAND must handle them. */
7160 output_operand_lossage ("floating constant misused");
7164 /* Some assemblers need integer constants to appear first. */
7165 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7167 output_pic_addr_const (file, XEXP (x, 0), code);
7169 output_pic_addr_const (file, XEXP (x, 1), code);
7173 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7174 output_pic_addr_const (file, XEXP (x, 1), code);
7176 output_pic_addr_const (file, XEXP (x, 0), code);
7182 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7183 output_pic_addr_const (file, XEXP (x, 0), code);
7185 output_pic_addr_const (file, XEXP (x, 1), code);
7187 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7191 gcc_assert (XVECLEN (x, 0) == 1);
7192 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7193 switch (XINT (x, 1))
7196 fputs ("@GOT", file);
7199 fputs ("@GOTOFF", file);
7201 case UNSPEC_GOTPCREL:
7202 fputs ("@GOTPCREL(%rip)", file);
7204 case UNSPEC_GOTTPOFF:
7205 /* FIXME: This might be @TPOFF in Sun ld too. */
7206 fputs ("@GOTTPOFF", file);
7209 fputs ("@TPOFF", file);
7213 fputs ("@TPOFF", file);
7215 fputs ("@NTPOFF", file);
7218 fputs ("@DTPOFF", file);
7220 case UNSPEC_GOTNTPOFF:
7222 fputs ("@GOTTPOFF(%rip)", file);
7224 fputs ("@GOTNTPOFF", file);
7226 case UNSPEC_INDNTPOFF:
7227 fputs ("@INDNTPOFF", file);
7230 output_operand_lossage ("invalid UNSPEC as operand");
7236 output_operand_lossage ("invalid expression as operand");
7240 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7241 We need to emit DTP-relative relocations. */
7244 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7246 fputs (ASM_LONG, file);
7247 output_addr_const (file, x);
7248 fputs ("@DTPOFF", file);
7254 fputs (", 0", file);
7261 /* In the name of slightly smaller debug output, and to cater to
7262 general assembler lossage, recognize PIC+GOTOFF and turn it back
7263 into a direct symbol reference.
7265 On Darwin, this is necessary to avoid a crash, because Darwin
7266 has a different PIC label for each routine but the DWARF debugging
7267 information is not associated with any particular routine, so it's
7268 necessary to remove references to the PIC label from RTL stored by
7269 the DWARF output code. */
7272 ix86_delegitimize_address (rtx orig_x)
7275 /* reg_addend is NULL or a multiple of some register. */
7276 rtx reg_addend = NULL_RTX;
7277 /* const_addend is NULL or a const_int. */
7278 rtx const_addend = NULL_RTX;
7279 /* This is the result, or NULL. */
7280 rtx result = NULL_RTX;
7282 if (GET_CODE (x) == MEM)
7287 if (GET_CODE (x) != CONST
7288 || GET_CODE (XEXP (x, 0)) != UNSPEC
7289 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7290 || GET_CODE (orig_x) != MEM)
7292 return XVECEXP (XEXP (x, 0), 0, 0);
7295 if (GET_CODE (x) != PLUS
7296 || GET_CODE (XEXP (x, 1)) != CONST)
7299 if (GET_CODE (XEXP (x, 0)) == REG
7300 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7301 /* %ebx + GOT/GOTOFF */
7303 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7305 /* %ebx + %reg * scale + GOT/GOTOFF */
7306 reg_addend = XEXP (x, 0);
7307 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7308 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7309 reg_addend = XEXP (reg_addend, 1);
7310 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7311 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7312 reg_addend = XEXP (reg_addend, 0);
7315 if (GET_CODE (reg_addend) != REG
7316 && GET_CODE (reg_addend) != MULT
7317 && GET_CODE (reg_addend) != ASHIFT)
7323 x = XEXP (XEXP (x, 1), 0);
7324 if (GET_CODE (x) == PLUS
7325 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7327 const_addend = XEXP (x, 1);
7331 if (GET_CODE (x) == UNSPEC
7332 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7333 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7334 result = XVECEXP (x, 0, 0);
7336 if (TARGET_MACHO && darwin_local_data_pic (x)
7337 && GET_CODE (orig_x) != MEM)
7338 result = XEXP (x, 0);
7344 result = gen_rtx_PLUS (Pmode, result, const_addend);
7346 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7351 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7356 if (mode == CCFPmode || mode == CCFPUmode)
7358 enum rtx_code second_code, bypass_code;
7359 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7360 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7361 code = ix86_fp_compare_code_to_integer (code);
7365 code = reverse_condition (code);
7376 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7380 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7381 Those same assemblers have the same but opposite lossage on cmov. */
7382 gcc_assert (mode == CCmode);
7383 suffix = fp ? "nbe" : "a";
7403 gcc_assert (mode == CCmode);
7425 gcc_assert (mode == CCmode);
7426 suffix = fp ? "nb" : "ae";
7429 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7433 gcc_assert (mode == CCmode);
7437 suffix = fp ? "u" : "p";
7440 suffix = fp ? "nu" : "np";
7445 fputs (suffix, file);
7448 /* Print the name of register X to FILE based on its machine mode and number.
7449 If CODE is 'w', pretend the mode is HImode.
7450 If CODE is 'b', pretend the mode is QImode.
7451 If CODE is 'k', pretend the mode is SImode.
7452 If CODE is 'q', pretend the mode is DImode.
7453 If CODE is 'h', pretend the reg is the 'high' byte register.
7454 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7457 print_reg (rtx x, int code, FILE *file)
7459 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7460 && REGNO (x) != FRAME_POINTER_REGNUM
7461 && REGNO (x) != FLAGS_REG
7462 && REGNO (x) != FPSR_REG);
7464 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7467 if (code == 'w' || MMX_REG_P (x))
7469 else if (code == 'b')
7471 else if (code == 'k')
7473 else if (code == 'q')
7475 else if (code == 'y')
7477 else if (code == 'h')
7480 code = GET_MODE_SIZE (GET_MODE (x));
7482 /* Irritatingly, AMD extended registers use different naming convention
7483 from the normal registers. */
7484 if (REX_INT_REG_P (x))
7486 gcc_assert (TARGET_64BIT);
7490 error ("extended registers have no high halves");
7493 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7496 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7499 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7502 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7505 error ("unsupported operand size for extended register");
7513 if (STACK_TOP_P (x))
7515 fputs ("st(0)", file);
7522 if (! ANY_FP_REG_P (x))
7523 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7528 fputs (hi_reg_name[REGNO (x)], file);
7531 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7533 fputs (qi_reg_name[REGNO (x)], file);
7536 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7538 fputs (qi_high_reg_name[REGNO (x)], file);
7545 /* Locate some local-dynamic symbol still in use by this function
7546 so that we can print its name in some tls_local_dynamic_base
7550 get_some_local_dynamic_name (void)
7554 if (cfun->machine->some_ld_name)
7555 return cfun->machine->some_ld_name;
7557 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7559 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7560 return cfun->machine->some_ld_name;
7566 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7570 if (GET_CODE (x) == SYMBOL_REF
7571 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7573 cfun->machine->some_ld_name = XSTR (x, 0);
7581 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7582 C -- print opcode suffix for set/cmov insn.
7583 c -- like C, but print reversed condition
7584 F,f -- likewise, but for floating-point.
7585 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7587 R -- print the prefix for register names.
7588 z -- print the opcode suffix for the size of the current operand.
7589 * -- print a star (in certain assembler syntax)
7590 A -- print an absolute memory reference.
7591 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7592 s -- print a shift double count, followed by the assemblers argument
7594 b -- print the QImode name of the register for the indicated operand.
7595 %b0 would print %al if operands[0] is reg 0.
7596 w -- likewise, print the HImode name of the register.
7597 k -- likewise, print the SImode name of the register.
7598 q -- likewise, print the DImode name of the register.
7599 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7600 y -- print "st(0)" instead of "st" as a register.
7601 D -- print condition for SSE cmp instruction.
7602 P -- if PIC, print an @PLT suffix.
7603 X -- don't print any sort of PIC '@' suffix for a symbol.
7604 & -- print some in-use local-dynamic symbol name.
7605 H -- print a memory address offset by 8; used for sse high-parts
7609 print_operand (FILE *file, rtx x, int code)
7616 if (ASSEMBLER_DIALECT == ASM_ATT)
7621 assemble_name (file, get_some_local_dynamic_name ());
7625 switch (ASSEMBLER_DIALECT)
7632 /* Intel syntax. For absolute addresses, registers should not
7633 be surrounded by braces. */
7634 if (GET_CODE (x) != REG)
7637 PRINT_OPERAND (file, x, 0);
7647 PRINT_OPERAND (file, x, 0);
7652 if (ASSEMBLER_DIALECT == ASM_ATT)
7657 if (ASSEMBLER_DIALECT == ASM_ATT)
7662 if (ASSEMBLER_DIALECT == ASM_ATT)
7667 if (ASSEMBLER_DIALECT == ASM_ATT)
7672 if (ASSEMBLER_DIALECT == ASM_ATT)
7677 if (ASSEMBLER_DIALECT == ASM_ATT)
7682 /* 387 opcodes don't get size suffixes if the operands are
7684 if (STACK_REG_P (x))
7687 /* Likewise if using Intel opcodes. */
7688 if (ASSEMBLER_DIALECT == ASM_INTEL)
7691 /* This is the size of op from size of operand. */
7692 switch (GET_MODE_SIZE (GET_MODE (x)))
7695 #ifdef HAVE_GAS_FILDS_FISTS
7701 if (GET_MODE (x) == SFmode)
7716 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7718 #ifdef GAS_MNEMONICS
7744 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7746 PRINT_OPERAND (file, x, 0);
7752 /* Little bit of braindamage here. The SSE compare instructions
7753 does use completely different names for the comparisons that the
7754 fp conditional moves. */
7755 switch (GET_CODE (x))
7770 fputs ("unord", file);
7774 fputs ("neq", file);
7778 fputs ("nlt", file);
7782 fputs ("nle", file);
7785 fputs ("ord", file);
7792 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7793 if (ASSEMBLER_DIALECT == ASM_ATT)
7795 switch (GET_MODE (x))
7797 case HImode: putc ('w', file); break;
7799 case SFmode: putc ('l', file); break;
7801 case DFmode: putc ('q', file); break;
7802 default: gcc_unreachable ();
7809 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7812 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7813 if (ASSEMBLER_DIALECT == ASM_ATT)
7816 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7819 /* Like above, but reverse condition */
7821 /* Check to see if argument to %c is really a constant
7822 and not a condition code which needs to be reversed. */
7823 if (!COMPARISON_P (x))
7825 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7828 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7831 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7832 if (ASSEMBLER_DIALECT == ASM_ATT)
7835 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7839 /* It doesn't actually matter what mode we use here, as we're
7840 only going to use this for printing. */
7841 x = adjust_address_nv (x, DImode, 8);
7848 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7851 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7854 int pred_val = INTVAL (XEXP (x, 0));
7856 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7857 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7859 int taken = pred_val > REG_BR_PROB_BASE / 2;
7860 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7862 /* Emit hints only in the case default branch prediction
7863 heuristics would fail. */
7864 if (taken != cputaken)
7866 /* We use 3e (DS) prefix for taken branches and
7867 2e (CS) prefix for not taken branches. */
7869 fputs ("ds ; ", file);
7871 fputs ("cs ; ", file);
7878 output_operand_lossage ("invalid operand code '%c'", code);
7882 if (GET_CODE (x) == REG)
7883 print_reg (x, code, file);
7885 else if (GET_CODE (x) == MEM)
7887 /* No `byte ptr' prefix for call instructions. */
7888 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7891 switch (GET_MODE_SIZE (GET_MODE (x)))
7893 case 1: size = "BYTE"; break;
7894 case 2: size = "WORD"; break;
7895 case 4: size = "DWORD"; break;
7896 case 8: size = "QWORD"; break;
7897 case 12: size = "XWORD"; break;
7898 case 16: size = "XMMWORD"; break;
7903 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7906 else if (code == 'w')
7908 else if (code == 'k')
7912 fputs (" PTR ", file);
7916 /* Avoid (%rip) for call operands. */
7917 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7918 && GET_CODE (x) != CONST_INT)
7919 output_addr_const (file, x);
7920 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7921 output_operand_lossage ("invalid constraints for operand");
7926 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7931 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7932 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7934 if (ASSEMBLER_DIALECT == ASM_ATT)
7936 fprintf (file, "0x%08lx", l);
7939 /* These float cases don't actually occur as immediate operands. */
7940 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7944 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7945 fprintf (file, "%s", dstr);
7948 else if (GET_CODE (x) == CONST_DOUBLE
7949 && GET_MODE (x) == XFmode)
7953 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7954 fprintf (file, "%s", dstr);
7959 /* We have patterns that allow zero sets of memory, for instance.
7960 In 64-bit mode, we should probably support all 8-byte vectors,
7961 since we can in fact encode that into an immediate. */
7962 if (GET_CODE (x) == CONST_VECTOR)
7964 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
7970 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7972 if (ASSEMBLER_DIALECT == ASM_ATT)
7975 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7976 || GET_CODE (x) == LABEL_REF)
7978 if (ASSEMBLER_DIALECT == ASM_ATT)
7981 fputs ("OFFSET FLAT:", file);
7984 if (GET_CODE (x) == CONST_INT)
7985 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7987 output_pic_addr_const (file, x, code);
7989 output_addr_const (file, x);
7993 /* Print a memory operand whose address is ADDR. */
7996 print_operand_address (FILE *file, rtx addr)
7998 struct ix86_address parts;
7999 rtx base, index, disp;
8001 int ok = ix86_decompose_address (addr, &parts);
8006 index = parts.index;
8008 scale = parts.scale;
8016 if (USER_LABEL_PREFIX[0] == 0)
8018 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8024 if (!base && !index)
8026 /* Displacement only requires special attention. */
8028 if (GET_CODE (disp) == CONST_INT)
8030 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8032 if (USER_LABEL_PREFIX[0] == 0)
8034 fputs ("ds:", file);
8036 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8039 output_pic_addr_const (file, disp, 0);
8041 output_addr_const (file, disp);
8043 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8046 if (GET_CODE (disp) == CONST
8047 && GET_CODE (XEXP (disp, 0)) == PLUS
8048 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8049 disp = XEXP (XEXP (disp, 0), 0);
8050 if (GET_CODE (disp) == LABEL_REF
8051 || (GET_CODE (disp) == SYMBOL_REF
8052 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8053 fputs ("(%rip)", file);
8058 if (ASSEMBLER_DIALECT == ASM_ATT)
8063 output_pic_addr_const (file, disp, 0);
8064 else if (GET_CODE (disp) == LABEL_REF)
8065 output_asm_label (disp);
8067 output_addr_const (file, disp);
8072 print_reg (base, 0, file);
8076 print_reg (index, 0, file);
8078 fprintf (file, ",%d", scale);
8084 rtx offset = NULL_RTX;
8088 /* Pull out the offset of a symbol; print any symbol itself. */
8089 if (GET_CODE (disp) == CONST
8090 && GET_CODE (XEXP (disp, 0)) == PLUS
8091 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8093 offset = XEXP (XEXP (disp, 0), 1);
8094 disp = gen_rtx_CONST (VOIDmode,
8095 XEXP (XEXP (disp, 0), 0));
8099 output_pic_addr_const (file, disp, 0);
8100 else if (GET_CODE (disp) == LABEL_REF)
8101 output_asm_label (disp);
8102 else if (GET_CODE (disp) == CONST_INT)
8105 output_addr_const (file, disp);
8111 print_reg (base, 0, file);
8114 if (INTVAL (offset) >= 0)
8116 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8120 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8127 print_reg (index, 0, file);
8129 fprintf (file, "*%d", scale);
8137 output_addr_const_extra (FILE *file, rtx x)
8141 if (GET_CODE (x) != UNSPEC)
8144 op = XVECEXP (x, 0, 0);
8145 switch (XINT (x, 1))
8147 case UNSPEC_GOTTPOFF:
8148 output_addr_const (file, op);
8149 /* FIXME: This might be @TPOFF in Sun ld. */
8150 fputs ("@GOTTPOFF", file);
8153 output_addr_const (file, op);
8154 fputs ("@TPOFF", file);
8157 output_addr_const (file, op);
8159 fputs ("@TPOFF", file);
8161 fputs ("@NTPOFF", file);
8164 output_addr_const (file, op);
8165 fputs ("@DTPOFF", file);
8167 case UNSPEC_GOTNTPOFF:
8168 output_addr_const (file, op);
8170 fputs ("@GOTTPOFF(%rip)", file);
8172 fputs ("@GOTNTPOFF", file);
8174 case UNSPEC_INDNTPOFF:
8175 output_addr_const (file, op);
8176 fputs ("@INDNTPOFF", file);
8186 /* Split one or more DImode RTL references into pairs of SImode
8187 references. The RTL can be REG, offsettable MEM, integer constant, or
8188 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8189 split and "num" is its length. lo_half and hi_half are output arrays
8190 that parallel "operands". */
8193 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8197 rtx op = operands[num];
8199 /* simplify_subreg refuse to split volatile memory addresses,
8200 but we still have to handle it. */
8201 if (GET_CODE (op) == MEM)
8203 lo_half[num] = adjust_address (op, SImode, 0);
8204 hi_half[num] = adjust_address (op, SImode, 4);
8208 lo_half[num] = simplify_gen_subreg (SImode, op,
8209 GET_MODE (op) == VOIDmode
8210 ? DImode : GET_MODE (op), 0);
8211 hi_half[num] = simplify_gen_subreg (SImode, op,
8212 GET_MODE (op) == VOIDmode
8213 ? DImode : GET_MODE (op), 4);
8217 /* Split one or more TImode RTL references into pairs of DImode
8218 references. The RTL can be REG, offsettable MEM, integer constant, or
8219 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8220 split and "num" is its length. lo_half and hi_half are output arrays
8221 that parallel "operands". */
8224 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8228 rtx op = operands[num];
8230 /* simplify_subreg refuse to split volatile memory addresses, but we
8231 still have to handle it. */
8232 if (GET_CODE (op) == MEM)
8234 lo_half[num] = adjust_address (op, DImode, 0);
8235 hi_half[num] = adjust_address (op, DImode, 8);
8239 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8240 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8245 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8246 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8247 is the expression of the binary operation. The output may either be
8248 emitted here, or returned to the caller, like all output_* functions.
8250 There is no guarantee that the operands are the same mode, as they
8251 might be within FLOAT or FLOAT_EXTEND expressions. */
8253 #ifndef SYSV386_COMPAT
8254 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8255 wants to fix the assemblers because that causes incompatibility
8256 with gcc. No-one wants to fix gcc because that causes
8257 incompatibility with assemblers... You can use the option of
8258 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8259 #define SYSV386_COMPAT 1
8263 output_387_binary_op (rtx insn, rtx *operands)
8265 static char buf[30];
8268 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8270 #ifdef ENABLE_CHECKING
8271 /* Even if we do not want to check the inputs, this documents input
8272 constraints. Which helps in understanding the following code. */
8273 if (STACK_REG_P (operands[0])
8274 && ((REG_P (operands[1])
8275 && REGNO (operands[0]) == REGNO (operands[1])
8276 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8277 || (REG_P (operands[2])
8278 && REGNO (operands[0]) == REGNO (operands[2])
8279 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8280 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8283 gcc_assert (is_sse);
8286 switch (GET_CODE (operands[3]))
8289 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8290 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8298 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8299 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8307 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8308 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8316 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8317 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8331 if (GET_MODE (operands[0]) == SFmode)
8332 strcat (buf, "ss\t{%2, %0|%0, %2}");
8334 strcat (buf, "sd\t{%2, %0|%0, %2}");
8339 switch (GET_CODE (operands[3]))
8343 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8345 rtx temp = operands[2];
8346 operands[2] = operands[1];
8350 /* know operands[0] == operands[1]. */
8352 if (GET_CODE (operands[2]) == MEM)
8358 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8360 if (STACK_TOP_P (operands[0]))
8361 /* How is it that we are storing to a dead operand[2]?
8362 Well, presumably operands[1] is dead too. We can't
8363 store the result to st(0) as st(0) gets popped on this
8364 instruction. Instead store to operands[2] (which I
8365 think has to be st(1)). st(1) will be popped later.
8366 gcc <= 2.8.1 didn't have this check and generated
8367 assembly code that the Unixware assembler rejected. */
8368 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8370 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8374 if (STACK_TOP_P (operands[0]))
8375 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8377 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8382 if (GET_CODE (operands[1]) == MEM)
8388 if (GET_CODE (operands[2]) == MEM)
8394 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8397 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8398 derived assemblers, confusingly reverse the direction of
8399 the operation for fsub{r} and fdiv{r} when the
8400 destination register is not st(0). The Intel assembler
8401 doesn't have this brain damage. Read !SYSV386_COMPAT to
8402 figure out what the hardware really does. */
8403 if (STACK_TOP_P (operands[0]))
8404 p = "{p\t%0, %2|rp\t%2, %0}";
8406 p = "{rp\t%2, %0|p\t%0, %2}";
8408 if (STACK_TOP_P (operands[0]))
8409 /* As above for fmul/fadd, we can't store to st(0). */
8410 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8412 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8417 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8420 if (STACK_TOP_P (operands[0]))
8421 p = "{rp\t%0, %1|p\t%1, %0}";
8423 p = "{p\t%1, %0|rp\t%0, %1}";
8425 if (STACK_TOP_P (operands[0]))
8426 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8428 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8433 if (STACK_TOP_P (operands[0]))
8435 if (STACK_TOP_P (operands[1]))
8436 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8438 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8441 else if (STACK_TOP_P (operands[1]))
8444 p = "{\t%1, %0|r\t%0, %1}";
8446 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8452 p = "{r\t%2, %0|\t%0, %2}";
8454 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8467 /* Return needed mode for entity in optimize_mode_switching pass. */
8470 ix86_mode_needed (int entity, rtx insn)
8472 enum attr_i387_cw mode;
8474 /* The mode UNINITIALIZED is used to store control word after a
8475 function call or ASM pattern. The mode ANY specify that function
8476 has no requirements on the control word and make no changes in the
8477 bits we are interested in. */
8480 || (NONJUMP_INSN_P (insn)
8481 && (asm_noperands (PATTERN (insn)) >= 0
8482 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8483 return I387_CW_UNINITIALIZED;
8485 if (recog_memoized (insn) < 0)
8488 mode = get_attr_i387_cw (insn);
8493 if (mode == I387_CW_TRUNC)
8498 if (mode == I387_CW_FLOOR)
8503 if (mode == I387_CW_CEIL)
8508 if (mode == I387_CW_MASK_PM)
8519 /* Output code to initialize control word copies used by trunc?f?i and
8520 rounding patterns. CURRENT_MODE is set to current control word,
8521 while NEW_MODE is set to new control word. */
8524 emit_i387_cw_initialization (int mode)
8526 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8531 rtx reg = gen_reg_rtx (HImode);
8533 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8534 emit_move_insn (reg, stored_mode);
8536 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8541 /* round toward zero (truncate) */
8542 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8543 slot = SLOT_CW_TRUNC;
8547 /* round down toward -oo */
8548 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8549 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8550 slot = SLOT_CW_FLOOR;
8554 /* round up toward +oo */
8555 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8556 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8557 slot = SLOT_CW_CEIL;
8560 case I387_CW_MASK_PM:
8561 /* mask precision exception for nearbyint() */
8562 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8563 slot = SLOT_CW_MASK_PM;
8575 /* round toward zero (truncate) */
8576 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8577 slot = SLOT_CW_TRUNC;
8581 /* round down toward -oo */
8582 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8583 slot = SLOT_CW_FLOOR;
8587 /* round up toward +oo */
8588 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8589 slot = SLOT_CW_CEIL;
8592 case I387_CW_MASK_PM:
8593 /* mask precision exception for nearbyint() */
8594 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8595 slot = SLOT_CW_MASK_PM;
8603 gcc_assert (slot < MAX_386_STACK_LOCALS);
8605 new_mode = assign_386_stack_local (HImode, slot);
8606 emit_move_insn (new_mode, reg);
8609 /* Output code for INSN to convert a float to a signed int. OPERANDS
8610 are the insn operands. The output may be [HSD]Imode and the input
8611 operand may be [SDX]Fmode. */
8614 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8616 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8617 int dimode_p = GET_MODE (operands[0]) == DImode;
8618 int round_mode = get_attr_i387_cw (insn);
8620 /* Jump through a hoop or two for DImode, since the hardware has no
8621 non-popping instruction. We used to do this a different way, but
8622 that was somewhat fragile and broke with post-reload splitters. */
8623 if ((dimode_p || fisttp) && !stack_top_dies)
8624 output_asm_insn ("fld\t%y1", operands);
8626 gcc_assert (STACK_TOP_P (operands[1]));
8627 gcc_assert (GET_CODE (operands[0]) == MEM);
8630 output_asm_insn ("fisttp%z0\t%0", operands);
8633 if (round_mode != I387_CW_ANY)
8634 output_asm_insn ("fldcw\t%3", operands);
8635 if (stack_top_dies || dimode_p)
8636 output_asm_insn ("fistp%z0\t%0", operands);
8638 output_asm_insn ("fist%z0\t%0", operands);
8639 if (round_mode != I387_CW_ANY)
8640 output_asm_insn ("fldcw\t%2", operands);
8646 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8647 have the values zero or one, indicates the ffreep insn's operand
8648 from the OPERANDS array. */
8651 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8653 if (TARGET_USE_FFREEP)
8654 #if HAVE_AS_IX86_FFREEP
8655 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8657 switch (REGNO (operands[opno]))
8659 case FIRST_STACK_REG + 0: return ".word\t0xc0df";
8660 case FIRST_STACK_REG + 1: return ".word\t0xc1df";
8661 case FIRST_STACK_REG + 2: return ".word\t0xc2df";
8662 case FIRST_STACK_REG + 3: return ".word\t0xc3df";
8663 case FIRST_STACK_REG + 4: return ".word\t0xc4df";
8664 case FIRST_STACK_REG + 5: return ".word\t0xc5df";
8665 case FIRST_STACK_REG + 6: return ".word\t0xc6df";
8666 case FIRST_STACK_REG + 7: return ".word\t0xc7df";
8670 return opno ? "fstp\t%y1" : "fstp\t%y0";
8674 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8675 should be used. UNORDERED_P is true when fucom should be used. */
8678 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8681 rtx cmp_op0, cmp_op1;
8682 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8686 cmp_op0 = operands[0];
8687 cmp_op1 = operands[1];
8691 cmp_op0 = operands[1];
8692 cmp_op1 = operands[2];
8697 if (GET_MODE (operands[0]) == SFmode)
8699 return "ucomiss\t{%1, %0|%0, %1}";
8701 return "comiss\t{%1, %0|%0, %1}";
8704 return "ucomisd\t{%1, %0|%0, %1}";
8706 return "comisd\t{%1, %0|%0, %1}";
8709 gcc_assert (STACK_TOP_P (cmp_op0));
8711 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8713 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8717 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8718 return output_387_ffreep (operands, 1);
8721 return "ftst\n\tfnstsw\t%0";
8724 if (STACK_REG_P (cmp_op1)
8726 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8727 && REGNO (cmp_op1) != FIRST_STACK_REG)
8729 /* If both the top of the 387 stack dies, and the other operand
8730 is also a stack register that dies, then this must be a
8731 `fcompp' float compare */
8735 /* There is no double popping fcomi variant. Fortunately,
8736 eflags is immune from the fstp's cc clobbering. */
8738 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8740 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8741 return output_387_ffreep (operands, 0);
8746 return "fucompp\n\tfnstsw\t%0";
8748 return "fcompp\n\tfnstsw\t%0";
8753 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8755 static const char * const alt[16] =
8757 "fcom%z2\t%y2\n\tfnstsw\t%0",
8758 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8759 "fucom%z2\t%y2\n\tfnstsw\t%0",
8760 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8762 "ficom%z2\t%y2\n\tfnstsw\t%0",
8763 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8767 "fcomi\t{%y1, %0|%0, %y1}",
8768 "fcomip\t{%y1, %0|%0, %y1}",
8769 "fucomi\t{%y1, %0|%0, %y1}",
8770 "fucomip\t{%y1, %0|%0, %y1}",
8781 mask = eflags_p << 3;
8782 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8783 mask |= unordered_p << 1;
8784 mask |= stack_top_dies;
8786 gcc_assert (mask < 16);
8795 ix86_output_addr_vec_elt (FILE *file, int value)
8797 const char *directive = ASM_LONG;
8801 directive = ASM_QUAD;
8803 gcc_assert (!TARGET_64BIT);
8806 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8810 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8813 fprintf (file, "%s%s%d-%s%d\n",
8814 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8815 else if (HAVE_AS_GOTOFF_IN_DATA)
8816 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8818 else if (TARGET_MACHO)
8820 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8821 machopic_output_function_base_name (file);
8822 fprintf(file, "\n");
8826 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8827 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8830 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8834 ix86_expand_clear (rtx dest)
8838 /* We play register width games, which are only valid after reload. */
8839 gcc_assert (reload_completed);
8841 /* Avoid HImode and its attendant prefix byte. */
8842 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8843 dest = gen_rtx_REG (SImode, REGNO (dest));
8845 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8847 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8848 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8850 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8851 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8857 /* X is an unchanging MEM. If it is a constant pool reference, return
8858 the constant pool rtx, else NULL. */
8861 maybe_get_pool_constant (rtx x)
8863 x = ix86_delegitimize_address (XEXP (x, 0));
8865 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8866 return get_pool_constant (x);
8872 ix86_expand_move (enum machine_mode mode, rtx operands[])
8874 int strict = (reload_in_progress || reload_completed);
8876 enum tls_model model;
8881 if (GET_CODE (op1) == SYMBOL_REF)
8883 model = SYMBOL_REF_TLS_MODEL (op1);
8886 op1 = legitimize_tls_address (op1, model, true);
8887 op1 = force_operand (op1, op0);
8892 else if (GET_CODE (op1) == CONST
8893 && GET_CODE (XEXP (op1, 0)) == PLUS
8894 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8896 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8899 rtx addend = XEXP (XEXP (op1, 0), 1);
8900 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8901 op1 = force_operand (op1, NULL);
8902 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8903 op0, 1, OPTAB_DIRECT);
8909 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8914 rtx temp = ((reload_in_progress
8915 || ((op0 && GET_CODE (op0) == REG)
8917 ? op0 : gen_reg_rtx (Pmode));
8918 op1 = machopic_indirect_data_reference (op1, temp);
8919 op1 = machopic_legitimize_pic_address (op1, mode,
8920 temp == op1 ? 0 : temp);
8922 else if (MACHOPIC_INDIRECT)
8923 op1 = machopic_indirect_data_reference (op1, 0);
8927 if (GET_CODE (op0) == MEM)
8928 op1 = force_reg (Pmode, op1);
8930 op1 = legitimize_address (op1, op1, Pmode);
8931 #endif /* TARGET_MACHO */
8935 if (GET_CODE (op0) == MEM
8936 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8937 || !push_operand (op0, mode))
8938 && GET_CODE (op1) == MEM)
8939 op1 = force_reg (mode, op1);
8941 if (push_operand (op0, mode)
8942 && ! general_no_elim_operand (op1, mode))
8943 op1 = copy_to_mode_reg (mode, op1);
8945 /* Force large constants in 64bit compilation into register
8946 to get them CSEed. */
8947 if (TARGET_64BIT && mode == DImode
8948 && immediate_operand (op1, mode)
8949 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8950 && !register_operand (op0, mode)
8951 && optimize && !reload_completed && !reload_in_progress)
8952 op1 = copy_to_mode_reg (mode, op1);
8954 if (FLOAT_MODE_P (mode))
8956 /* If we are loading a floating point constant to a register,
8957 force the value to memory now, since we'll get better code
8958 out the back end. */
8962 else if (GET_CODE (op1) == CONST_DOUBLE)
8964 op1 = validize_mem (force_const_mem (mode, op1));
8965 if (!register_operand (op0, mode))
8967 rtx temp = gen_reg_rtx (mode);
8968 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8969 emit_move_insn (op0, temp);
8976 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8980 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8982 rtx op0 = operands[0], op1 = operands[1];
8984 /* Force constants other than zero into memory. We do not know how
8985 the instructions used to build constants modify the upper 64 bits
8986 of the register, once we have that information we may be able
8987 to handle some of them more efficiently. */
8988 if ((reload_in_progress | reload_completed) == 0
8989 && register_operand (op0, mode)
8991 && standard_sse_constant_p (op1) <= 0)
8992 op1 = validize_mem (force_const_mem (mode, op1));
8994 /* Make operand1 a register if it isn't already. */
8996 && !register_operand (op0, mode)
8997 && !register_operand (op1, mode))
8999 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9003 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9006 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9007 straight to ix86_expand_vector_move. */
9010 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9019 /* If we're optimizing for size, movups is the smallest. */
9022 op0 = gen_lowpart (V4SFmode, op0);
9023 op1 = gen_lowpart (V4SFmode, op1);
9024 emit_insn (gen_sse_movups (op0, op1));
9028 /* ??? If we have typed data, then it would appear that using
9029 movdqu is the only way to get unaligned data loaded with
9031 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9033 op0 = gen_lowpart (V16QImode, op0);
9034 op1 = gen_lowpart (V16QImode, op1);
9035 emit_insn (gen_sse2_movdqu (op0, op1));
9039 if (TARGET_SSE2 && mode == V2DFmode)
9043 /* When SSE registers are split into halves, we can avoid
9044 writing to the top half twice. */
9045 if (TARGET_SSE_SPLIT_REGS)
9047 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9052 /* ??? Not sure about the best option for the Intel chips.
9053 The following would seem to satisfy; the register is
9054 entirely cleared, breaking the dependency chain. We
9055 then store to the upper half, with a dependency depth
9056 of one. A rumor has it that Intel recommends two movsd
9057 followed by an unpacklpd, but this is unconfirmed. And
9058 given that the dependency depth of the unpacklpd would
9059 still be one, I'm not sure why this would be better. */
9060 zero = CONST0_RTX (V2DFmode);
9063 m = adjust_address (op1, DFmode, 0);
9064 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9065 m = adjust_address (op1, DFmode, 8);
9066 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9070 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9071 emit_move_insn (op0, CONST0_RTX (mode));
9073 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9075 if (mode != V4SFmode)
9076 op0 = gen_lowpart (V4SFmode, op0);
9077 m = adjust_address (op1, V2SFmode, 0);
9078 emit_insn (gen_sse_loadlps (op0, op0, m));
9079 m = adjust_address (op1, V2SFmode, 8);
9080 emit_insn (gen_sse_loadhps (op0, op0, m));
9083 else if (MEM_P (op0))
9085 /* If we're optimizing for size, movups is the smallest. */
9088 op0 = gen_lowpart (V4SFmode, op0);
9089 op1 = gen_lowpart (V4SFmode, op1);
9090 emit_insn (gen_sse_movups (op0, op1));
9094 /* ??? Similar to above, only less clear because of quote
9095 typeless stores unquote. */
9096 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9097 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9099 op0 = gen_lowpart (V16QImode, op0);
9100 op1 = gen_lowpart (V16QImode, op1);
9101 emit_insn (gen_sse2_movdqu (op0, op1));
9105 if (TARGET_SSE2 && mode == V2DFmode)
9107 m = adjust_address (op0, DFmode, 0);
9108 emit_insn (gen_sse2_storelpd (m, op1));
9109 m = adjust_address (op0, DFmode, 8);
9110 emit_insn (gen_sse2_storehpd (m, op1));
9114 if (mode != V4SFmode)
9115 op1 = gen_lowpart (V4SFmode, op1);
9116 m = adjust_address (op0, V2SFmode, 0);
9117 emit_insn (gen_sse_storelps (m, op1));
9118 m = adjust_address (op0, V2SFmode, 8);
9119 emit_insn (gen_sse_storehps (m, op1));
9126 /* Expand a push in MODE. This is some mode for which we do not support
9127 proper push instructions, at least from the registers that we expect
9128 the value to live in. */
9131 ix86_expand_push (enum machine_mode mode, rtx x)
9135 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9136 GEN_INT (-GET_MODE_SIZE (mode)),
9137 stack_pointer_rtx, 1, OPTAB_DIRECT);
9138 if (tmp != stack_pointer_rtx)
9139 emit_move_insn (stack_pointer_rtx, tmp);
9141 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9142 emit_move_insn (tmp, x);
9145 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9146 destination to use for the operation. If different from the true
9147 destination in operands[0], a copy operation will be required. */
9150 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9153 int matching_memory;
9154 rtx src1, src2, dst;
9160 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9161 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9162 && (rtx_equal_p (dst, src2)
9163 || immediate_operand (src1, mode)))
9170 /* If the destination is memory, and we do not have matching source
9171 operands, do things in registers. */
9172 matching_memory = 0;
9173 if (GET_CODE (dst) == MEM)
9175 if (rtx_equal_p (dst, src1))
9176 matching_memory = 1;
9177 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9178 && rtx_equal_p (dst, src2))
9179 matching_memory = 2;
9181 dst = gen_reg_rtx (mode);
9184 /* Both source operands cannot be in memory. */
9185 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9187 if (matching_memory != 2)
9188 src2 = force_reg (mode, src2);
9190 src1 = force_reg (mode, src1);
9193 /* If the operation is not commutable, source 1 cannot be a constant
9194 or non-matching memory. */
9195 if ((CONSTANT_P (src1)
9196 || (!matching_memory && GET_CODE (src1) == MEM))
9197 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9198 src1 = force_reg (mode, src1);
9200 src1 = operands[1] = src1;
9201 src2 = operands[2] = src2;
9205 /* Similarly, but assume that the destination has already been
9209 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9210 enum machine_mode mode, rtx operands[])
9212 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9213 gcc_assert (dst == operands[0]);
9216 /* Attempt to expand a binary operator. Make the expansion closer to the
9217 actual machine, then just general_operand, which will allow 3 separate
9218 memory references (one output, two input) in a single insn. */
9221 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9224 rtx src1, src2, dst, op, clob;
9226 dst = ix86_fixup_binary_operands (code, mode, operands);
9230 /* Emit the instruction. */
9232 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9233 if (reload_in_progress)
9235 /* Reload doesn't know about the flags register, and doesn't know that
9236 it doesn't want to clobber it. We can only do this with PLUS. */
9237 gcc_assert (code == PLUS);
9242 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9243 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9246 /* Fix up the destination if needed. */
9247 if (dst != operands[0])
9248 emit_move_insn (operands[0], dst);
9251 /* Return TRUE or FALSE depending on whether the binary operator meets the
9252 appropriate constraints. */
9255 ix86_binary_operator_ok (enum rtx_code code,
9256 enum machine_mode mode ATTRIBUTE_UNUSED,
9259 /* Both source operands cannot be in memory. */
9260 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9262 /* If the operation is not commutable, source 1 cannot be a constant. */
9263 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9265 /* If the destination is memory, we must have a matching source operand. */
9266 if (GET_CODE (operands[0]) == MEM
9267 && ! (rtx_equal_p (operands[0], operands[1])
9268 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9269 && rtx_equal_p (operands[0], operands[2]))))
9271 /* If the operation is not commutable and the source 1 is memory, we must
9272 have a matching destination. */
9273 if (GET_CODE (operands[1]) == MEM
9274 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9275 && ! rtx_equal_p (operands[0], operands[1]))
9280 /* Attempt to expand a unary operator. Make the expansion closer to the
9281 actual machine, then just general_operand, which will allow 2 separate
9282 memory references (one output, one input) in a single insn. */
9285 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9288 int matching_memory;
9289 rtx src, dst, op, clob;
9294 /* If the destination is memory, and we do not have matching source
9295 operands, do things in registers. */
9296 matching_memory = 0;
9299 if (rtx_equal_p (dst, src))
9300 matching_memory = 1;
9302 dst = gen_reg_rtx (mode);
9305 /* When source operand is memory, destination must match. */
9306 if (MEM_P (src) && !matching_memory)
9307 src = force_reg (mode, src);
9309 /* Emit the instruction. */
9311 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9312 if (reload_in_progress || code == NOT)
9314 /* Reload doesn't know about the flags register, and doesn't know that
9315 it doesn't want to clobber it. */
9316 gcc_assert (code == NOT);
9321 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9322 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9325 /* Fix up the destination if needed. */
9326 if (dst != operands[0])
9327 emit_move_insn (operands[0], dst);
9330 /* Return TRUE or FALSE depending on whether the unary operator meets the
9331 appropriate constraints. */
9334 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9335 enum machine_mode mode ATTRIBUTE_UNUSED,
9336 rtx operands[2] ATTRIBUTE_UNUSED)
9338 /* If one of operands is memory, source and destination must match. */
9339 if ((GET_CODE (operands[0]) == MEM
9340 || GET_CODE (operands[1]) == MEM)
9341 && ! rtx_equal_p (operands[0], operands[1]))
9346 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9347 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9348 true, then replicate the mask for all elements of the vector register.
9349 If INVERT is true, then create a mask excluding the sign bit. */
9352 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9354 enum machine_mode vec_mode;
9355 HOST_WIDE_INT hi, lo;
9360 /* Find the sign bit, sign extended to 2*HWI. */
9362 lo = 0x80000000, hi = lo < 0;
9363 else if (HOST_BITS_PER_WIDE_INT >= 64)
9364 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9366 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9371 /* Force this value into the low part of a fp vector constant. */
9372 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9373 mask = gen_lowpart (mode, mask);
9378 v = gen_rtvec (4, mask, mask, mask, mask);
9380 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9381 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9382 vec_mode = V4SFmode;
9387 v = gen_rtvec (2, mask, mask);
9389 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9390 vec_mode = V2DFmode;
9393 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9396 /* Generate code for floating point ABS or NEG. */
9399 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9402 rtx mask, set, use, clob, dst, src;
9403 bool matching_memory;
9404 bool use_sse = false;
9405 bool vector_mode = VECTOR_MODE_P (mode);
9406 enum machine_mode elt_mode = mode;
9410 elt_mode = GET_MODE_INNER (mode);
9413 else if (TARGET_SSE_MATH)
9414 use_sse = SSE_FLOAT_MODE_P (mode);
9416 /* NEG and ABS performed with SSE use bitwise mask operations.
9417 Create the appropriate mask now. */
9419 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9426 /* If the destination is memory, and we don't have matching source
9427 operands or we're using the x87, do things in registers. */
9428 matching_memory = false;
9431 if (use_sse && rtx_equal_p (dst, src))
9432 matching_memory = true;
9434 dst = gen_reg_rtx (mode);
9436 if (MEM_P (src) && !matching_memory)
9437 src = force_reg (mode, src);
9441 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9442 set = gen_rtx_SET (VOIDmode, dst, set);
9447 set = gen_rtx_fmt_e (code, mode, src);
9448 set = gen_rtx_SET (VOIDmode, dst, set);
9451 use = gen_rtx_USE (VOIDmode, mask);
9452 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9453 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9454 gen_rtvec (3, set, use, clob)));
9460 if (dst != operands[0])
9461 emit_move_insn (operands[0], dst);
9464 /* Expand a copysign operation. Special case operand 0 being a constant. */
9467 ix86_expand_copysign (rtx operands[])
9469 enum machine_mode mode, vmode;
9470 rtx dest, op0, op1, mask, nmask;
9476 mode = GET_MODE (dest);
9477 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9479 if (GET_CODE (op0) == CONST_DOUBLE)
9483 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9484 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9486 if (op0 == CONST0_RTX (mode))
9487 op0 = CONST0_RTX (vmode);
9491 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9492 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9494 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9495 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9498 mask = ix86_build_signbit_mask (mode, 0, 0);
9501 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9503 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9507 nmask = ix86_build_signbit_mask (mode, 0, 1);
9508 mask = ix86_build_signbit_mask (mode, 0, 0);
9511 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9513 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9517 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9518 be a constant, and so has already been expanded into a vector constant. */
9521 ix86_split_copysign_const (rtx operands[])
9523 enum machine_mode mode, vmode;
9524 rtx dest, op0, op1, mask, x;
9531 mode = GET_MODE (dest);
9532 vmode = GET_MODE (mask);
9534 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9535 x = gen_rtx_AND (vmode, dest, mask);
9536 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9538 if (op0 != CONST0_RTX (vmode))
9540 x = gen_rtx_IOR (vmode, dest, op0);
9541 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9545 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9546 so we have to do two masks. */
9549 ix86_split_copysign_var (rtx operands[])
9551 enum machine_mode mode, vmode;
9552 rtx dest, scratch, op0, op1, mask, nmask, x;
9555 scratch = operands[1];
9558 nmask = operands[4];
9561 mode = GET_MODE (dest);
9562 vmode = GET_MODE (mask);
9564 if (rtx_equal_p (op0, op1))
9566 /* Shouldn't happen often (it's useless, obviously), but when it does
9567 we'd generate incorrect code if we continue below. */
9568 emit_move_insn (dest, op0);
9572 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9574 gcc_assert (REGNO (op1) == REGNO (scratch));
9576 x = gen_rtx_AND (vmode, scratch, mask);
9577 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9580 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9581 x = gen_rtx_NOT (vmode, dest);
9582 x = gen_rtx_AND (vmode, x, op0);
9583 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9587 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9589 x = gen_rtx_AND (vmode, scratch, mask);
9591 else /* alternative 2,4 */
9593 gcc_assert (REGNO (mask) == REGNO (scratch));
9594 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9595 x = gen_rtx_AND (vmode, scratch, op1);
9597 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9599 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9601 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9602 x = gen_rtx_AND (vmode, dest, nmask);
9604 else /* alternative 3,4 */
9606 gcc_assert (REGNO (nmask) == REGNO (dest));
9608 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9609 x = gen_rtx_AND (vmode, dest, op0);
9611 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9614 x = gen_rtx_IOR (vmode, dest, scratch);
9615 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9618 /* Return TRUE or FALSE depending on whether the first SET in INSN
9619 has source and destination with matching CC modes, and that the
9620 CC mode is at least as constrained as REQ_MODE. */
9623 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9626 enum machine_mode set_mode;
9628 set = PATTERN (insn);
9629 if (GET_CODE (set) == PARALLEL)
9630 set = XVECEXP (set, 0, 0);
9631 gcc_assert (GET_CODE (set) == SET);
9632 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9634 set_mode = GET_MODE (SET_DEST (set));
9638 if (req_mode != CCNOmode
9639 && (req_mode != CCmode
9640 || XEXP (SET_SRC (set), 1) != const0_rtx))
9644 if (req_mode == CCGCmode)
9648 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9652 if (req_mode == CCZmode)
9662 return (GET_MODE (SET_SRC (set)) == set_mode);
9665 /* Generate insn patterns to do an integer compare of OPERANDS. */
9668 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9670 enum machine_mode cmpmode;
9673 cmpmode = SELECT_CC_MODE (code, op0, op1);
9674 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9676 /* This is very simple, but making the interface the same as in the
9677 FP case makes the rest of the code easier. */
9678 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9679 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9681 /* Return the test that should be put into the flags user, i.e.
9682 the bcc, scc, or cmov instruction. */
9683 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9686 /* Figure out whether to use ordered or unordered fp comparisons.
9687 Return the appropriate mode to use. */
9690 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9692 /* ??? In order to make all comparisons reversible, we do all comparisons
9693 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9694 all forms trapping and nontrapping comparisons, we can make inequality
9695 comparisons trapping again, since it results in better code when using
9696 FCOM based compares. */
9697 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9701 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9703 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9704 return ix86_fp_compare_mode (code);
9707 /* Only zero flag is needed. */
9709 case NE: /* ZF!=0 */
9711 /* Codes needing carry flag. */
9712 case GEU: /* CF=0 */
9713 case GTU: /* CF=0 & ZF=0 */
9714 case LTU: /* CF=1 */
9715 case LEU: /* CF=1 | ZF=1 */
9717 /* Codes possibly doable only with sign flag when
9718 comparing against zero. */
9719 case GE: /* SF=OF or SF=0 */
9720 case LT: /* SF<>OF or SF=1 */
9721 if (op1 == const0_rtx)
9724 /* For other cases Carry flag is not required. */
9726 /* Codes doable only with sign flag when comparing
9727 against zero, but we miss jump instruction for it
9728 so we need to use relational tests against overflow
9729 that thus needs to be zero. */
9730 case GT: /* ZF=0 & SF=OF */
9731 case LE: /* ZF=1 | SF<>OF */
9732 if (op1 == const0_rtx)
9736 /* strcmp pattern do (use flags) and combine may ask us for proper
9745 /* Return the fixed registers used for condition codes. */
9748 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9755 /* If two condition code modes are compatible, return a condition code
9756 mode which is compatible with both. Otherwise, return
9759 static enum machine_mode
9760 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9765 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9768 if ((m1 == CCGCmode && m2 == CCGOCmode)
9769 || (m1 == CCGOCmode && m2 == CCGCmode))
9797 /* These are only compatible with themselves, which we already
9803 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9806 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9808 enum rtx_code swapped_code = swap_condition (code);
9809 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9810 || (ix86_fp_comparison_cost (swapped_code)
9811 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9814 /* Swap, force into registers, or otherwise massage the two operands
9815 to a fp comparison. The operands are updated in place; the new
9816 comparison code is returned. */
9818 static enum rtx_code
9819 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9821 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9822 rtx op0 = *pop0, op1 = *pop1;
9823 enum machine_mode op_mode = GET_MODE (op0);
9824 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9826 /* All of the unordered compare instructions only work on registers.
9827 The same is true of the fcomi compare instructions. The XFmode
9828 compare instructions require registers except when comparing
9829 against zero or when converting operand 1 from fixed point to
9833 && (fpcmp_mode == CCFPUmode
9834 || (op_mode == XFmode
9835 && ! (standard_80387_constant_p (op0) == 1
9836 || standard_80387_constant_p (op1) == 1)
9837 && GET_CODE (op1) != FLOAT)
9838 || ix86_use_fcomi_compare (code)))
9840 op0 = force_reg (op_mode, op0);
9841 op1 = force_reg (op_mode, op1);
9845 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9846 things around if they appear profitable, otherwise force op0
9849 if (standard_80387_constant_p (op0) == 0
9850 || (GET_CODE (op0) == MEM
9851 && ! (standard_80387_constant_p (op1) == 0
9852 || GET_CODE (op1) == MEM)))
9855 tmp = op0, op0 = op1, op1 = tmp;
9856 code = swap_condition (code);
9859 if (GET_CODE (op0) != REG)
9860 op0 = force_reg (op_mode, op0);
9862 if (CONSTANT_P (op1))
9864 int tmp = standard_80387_constant_p (op1);
9866 op1 = validize_mem (force_const_mem (op_mode, op1));
9870 op1 = force_reg (op_mode, op1);
9873 op1 = force_reg (op_mode, op1);
9877 /* Try to rearrange the comparison to make it cheaper. */
9878 if (ix86_fp_comparison_cost (code)
9879 > ix86_fp_comparison_cost (swap_condition (code))
9880 && (GET_CODE (op1) == REG || !no_new_pseudos))
9883 tmp = op0, op0 = op1, op1 = tmp;
9884 code = swap_condition (code);
9885 if (GET_CODE (op0) != REG)
9886 op0 = force_reg (op_mode, op0);
9894 /* Convert comparison codes we use to represent FP comparison to integer
9895 code that will result in proper branch. Return UNKNOWN if no such code
9899 ix86_fp_compare_code_to_integer (enum rtx_code code)
9928 /* Split comparison code CODE into comparisons we can do using branch
9929 instructions. BYPASS_CODE is comparison code for branch that will
9930 branch around FIRST_CODE and SECOND_CODE. If some of branches
9931 is not required, set value to UNKNOWN.
9932 We never require more than two branches. */
9935 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9936 enum rtx_code *first_code,
9937 enum rtx_code *second_code)
9940 *bypass_code = UNKNOWN;
9941 *second_code = UNKNOWN;
9943 /* The fcomi comparison sets flags as follows:
9953 case GT: /* GTU - CF=0 & ZF=0 */
9954 case GE: /* GEU - CF=0 */
9955 case ORDERED: /* PF=0 */
9956 case UNORDERED: /* PF=1 */
9957 case UNEQ: /* EQ - ZF=1 */
9958 case UNLT: /* LTU - CF=1 */
9959 case UNLE: /* LEU - CF=1 | ZF=1 */
9960 case LTGT: /* EQ - ZF=0 */
9962 case LT: /* LTU - CF=1 - fails on unordered */
9964 *bypass_code = UNORDERED;
9966 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9968 *bypass_code = UNORDERED;
9970 case EQ: /* EQ - ZF=1 - fails on unordered */
9972 *bypass_code = UNORDERED;
9974 case NE: /* NE - ZF=0 - fails on unordered */
9976 *second_code = UNORDERED;
9978 case UNGE: /* GEU - CF=0 - fails on unordered */
9980 *second_code = UNORDERED;
9982 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9984 *second_code = UNORDERED;
9989 if (!TARGET_IEEE_FP)
9991 *second_code = UNKNOWN;
9992 *bypass_code = UNKNOWN;
9996 /* Return cost of comparison done fcom + arithmetics operations on AX.
9997 All following functions do use number of instructions as a cost metrics.
9998 In future this should be tweaked to compute bytes for optimize_size and
9999 take into account performance of various instructions on various CPUs. */
10001 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10003 if (!TARGET_IEEE_FP)
10005 /* The cost of code output by ix86_expand_fp_compare. */
10029 gcc_unreachable ();
10033 /* Return cost of comparison done using fcomi operation.
10034 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10036 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10038 enum rtx_code bypass_code, first_code, second_code;
10039 /* Return arbitrarily high cost when instruction is not supported - this
10040 prevents gcc from using it. */
10043 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10044 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10047 /* Return cost of comparison done using sahf operation.
10048 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10050 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10052 enum rtx_code bypass_code, first_code, second_code;
10053 /* Return arbitrarily high cost when instruction is not preferred - this
10054 avoids gcc from using it. */
10055 if (!TARGET_USE_SAHF && !optimize_size)
10057 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10058 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10061 /* Compute cost of the comparison done using any method.
10062 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10064 ix86_fp_comparison_cost (enum rtx_code code)
10066 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10069 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10070 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10072 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10073 if (min > sahf_cost)
10075 if (min > fcomi_cost)
10080 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10083 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10084 rtx *second_test, rtx *bypass_test)
10086 enum machine_mode fpcmp_mode, intcmp_mode;
10088 int cost = ix86_fp_comparison_cost (code);
10089 enum rtx_code bypass_code, first_code, second_code;
10091 fpcmp_mode = ix86_fp_compare_mode (code);
10092 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10095 *second_test = NULL_RTX;
10097 *bypass_test = NULL_RTX;
10099 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10101 /* Do fcomi/sahf based test when profitable. */
10102 if ((bypass_code == UNKNOWN || bypass_test)
10103 && (second_code == UNKNOWN || second_test)
10104 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10108 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10109 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10115 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10116 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10118 scratch = gen_reg_rtx (HImode);
10119 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10120 emit_insn (gen_x86_sahf_1 (scratch));
10123 /* The FP codes work out to act like unsigned. */
10124 intcmp_mode = fpcmp_mode;
10126 if (bypass_code != UNKNOWN)
10127 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10128 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10130 if (second_code != UNKNOWN)
10131 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10132 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10137 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10138 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10139 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10141 scratch = gen_reg_rtx (HImode);
10142 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10144 /* In the unordered case, we have to check C2 for NaN's, which
10145 doesn't happen to work out to anything nice combination-wise.
10146 So do some bit twiddling on the value we've got in AH to come
10147 up with an appropriate set of condition codes. */
10149 intcmp_mode = CCNOmode;
10154 if (code == GT || !TARGET_IEEE_FP)
10156 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10161 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10162 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10163 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10164 intcmp_mode = CCmode;
10170 if (code == LT && TARGET_IEEE_FP)
10172 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10173 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10174 intcmp_mode = CCmode;
10179 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10185 if (code == GE || !TARGET_IEEE_FP)
10187 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10192 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10193 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10200 if (code == LE && TARGET_IEEE_FP)
10202 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10203 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10204 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10205 intcmp_mode = CCmode;
10210 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10216 if (code == EQ && TARGET_IEEE_FP)
10218 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10219 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10220 intcmp_mode = CCmode;
10225 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10232 if (code == NE && TARGET_IEEE_FP)
10234 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10235 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10241 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10247 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10251 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10256 gcc_unreachable ();
10260 /* Return the test that should be put into the flags user, i.e.
10261 the bcc, scc, or cmov instruction. */
10262 return gen_rtx_fmt_ee (code, VOIDmode,
10263 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10268 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10271 op0 = ix86_compare_op0;
10272 op1 = ix86_compare_op1;
10275 *second_test = NULL_RTX;
10277 *bypass_test = NULL_RTX;
10279 if (ix86_compare_emitted)
10281 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10282 ix86_compare_emitted = NULL_RTX;
10284 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10285 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10286 second_test, bypass_test);
10288 ret = ix86_expand_int_compare (code, op0, op1);
10293 /* Return true if the CODE will result in nontrivial jump sequence. */
10295 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10297 enum rtx_code bypass_code, first_code, second_code;
10300 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10301 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10305 ix86_expand_branch (enum rtx_code code, rtx label)
10309 /* If we have emitted a compare insn, go straight to simple.
10310 ix86_expand_compare won't emit anything if ix86_compare_emitted
10312 if (ix86_compare_emitted)
10315 switch (GET_MODE (ix86_compare_op0))
10321 tmp = ix86_expand_compare (code, NULL, NULL);
10322 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10323 gen_rtx_LABEL_REF (VOIDmode, label),
10325 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10334 enum rtx_code bypass_code, first_code, second_code;
10336 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10337 &ix86_compare_op1);
10339 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10341 /* Check whether we will use the natural sequence with one jump. If
10342 so, we can expand jump early. Otherwise delay expansion by
10343 creating compound insn to not confuse optimizers. */
10344 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10347 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10348 gen_rtx_LABEL_REF (VOIDmode, label),
10349 pc_rtx, NULL_RTX, NULL_RTX);
10353 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10354 ix86_compare_op0, ix86_compare_op1);
10355 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10356 gen_rtx_LABEL_REF (VOIDmode, label),
10358 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10360 use_fcomi = ix86_use_fcomi_compare (code);
10361 vec = rtvec_alloc (3 + !use_fcomi);
10362 RTVEC_ELT (vec, 0) = tmp;
10364 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10366 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10369 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10371 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10380 /* Expand DImode branch into multiple compare+branch. */
10382 rtx lo[2], hi[2], label2;
10383 enum rtx_code code1, code2, code3;
10384 enum machine_mode submode;
10386 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10388 tmp = ix86_compare_op0;
10389 ix86_compare_op0 = ix86_compare_op1;
10390 ix86_compare_op1 = tmp;
10391 code = swap_condition (code);
10393 if (GET_MODE (ix86_compare_op0) == DImode)
10395 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10396 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10401 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10402 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10406 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10407 avoid two branches. This costs one extra insn, so disable when
10408 optimizing for size. */
10410 if ((code == EQ || code == NE)
10412 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10417 if (hi[1] != const0_rtx)
10418 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10419 NULL_RTX, 0, OPTAB_WIDEN);
10422 if (lo[1] != const0_rtx)
10423 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10424 NULL_RTX, 0, OPTAB_WIDEN);
10426 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10427 NULL_RTX, 0, OPTAB_WIDEN);
10429 ix86_compare_op0 = tmp;
10430 ix86_compare_op1 = const0_rtx;
10431 ix86_expand_branch (code, label);
10435 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10436 op1 is a constant and the low word is zero, then we can just
10437 examine the high word. */
10439 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10442 case LT: case LTU: case GE: case GEU:
10443 ix86_compare_op0 = hi[0];
10444 ix86_compare_op1 = hi[1];
10445 ix86_expand_branch (code, label);
10451 /* Otherwise, we need two or three jumps. */
10453 label2 = gen_label_rtx ();
10456 code2 = swap_condition (code);
10457 code3 = unsigned_condition (code);
10461 case LT: case GT: case LTU: case GTU:
10464 case LE: code1 = LT; code2 = GT; break;
10465 case GE: code1 = GT; code2 = LT; break;
10466 case LEU: code1 = LTU; code2 = GTU; break;
10467 case GEU: code1 = GTU; code2 = LTU; break;
10469 case EQ: code1 = UNKNOWN; code2 = NE; break;
10470 case NE: code2 = UNKNOWN; break;
10473 gcc_unreachable ();
10478 * if (hi(a) < hi(b)) goto true;
10479 * if (hi(a) > hi(b)) goto false;
10480 * if (lo(a) < lo(b)) goto true;
10484 ix86_compare_op0 = hi[0];
10485 ix86_compare_op1 = hi[1];
10487 if (code1 != UNKNOWN)
10488 ix86_expand_branch (code1, label);
10489 if (code2 != UNKNOWN)
10490 ix86_expand_branch (code2, label2);
10492 ix86_compare_op0 = lo[0];
10493 ix86_compare_op1 = lo[1];
10494 ix86_expand_branch (code3, label);
10496 if (code2 != UNKNOWN)
10497 emit_label (label2);
10502 gcc_unreachable ();
10506 /* Split branch based on floating point condition. */
10508 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10509 rtx target1, rtx target2, rtx tmp, rtx pushed)
10511 rtx second, bypass;
10512 rtx label = NULL_RTX;
10514 int bypass_probability = -1, second_probability = -1, probability = -1;
10517 if (target2 != pc_rtx)
10520 code = reverse_condition_maybe_unordered (code);
10525 condition = ix86_expand_fp_compare (code, op1, op2,
10526 tmp, &second, &bypass);
10528 /* Remove pushed operand from stack. */
10530 ix86_free_from_memory (GET_MODE (pushed));
10532 if (split_branch_probability >= 0)
10534 /* Distribute the probabilities across the jumps.
10535 Assume the BYPASS and SECOND to be always test
10537 probability = split_branch_probability;
10539 /* Value of 1 is low enough to make no need for probability
10540 to be updated. Later we may run some experiments and see
10541 if unordered values are more frequent in practice. */
10543 bypass_probability = 1;
10545 second_probability = 1;
10547 if (bypass != NULL_RTX)
10549 label = gen_label_rtx ();
10550 i = emit_jump_insn (gen_rtx_SET
10552 gen_rtx_IF_THEN_ELSE (VOIDmode,
10554 gen_rtx_LABEL_REF (VOIDmode,
10557 if (bypass_probability >= 0)
10559 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10560 GEN_INT (bypass_probability),
10563 i = emit_jump_insn (gen_rtx_SET
10565 gen_rtx_IF_THEN_ELSE (VOIDmode,
10566 condition, target1, target2)));
10567 if (probability >= 0)
10569 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10570 GEN_INT (probability),
10572 if (second != NULL_RTX)
10574 i = emit_jump_insn (gen_rtx_SET
10576 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10578 if (second_probability >= 0)
10580 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10581 GEN_INT (second_probability),
10584 if (label != NULL_RTX)
10585 emit_label (label);
10589 ix86_expand_setcc (enum rtx_code code, rtx dest)
10591 rtx ret, tmp, tmpreg, equiv;
10592 rtx second_test, bypass_test;
10594 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10595 return 0; /* FAIL */
10597 gcc_assert (GET_MODE (dest) == QImode);
10599 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10600 PUT_MODE (ret, QImode);
10605 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10606 if (bypass_test || second_test)
10608 rtx test = second_test;
10610 rtx tmp2 = gen_reg_rtx (QImode);
10613 gcc_assert (!second_test);
10614 test = bypass_test;
10616 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10618 PUT_MODE (test, QImode);
10619 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10622 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10624 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10627 /* Attach a REG_EQUAL note describing the comparison result. */
10628 if (ix86_compare_op0 && ix86_compare_op1)
10630 equiv = simplify_gen_relational (code, QImode,
10631 GET_MODE (ix86_compare_op0),
10632 ix86_compare_op0, ix86_compare_op1);
10633 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10636 return 1; /* DONE */
10639 /* Expand comparison setting or clearing carry flag. Return true when
10640 successful and set pop for the operation. */
10642 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10644 enum machine_mode mode =
10645 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10647 /* Do not handle DImode compares that go through special path. Also we can't
10648 deal with FP compares yet. This is possible to add. */
10649 if (mode == (TARGET_64BIT ? TImode : DImode))
10651 if (FLOAT_MODE_P (mode))
10653 rtx second_test = NULL, bypass_test = NULL;
10654 rtx compare_op, compare_seq;
10656 /* Shortcut: following common codes never translate into carry flag compares. */
10657 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10658 || code == ORDERED || code == UNORDERED)
10661 /* These comparisons require zero flag; swap operands so they won't. */
10662 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10663 && !TARGET_IEEE_FP)
10668 code = swap_condition (code);
10671 /* Try to expand the comparison and verify that we end up with carry flag
10672 based comparison. This is fails to be true only when we decide to expand
10673 comparison using arithmetic that is not too common scenario. */
10675 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10676 &second_test, &bypass_test);
10677 compare_seq = get_insns ();
10680 if (second_test || bypass_test)
10682 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10683 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10684 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10686 code = GET_CODE (compare_op);
10687 if (code != LTU && code != GEU)
10689 emit_insn (compare_seq);
10693 if (!INTEGRAL_MODE_P (mode))
10701 /* Convert a==0 into (unsigned)a<1. */
10704 if (op1 != const0_rtx)
10707 code = (code == EQ ? LTU : GEU);
10710 /* Convert a>b into b<a or a>=b-1. */
10713 if (GET_CODE (op1) == CONST_INT)
10715 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10716 /* Bail out on overflow. We still can swap operands but that
10717 would force loading of the constant into register. */
10718 if (op1 == const0_rtx
10719 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10721 code = (code == GTU ? GEU : LTU);
10728 code = (code == GTU ? LTU : GEU);
10732 /* Convert a>=0 into (unsigned)a<0x80000000. */
10735 if (mode == DImode || op1 != const0_rtx)
10737 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10738 code = (code == LT ? GEU : LTU);
10742 if (mode == DImode || op1 != constm1_rtx)
10744 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10745 code = (code == LE ? GEU : LTU);
10751 /* Swapping operands may cause constant to appear as first operand. */
10752 if (!nonimmediate_operand (op0, VOIDmode))
10754 if (no_new_pseudos)
10756 op0 = force_reg (mode, op0);
10758 ix86_compare_op0 = op0;
10759 ix86_compare_op1 = op1;
10760 *pop = ix86_expand_compare (code, NULL, NULL);
10761 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10766 ix86_expand_int_movcc (rtx operands[])
10768 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10769 rtx compare_seq, compare_op;
10770 rtx second_test, bypass_test;
10771 enum machine_mode mode = GET_MODE (operands[0]);
10772 bool sign_bit_compare_p = false;;
10775 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10776 compare_seq = get_insns ();
10779 compare_code = GET_CODE (compare_op);
10781 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10782 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10783 sign_bit_compare_p = true;
10785 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10786 HImode insns, we'd be swallowed in word prefix ops. */
10788 if ((mode != HImode || TARGET_FAST_PREFIX)
10789 && (mode != (TARGET_64BIT ? TImode : DImode))
10790 && GET_CODE (operands[2]) == CONST_INT
10791 && GET_CODE (operands[3]) == CONST_INT)
10793 rtx out = operands[0];
10794 HOST_WIDE_INT ct = INTVAL (operands[2]);
10795 HOST_WIDE_INT cf = INTVAL (operands[3]);
10796 HOST_WIDE_INT diff;
10799 /* Sign bit compares are better done using shifts than we do by using
10801 if (sign_bit_compare_p
10802 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10803 ix86_compare_op1, &compare_op))
10805 /* Detect overlap between destination and compare sources. */
10808 if (!sign_bit_compare_p)
10810 bool fpcmp = false;
10812 compare_code = GET_CODE (compare_op);
10814 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10815 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10818 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10821 /* To simplify rest of code, restrict to the GEU case. */
10822 if (compare_code == LTU)
10824 HOST_WIDE_INT tmp = ct;
10827 compare_code = reverse_condition (compare_code);
10828 code = reverse_condition (code);
10833 PUT_CODE (compare_op,
10834 reverse_condition_maybe_unordered
10835 (GET_CODE (compare_op)));
10837 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10841 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10842 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10843 tmp = gen_reg_rtx (mode);
10845 if (mode == DImode)
10846 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10848 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10852 if (code == GT || code == GE)
10853 code = reverse_condition (code);
10856 HOST_WIDE_INT tmp = ct;
10861 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10862 ix86_compare_op1, VOIDmode, 0, -1);
10875 tmp = expand_simple_binop (mode, PLUS,
10877 copy_rtx (tmp), 1, OPTAB_DIRECT);
10888 tmp = expand_simple_binop (mode, IOR,
10890 copy_rtx (tmp), 1, OPTAB_DIRECT);
10892 else if (diff == -1 && ct)
10902 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10904 tmp = expand_simple_binop (mode, PLUS,
10905 copy_rtx (tmp), GEN_INT (cf),
10906 copy_rtx (tmp), 1, OPTAB_DIRECT);
10914 * andl cf - ct, dest
10924 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10927 tmp = expand_simple_binop (mode, AND,
10929 gen_int_mode (cf - ct, mode),
10930 copy_rtx (tmp), 1, OPTAB_DIRECT);
10932 tmp = expand_simple_binop (mode, PLUS,
10933 copy_rtx (tmp), GEN_INT (ct),
10934 copy_rtx (tmp), 1, OPTAB_DIRECT);
10937 if (!rtx_equal_p (tmp, out))
10938 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10940 return 1; /* DONE */
10946 tmp = ct, ct = cf, cf = tmp;
10948 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10950 /* We may be reversing unordered compare to normal compare, that
10951 is not valid in general (we may convert non-trapping condition
10952 to trapping one), however on i386 we currently emit all
10953 comparisons unordered. */
10954 compare_code = reverse_condition_maybe_unordered (compare_code);
10955 code = reverse_condition_maybe_unordered (code);
10959 compare_code = reverse_condition (compare_code);
10960 code = reverse_condition (code);
10964 compare_code = UNKNOWN;
10965 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10966 && GET_CODE (ix86_compare_op1) == CONST_INT)
10968 if (ix86_compare_op1 == const0_rtx
10969 && (code == LT || code == GE))
10970 compare_code = code;
10971 else if (ix86_compare_op1 == constm1_rtx)
10975 else if (code == GT)
10980 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10981 if (compare_code != UNKNOWN
10982 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10983 && (cf == -1 || ct == -1))
10985 /* If lea code below could be used, only optimize
10986 if it results in a 2 insn sequence. */
10988 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10989 || diff == 3 || diff == 5 || diff == 9)
10990 || (compare_code == LT && ct == -1)
10991 || (compare_code == GE && cf == -1))
10994 * notl op1 (if necessary)
11002 code = reverse_condition (code);
11005 out = emit_store_flag (out, code, ix86_compare_op0,
11006 ix86_compare_op1, VOIDmode, 0, -1);
11008 out = expand_simple_binop (mode, IOR,
11010 out, 1, OPTAB_DIRECT);
11011 if (out != operands[0])
11012 emit_move_insn (operands[0], out);
11014 return 1; /* DONE */
11019 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11020 || diff == 3 || diff == 5 || diff == 9)
11021 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11023 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11029 * lea cf(dest*(ct-cf)),dest
11033 * This also catches the degenerate setcc-only case.
11039 out = emit_store_flag (out, code, ix86_compare_op0,
11040 ix86_compare_op1, VOIDmode, 0, 1);
11043 /* On x86_64 the lea instruction operates on Pmode, so we need
11044 to get arithmetics done in proper mode to match. */
11046 tmp = copy_rtx (out);
11050 out1 = copy_rtx (out);
11051 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11055 tmp = gen_rtx_PLUS (mode, tmp, out1);
11061 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11064 if (!rtx_equal_p (tmp, out))
11067 out = force_operand (tmp, copy_rtx (out));
11069 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11071 if (!rtx_equal_p (out, operands[0]))
11072 emit_move_insn (operands[0], copy_rtx (out));
11074 return 1; /* DONE */
11078 * General case: Jumpful:
11079 * xorl dest,dest cmpl op1, op2
11080 * cmpl op1, op2 movl ct, dest
11081 * setcc dest jcc 1f
11082 * decl dest movl cf, dest
11083 * andl (cf-ct),dest 1:
11086 * Size 20. Size 14.
11088 * This is reasonably steep, but branch mispredict costs are
11089 * high on modern cpus, so consider failing only if optimizing
11093 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11094 && BRANCH_COST >= 2)
11100 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11101 /* We may be reversing unordered compare to normal compare,
11102 that is not valid in general (we may convert non-trapping
11103 condition to trapping one), however on i386 we currently
11104 emit all comparisons unordered. */
11105 code = reverse_condition_maybe_unordered (code);
11108 code = reverse_condition (code);
11109 if (compare_code != UNKNOWN)
11110 compare_code = reverse_condition (compare_code);
11114 if (compare_code != UNKNOWN)
11116 /* notl op1 (if needed)
11121 For x < 0 (resp. x <= -1) there will be no notl,
11122 so if possible swap the constants to get rid of the
11124 True/false will be -1/0 while code below (store flag
11125 followed by decrement) is 0/-1, so the constants need
11126 to be exchanged once more. */
11128 if (compare_code == GE || !cf)
11130 code = reverse_condition (code);
11135 HOST_WIDE_INT tmp = cf;
11140 out = emit_store_flag (out, code, ix86_compare_op0,
11141 ix86_compare_op1, VOIDmode, 0, -1);
11145 out = emit_store_flag (out, code, ix86_compare_op0,
11146 ix86_compare_op1, VOIDmode, 0, 1);
11148 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11149 copy_rtx (out), 1, OPTAB_DIRECT);
11152 out = expand_simple_binop (mode, AND, copy_rtx (out),
11153 gen_int_mode (cf - ct, mode),
11154 copy_rtx (out), 1, OPTAB_DIRECT);
11156 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11157 copy_rtx (out), 1, OPTAB_DIRECT);
11158 if (!rtx_equal_p (out, operands[0]))
11159 emit_move_insn (operands[0], copy_rtx (out));
11161 return 1; /* DONE */
11165 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11167 /* Try a few things more with specific constants and a variable. */
11170 rtx var, orig_out, out, tmp;
11172 if (BRANCH_COST <= 2)
11173 return 0; /* FAIL */
11175 /* If one of the two operands is an interesting constant, load a
11176 constant with the above and mask it in with a logical operation. */
11178 if (GET_CODE (operands[2]) == CONST_INT)
11181 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11182 operands[3] = constm1_rtx, op = and_optab;
11183 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11184 operands[3] = const0_rtx, op = ior_optab;
11186 return 0; /* FAIL */
11188 else if (GET_CODE (operands[3]) == CONST_INT)
11191 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11192 operands[2] = constm1_rtx, op = and_optab;
11193 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11194 operands[2] = const0_rtx, op = ior_optab;
11196 return 0; /* FAIL */
11199 return 0; /* FAIL */
11201 orig_out = operands[0];
11202 tmp = gen_reg_rtx (mode);
11205 /* Recurse to get the constant loaded. */
11206 if (ix86_expand_int_movcc (operands) == 0)
11207 return 0; /* FAIL */
11209 /* Mask in the interesting variable. */
11210 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11212 if (!rtx_equal_p (out, orig_out))
11213 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11215 return 1; /* DONE */
11219 * For comparison with above,
11229 if (! nonimmediate_operand (operands[2], mode))
11230 operands[2] = force_reg (mode, operands[2]);
11231 if (! nonimmediate_operand (operands[3], mode))
11232 operands[3] = force_reg (mode, operands[3]);
11234 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11236 rtx tmp = gen_reg_rtx (mode);
11237 emit_move_insn (tmp, operands[3]);
11240 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11242 rtx tmp = gen_reg_rtx (mode);
11243 emit_move_insn (tmp, operands[2]);
11247 if (! register_operand (operands[2], VOIDmode)
11249 || ! register_operand (operands[3], VOIDmode)))
11250 operands[2] = force_reg (mode, operands[2]);
11253 && ! register_operand (operands[3], VOIDmode))
11254 operands[3] = force_reg (mode, operands[3]);
11256 emit_insn (compare_seq);
11257 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11258 gen_rtx_IF_THEN_ELSE (mode,
11259 compare_op, operands[2],
11262 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11263 gen_rtx_IF_THEN_ELSE (mode,
11265 copy_rtx (operands[3]),
11266 copy_rtx (operands[0]))));
11268 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11269 gen_rtx_IF_THEN_ELSE (mode,
11271 copy_rtx (operands[2]),
11272 copy_rtx (operands[0]))));
11274 return 1; /* DONE */
11277 /* Swap, force into registers, or otherwise massage the two operands
11278 to an sse comparison with a mask result. Thus we differ a bit from
11279 ix86_prepare_fp_compare_args which expects to produce a flags result.
11281 The DEST operand exists to help determine whether to commute commutative
11282 operators. The POP0/POP1 operands are updated in place. The new
11283 comparison code is returned, or UNKNOWN if not implementable. */
11285 static enum rtx_code
11286 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11287 rtx *pop0, rtx *pop1)
11295 /* We have no LTGT as an operator. We could implement it with
11296 NE & ORDERED, but this requires an extra temporary. It's
11297 not clear that it's worth it. */
11304 /* These are supported directly. */
11311 /* For commutative operators, try to canonicalize the destination
11312 operand to be first in the comparison - this helps reload to
11313 avoid extra moves. */
11314 if (!dest || !rtx_equal_p (dest, *pop1))
11322 /* These are not supported directly. Swap the comparison operands
11323 to transform into something that is supported. */
11327 code = swap_condition (code);
11331 gcc_unreachable ();
11337 /* Detect conditional moves that exactly match min/max operational
11338 semantics. Note that this is IEEE safe, as long as we don't
11339 interchange the operands.
11341 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11342 and TRUE if the operation is successful and instructions are emitted. */
11345 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11346 rtx cmp_op1, rtx if_true, rtx if_false)
11348 enum machine_mode mode;
11354 else if (code == UNGE)
11357 if_true = if_false;
11363 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11365 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11370 mode = GET_MODE (dest);
11372 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11373 but MODE may be a vector mode and thus not appropriate. */
11374 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11376 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11379 if_true = force_reg (mode, if_true);
11380 v = gen_rtvec (2, if_true, if_false);
11381 tmp = gen_rtx_UNSPEC (mode, v, u);
11385 code = is_min ? SMIN : SMAX;
11386 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11389 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11393 /* Expand an sse vector comparison. Return the register with the result. */
11396 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11397 rtx op_true, rtx op_false)
11399 enum machine_mode mode = GET_MODE (dest);
11402 cmp_op0 = force_reg (mode, cmp_op0);
11403 if (!nonimmediate_operand (cmp_op1, mode))
11404 cmp_op1 = force_reg (mode, cmp_op1);
11407 || reg_overlap_mentioned_p (dest, op_true)
11408 || reg_overlap_mentioned_p (dest, op_false))
11409 dest = gen_reg_rtx (mode);
11411 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11412 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11417 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11418 operations. This is used for both scalar and vector conditional moves. */
11421 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11423 enum machine_mode mode = GET_MODE (dest);
11426 if (op_false == CONST0_RTX (mode))
11428 op_true = force_reg (mode, op_true);
11429 x = gen_rtx_AND (mode, cmp, op_true);
11430 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11432 else if (op_true == CONST0_RTX (mode))
11434 op_false = force_reg (mode, op_false);
11435 x = gen_rtx_NOT (mode, cmp);
11436 x = gen_rtx_AND (mode, x, op_false);
11437 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11441 op_true = force_reg (mode, op_true);
11442 op_false = force_reg (mode, op_false);
11444 t2 = gen_reg_rtx (mode);
11446 t3 = gen_reg_rtx (mode);
11450 x = gen_rtx_AND (mode, op_true, cmp);
11451 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11453 x = gen_rtx_NOT (mode, cmp);
11454 x = gen_rtx_AND (mode, x, op_false);
11455 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11457 x = gen_rtx_IOR (mode, t3, t2);
11458 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11462 /* Expand a floating-point conditional move. Return true if successful. */
11465 ix86_expand_fp_movcc (rtx operands[])
11467 enum machine_mode mode = GET_MODE (operands[0]);
11468 enum rtx_code code = GET_CODE (operands[1]);
11469 rtx tmp, compare_op, second_test, bypass_test;
11471 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11473 enum machine_mode cmode;
11475 /* Since we've no cmove for sse registers, don't force bad register
11476 allocation just to gain access to it. Deny movcc when the
11477 comparison mode doesn't match the move mode. */
11478 cmode = GET_MODE (ix86_compare_op0);
11479 if (cmode == VOIDmode)
11480 cmode = GET_MODE (ix86_compare_op1);
11484 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11486 &ix86_compare_op1);
11487 if (code == UNKNOWN)
11490 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11491 ix86_compare_op1, operands[2],
11495 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11496 ix86_compare_op1, operands[2], operands[3]);
11497 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11501 /* The floating point conditional move instructions don't directly
11502 support conditions resulting from a signed integer comparison. */
11504 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11506 /* The floating point conditional move instructions don't directly
11507 support signed integer comparisons. */
11509 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11511 gcc_assert (!second_test && !bypass_test);
11512 tmp = gen_reg_rtx (QImode);
11513 ix86_expand_setcc (code, tmp);
11515 ix86_compare_op0 = tmp;
11516 ix86_compare_op1 = const0_rtx;
11517 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11519 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11521 tmp = gen_reg_rtx (mode);
11522 emit_move_insn (tmp, operands[3]);
11525 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11527 tmp = gen_reg_rtx (mode);
11528 emit_move_insn (tmp, operands[2]);
11532 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11533 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11534 operands[2], operands[3])));
11536 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11537 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11538 operands[3], operands[0])));
11540 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11541 gen_rtx_IF_THEN_ELSE (mode, second_test,
11542 operands[2], operands[0])));
11547 /* Expand a floating-point vector conditional move; a vcond operation
11548 rather than a movcc operation. */
11551 ix86_expand_fp_vcond (rtx operands[])
11553 enum rtx_code code = GET_CODE (operands[3]);
11556 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11557 &operands[4], &operands[5]);
11558 if (code == UNKNOWN)
11561 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11562 operands[5], operands[1], operands[2]))
11565 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11566 operands[1], operands[2]);
11567 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11571 /* Expand a signed integral vector conditional move. */
11574 ix86_expand_int_vcond (rtx operands[])
11576 enum machine_mode mode = GET_MODE (operands[0]);
11577 enum rtx_code code = GET_CODE (operands[3]);
11578 bool negate = false;
11581 cop0 = operands[4];
11582 cop1 = operands[5];
11584 /* Canonicalize the comparison to EQ, GT, GTU. */
11595 code = reverse_condition (code);
11601 code = reverse_condition (code);
11607 code = swap_condition (code);
11608 x = cop0, cop0 = cop1, cop1 = x;
11612 gcc_unreachable ();
11615 /* Unsigned parallel compare is not supported by the hardware. Play some
11616 tricks to turn this into a signed comparison against 0. */
11619 cop0 = force_reg (mode, cop0);
11627 /* Perform a parallel modulo subtraction. */
11628 t1 = gen_reg_rtx (mode);
11629 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11631 /* Extract the original sign bit of op0. */
11632 mask = GEN_INT (-0x80000000);
11633 mask = gen_rtx_CONST_VECTOR (mode,
11634 gen_rtvec (4, mask, mask, mask, mask));
11635 mask = force_reg (mode, mask);
11636 t2 = gen_reg_rtx (mode);
11637 emit_insn (gen_andv4si3 (t2, cop0, mask));
11639 /* XOR it back into the result of the subtraction. This results
11640 in the sign bit set iff we saw unsigned underflow. */
11641 x = gen_reg_rtx (mode);
11642 emit_insn (gen_xorv4si3 (x, t1, t2));
11650 /* Perform a parallel unsigned saturating subtraction. */
11651 x = gen_reg_rtx (mode);
11652 emit_insn (gen_rtx_SET (VOIDmode, x,
11653 gen_rtx_US_MINUS (mode, cop0, cop1)));
11660 gcc_unreachable ();
11664 cop1 = CONST0_RTX (mode);
11667 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11668 operands[1+negate], operands[2-negate]);
11670 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11671 operands[2-negate]);
11675 /* Expand conditional increment or decrement using adb/sbb instructions.
11676 The default case using setcc followed by the conditional move can be
11677 done by generic code. */
11679 ix86_expand_int_addcc (rtx operands[])
11681 enum rtx_code code = GET_CODE (operands[1]);
11683 rtx val = const0_rtx;
11684 bool fpcmp = false;
11685 enum machine_mode mode = GET_MODE (operands[0]);
11687 if (operands[3] != const1_rtx
11688 && operands[3] != constm1_rtx)
11690 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11691 ix86_compare_op1, &compare_op))
11693 code = GET_CODE (compare_op);
11695 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11696 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11699 code = ix86_fp_compare_code_to_integer (code);
11706 PUT_CODE (compare_op,
11707 reverse_condition_maybe_unordered
11708 (GET_CODE (compare_op)));
11710 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11712 PUT_MODE (compare_op, mode);
11714 /* Construct either adc or sbb insn. */
11715 if ((code == LTU) == (operands[3] == constm1_rtx))
11717 switch (GET_MODE (operands[0]))
11720 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11723 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11726 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11729 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11732 gcc_unreachable ();
11737 switch (GET_MODE (operands[0]))
11740 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11743 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11746 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11749 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11752 gcc_unreachable ();
11755 return 1; /* DONE */
11759 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11760 works for floating pointer parameters and nonoffsetable memories.
11761 For pushes, it returns just stack offsets; the values will be saved
11762 in the right order. Maximally three parts are generated. */
11765 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11770 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11772 size = (GET_MODE_SIZE (mode) + 4) / 8;
11774 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11775 gcc_assert (size >= 2 && size <= 3);
11777 /* Optimize constant pool reference to immediates. This is used by fp
11778 moves, that force all constants to memory to allow combining. */
11779 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11781 rtx tmp = maybe_get_pool_constant (operand);
11786 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11788 /* The only non-offsetable memories we handle are pushes. */
11789 int ok = push_operand (operand, VOIDmode);
11793 operand = copy_rtx (operand);
11794 PUT_MODE (operand, Pmode);
11795 parts[0] = parts[1] = parts[2] = operand;
11799 if (GET_CODE (operand) == CONST_VECTOR)
11801 enum machine_mode imode = int_mode_for_mode (mode);
11802 /* Caution: if we looked through a constant pool memory above,
11803 the operand may actually have a different mode now. That's
11804 ok, since we want to pun this all the way back to an integer. */
11805 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11806 gcc_assert (operand != NULL);
11812 if (mode == DImode)
11813 split_di (&operand, 1, &parts[0], &parts[1]);
11816 if (REG_P (operand))
11818 gcc_assert (reload_completed);
11819 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11820 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11822 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11824 else if (offsettable_memref_p (operand))
11826 operand = adjust_address (operand, SImode, 0);
11827 parts[0] = operand;
11828 parts[1] = adjust_address (operand, SImode, 4);
11830 parts[2] = adjust_address (operand, SImode, 8);
11832 else if (GET_CODE (operand) == CONST_DOUBLE)
11837 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11841 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11842 parts[2] = gen_int_mode (l[2], SImode);
11845 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11848 gcc_unreachable ();
11850 parts[1] = gen_int_mode (l[1], SImode);
11851 parts[0] = gen_int_mode (l[0], SImode);
11854 gcc_unreachable ();
11859 if (mode == TImode)
11860 split_ti (&operand, 1, &parts[0], &parts[1]);
11861 if (mode == XFmode || mode == TFmode)
11863 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11864 if (REG_P (operand))
11866 gcc_assert (reload_completed);
11867 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11868 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11870 else if (offsettable_memref_p (operand))
11872 operand = adjust_address (operand, DImode, 0);
11873 parts[0] = operand;
11874 parts[1] = adjust_address (operand, upper_mode, 8);
11876 else if (GET_CODE (operand) == CONST_DOUBLE)
11881 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11882 real_to_target (l, &r, mode);
11884 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11885 if (HOST_BITS_PER_WIDE_INT >= 64)
11888 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11889 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11892 parts[0] = immed_double_const (l[0], l[1], DImode);
11894 if (upper_mode == SImode)
11895 parts[1] = gen_int_mode (l[2], SImode);
11896 else if (HOST_BITS_PER_WIDE_INT >= 64)
11899 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11900 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11903 parts[1] = immed_double_const (l[2], l[3], DImode);
11906 gcc_unreachable ();
11913 /* Emit insns to perform a move or push of DI, DF, and XF values.
11914 Return false when normal moves are needed; true when all required
11915 insns have been emitted. Operands 2-4 contain the input values
11916 int the correct order; operands 5-7 contain the output values. */
11919 ix86_split_long_move (rtx operands[])
11924 int collisions = 0;
11925 enum machine_mode mode = GET_MODE (operands[0]);
11927 /* The DFmode expanders may ask us to move double.
11928 For 64bit target this is single move. By hiding the fact
11929 here we simplify i386.md splitters. */
11930 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11932 /* Optimize constant pool reference to immediates. This is used by
11933 fp moves, that force all constants to memory to allow combining. */
11935 if (GET_CODE (operands[1]) == MEM
11936 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11937 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11938 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11939 if (push_operand (operands[0], VOIDmode))
11941 operands[0] = copy_rtx (operands[0]);
11942 PUT_MODE (operands[0], Pmode);
11945 operands[0] = gen_lowpart (DImode, operands[0]);
11946 operands[1] = gen_lowpart (DImode, operands[1]);
11947 emit_move_insn (operands[0], operands[1]);
11951 /* The only non-offsettable memory we handle is push. */
11952 if (push_operand (operands[0], VOIDmode))
11955 gcc_assert (GET_CODE (operands[0]) != MEM
11956 || offsettable_memref_p (operands[0]));
11958 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11959 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11961 /* When emitting push, take care for source operands on the stack. */
11962 if (push && GET_CODE (operands[1]) == MEM
11963 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11966 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11967 XEXP (part[1][2], 0));
11968 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11969 XEXP (part[1][1], 0));
11972 /* We need to do copy in the right order in case an address register
11973 of the source overlaps the destination. */
11974 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
11976 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
11978 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11981 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
11984 /* Collision in the middle part can be handled by reordering. */
11985 if (collisions == 1 && nparts == 3
11986 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11989 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
11990 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
11993 /* If there are more collisions, we can't handle it by reordering.
11994 Do an lea to the last part and use only one colliding move. */
11995 else if (collisions > 1)
12001 base = part[0][nparts - 1];
12003 /* Handle the case when the last part isn't valid for lea.
12004 Happens in 64-bit mode storing the 12-byte XFmode. */
12005 if (GET_MODE (base) != Pmode)
12006 base = gen_rtx_REG (Pmode, REGNO (base));
12008 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12009 part[1][0] = replace_equiv_address (part[1][0], base);
12010 part[1][1] = replace_equiv_address (part[1][1],
12011 plus_constant (base, UNITS_PER_WORD));
12013 part[1][2] = replace_equiv_address (part[1][2],
12014 plus_constant (base, 8));
12024 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12025 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12026 emit_move_insn (part[0][2], part[1][2]);
12031 /* In 64bit mode we don't have 32bit push available. In case this is
12032 register, it is OK - we will just use larger counterpart. We also
12033 retype memory - these comes from attempt to avoid REX prefix on
12034 moving of second half of TFmode value. */
12035 if (GET_MODE (part[1][1]) == SImode)
12037 switch (GET_CODE (part[1][1]))
12040 part[1][1] = adjust_address (part[1][1], DImode, 0);
12044 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12048 gcc_unreachable ();
12051 if (GET_MODE (part[1][0]) == SImode)
12052 part[1][0] = part[1][1];
12055 emit_move_insn (part[0][1], part[1][1]);
12056 emit_move_insn (part[0][0], part[1][0]);
12060 /* Choose correct order to not overwrite the source before it is copied. */
12061 if ((REG_P (part[0][0])
12062 && REG_P (part[1][1])
12063 && (REGNO (part[0][0]) == REGNO (part[1][1])
12065 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12067 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12071 operands[2] = part[0][2];
12072 operands[3] = part[0][1];
12073 operands[4] = part[0][0];
12074 operands[5] = part[1][2];
12075 operands[6] = part[1][1];
12076 operands[7] = part[1][0];
12080 operands[2] = part[0][1];
12081 operands[3] = part[0][0];
12082 operands[5] = part[1][1];
12083 operands[6] = part[1][0];
12090 operands[2] = part[0][0];
12091 operands[3] = part[0][1];
12092 operands[4] = part[0][2];
12093 operands[5] = part[1][0];
12094 operands[6] = part[1][1];
12095 operands[7] = part[1][2];
12099 operands[2] = part[0][0];
12100 operands[3] = part[0][1];
12101 operands[5] = part[1][0];
12102 operands[6] = part[1][1];
12106 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12109 if (GET_CODE (operands[5]) == CONST_INT
12110 && operands[5] != const0_rtx
12111 && REG_P (operands[2]))
12113 if (GET_CODE (operands[6]) == CONST_INT
12114 && INTVAL (operands[6]) == INTVAL (operands[5]))
12115 operands[6] = operands[2];
12118 && GET_CODE (operands[7]) == CONST_INT
12119 && INTVAL (operands[7]) == INTVAL (operands[5]))
12120 operands[7] = operands[2];
12124 && GET_CODE (operands[6]) == CONST_INT
12125 && operands[6] != const0_rtx
12126 && REG_P (operands[3])
12127 && GET_CODE (operands[7]) == CONST_INT
12128 && INTVAL (operands[7]) == INTVAL (operands[6]))
12129 operands[7] = operands[3];
12132 emit_move_insn (operands[2], operands[5]);
12133 emit_move_insn (operands[3], operands[6]);
12135 emit_move_insn (operands[4], operands[7]);
12140 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12141 left shift by a constant, either using a single shift or
12142 a sequence of add instructions. */
12145 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12149 emit_insn ((mode == DImode
12151 : gen_adddi3) (operand, operand, operand));
12153 else if (!optimize_size
12154 && count * ix86_cost->add <= ix86_cost->shift_const)
12157 for (i=0; i<count; i++)
12159 emit_insn ((mode == DImode
12161 : gen_adddi3) (operand, operand, operand));
12165 emit_insn ((mode == DImode
12167 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12171 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12173 rtx low[2], high[2];
12175 const int single_width = mode == DImode ? 32 : 64;
12177 if (GET_CODE (operands[2]) == CONST_INT)
12179 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12180 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12182 if (count >= single_width)
12184 emit_move_insn (high[0], low[1]);
12185 emit_move_insn (low[0], const0_rtx);
12187 if (count > single_width)
12188 ix86_expand_ashl_const (high[0], count - single_width, mode);
12192 if (!rtx_equal_p (operands[0], operands[1]))
12193 emit_move_insn (operands[0], operands[1]);
12194 emit_insn ((mode == DImode
12196 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12197 ix86_expand_ashl_const (low[0], count, mode);
12202 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12204 if (operands[1] == const1_rtx)
12206 /* Assuming we've chosen a QImode capable registers, then 1 << N
12207 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12208 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12210 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12212 ix86_expand_clear (low[0]);
12213 ix86_expand_clear (high[0]);
12214 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12216 d = gen_lowpart (QImode, low[0]);
12217 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12218 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12219 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12221 d = gen_lowpart (QImode, high[0]);
12222 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12223 s = gen_rtx_NE (QImode, flags, const0_rtx);
12224 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12227 /* Otherwise, we can get the same results by manually performing
12228 a bit extract operation on bit 5/6, and then performing the two
12229 shifts. The two methods of getting 0/1 into low/high are exactly
12230 the same size. Avoiding the shift in the bit extract case helps
12231 pentium4 a bit; no one else seems to care much either way. */
12236 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12237 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12239 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12240 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12242 emit_insn ((mode == DImode
12244 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12245 emit_insn ((mode == DImode
12247 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12248 emit_move_insn (low[0], high[0]);
12249 emit_insn ((mode == DImode
12251 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12254 emit_insn ((mode == DImode
12256 : gen_ashldi3) (low[0], low[0], operands[2]));
12257 emit_insn ((mode == DImode
12259 : gen_ashldi3) (high[0], high[0], operands[2]));
12263 if (operands[1] == constm1_rtx)
12265 /* For -1 << N, we can avoid the shld instruction, because we
12266 know that we're shifting 0...31/63 ones into a -1. */
12267 emit_move_insn (low[0], constm1_rtx);
12269 emit_move_insn (high[0], low[0]);
12271 emit_move_insn (high[0], constm1_rtx);
12275 if (!rtx_equal_p (operands[0], operands[1]))
12276 emit_move_insn (operands[0], operands[1]);
12278 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12279 emit_insn ((mode == DImode
12281 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12284 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12286 if (TARGET_CMOVE && scratch)
12288 ix86_expand_clear (scratch);
12289 emit_insn ((mode == DImode
12290 ? gen_x86_shift_adj_1
12291 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12294 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12298 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12300 rtx low[2], high[2];
12302 const int single_width = mode == DImode ? 32 : 64;
12304 if (GET_CODE (operands[2]) == CONST_INT)
12306 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12307 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12309 if (count == single_width * 2 - 1)
12311 emit_move_insn (high[0], high[1]);
12312 emit_insn ((mode == DImode
12314 : gen_ashrdi3) (high[0], high[0],
12315 GEN_INT (single_width - 1)));
12316 emit_move_insn (low[0], high[0]);
12319 else if (count >= single_width)
12321 emit_move_insn (low[0], high[1]);
12322 emit_move_insn (high[0], low[0]);
12323 emit_insn ((mode == DImode
12325 : gen_ashrdi3) (high[0], high[0],
12326 GEN_INT (single_width - 1)));
12327 if (count > single_width)
12328 emit_insn ((mode == DImode
12330 : gen_ashrdi3) (low[0], low[0],
12331 GEN_INT (count - single_width)));
12335 if (!rtx_equal_p (operands[0], operands[1]))
12336 emit_move_insn (operands[0], operands[1]);
12337 emit_insn ((mode == DImode
12339 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12340 emit_insn ((mode == DImode
12342 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12347 if (!rtx_equal_p (operands[0], operands[1]))
12348 emit_move_insn (operands[0], operands[1]);
12350 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12352 emit_insn ((mode == DImode
12354 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12355 emit_insn ((mode == DImode
12357 : gen_ashrdi3) (high[0], high[0], operands[2]));
12359 if (TARGET_CMOVE && scratch)
12361 emit_move_insn (scratch, high[0]);
12362 emit_insn ((mode == DImode
12364 : gen_ashrdi3) (scratch, scratch,
12365 GEN_INT (single_width - 1)));
12366 emit_insn ((mode == DImode
12367 ? gen_x86_shift_adj_1
12368 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12372 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12377 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12379 rtx low[2], high[2];
12381 const int single_width = mode == DImode ? 32 : 64;
12383 if (GET_CODE (operands[2]) == CONST_INT)
12385 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12386 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12388 if (count >= single_width)
12390 emit_move_insn (low[0], high[1]);
12391 ix86_expand_clear (high[0]);
12393 if (count > single_width)
12394 emit_insn ((mode == DImode
12396 : gen_lshrdi3) (low[0], low[0],
12397 GEN_INT (count - single_width)));
12401 if (!rtx_equal_p (operands[0], operands[1]))
12402 emit_move_insn (operands[0], operands[1]);
12403 emit_insn ((mode == DImode
12405 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12406 emit_insn ((mode == DImode
12408 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12413 if (!rtx_equal_p (operands[0], operands[1]))
12414 emit_move_insn (operands[0], operands[1]);
12416 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12418 emit_insn ((mode == DImode
12420 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12421 emit_insn ((mode == DImode
12423 : gen_lshrdi3) (high[0], high[0], operands[2]));
12425 /* Heh. By reversing the arguments, we can reuse this pattern. */
12426 if (TARGET_CMOVE && scratch)
12428 ix86_expand_clear (scratch);
12429 emit_insn ((mode == DImode
12430 ? gen_x86_shift_adj_1
12431 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12435 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12439 /* Helper function for the string operations below. Dest VARIABLE whether
12440 it is aligned to VALUE bytes. If true, jump to the label. */
12442 ix86_expand_aligntest (rtx variable, int value)
12444 rtx label = gen_label_rtx ();
12445 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12446 if (GET_MODE (variable) == DImode)
12447 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12449 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12450 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12455 /* Adjust COUNTER by the VALUE. */
12457 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12459 if (GET_MODE (countreg) == DImode)
12460 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12462 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12465 /* Zero extend possibly SImode EXP to Pmode register. */
12467 ix86_zero_extend_to_Pmode (rtx exp)
12470 if (GET_MODE (exp) == VOIDmode)
12471 return force_reg (Pmode, exp);
12472 if (GET_MODE (exp) == Pmode)
12473 return copy_to_mode_reg (Pmode, exp);
12474 r = gen_reg_rtx (Pmode);
12475 emit_insn (gen_zero_extendsidi2 (r, exp));
12479 /* Expand string move (memcpy) operation. Use i386 string operations when
12480 profitable. expand_clrmem contains similar code. */
12482 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12484 rtx srcreg, destreg, countreg, srcexp, destexp;
12485 enum machine_mode counter_mode;
12486 HOST_WIDE_INT align = 0;
12487 unsigned HOST_WIDE_INT count = 0;
12489 if (GET_CODE (align_exp) == CONST_INT)
12490 align = INTVAL (align_exp);
12492 /* Can't use any of this if the user has appropriated esi or edi. */
12493 if (global_regs[4] || global_regs[5])
12496 /* This simple hack avoids all inlining code and simplifies code below. */
12497 if (!TARGET_ALIGN_STRINGOPS)
12500 if (GET_CODE (count_exp) == CONST_INT)
12502 count = INTVAL (count_exp);
12503 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12507 /* Figure out proper mode for counter. For 32bits it is always SImode,
12508 for 64bits use SImode when possible, otherwise DImode.
12509 Set count to number of bytes copied when known at compile time. */
12511 || GET_MODE (count_exp) == SImode
12512 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12513 counter_mode = SImode;
12515 counter_mode = DImode;
12517 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12519 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12520 if (destreg != XEXP (dst, 0))
12521 dst = replace_equiv_address_nv (dst, destreg);
12522 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12523 if (srcreg != XEXP (src, 0))
12524 src = replace_equiv_address_nv (src, srcreg);
12526 /* When optimizing for size emit simple rep ; movsb instruction for
12527 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12528 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12529 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12530 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12531 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12532 known to be zero or not. The rep; movsb sequence causes higher
12533 register pressure though, so take that into account. */
12535 if ((!optimize || optimize_size)
12540 || (count & 3) + count / 4 > 6))))
12542 emit_insn (gen_cld ());
12543 countreg = ix86_zero_extend_to_Pmode (count_exp);
12544 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12545 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12546 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12550 /* For constant aligned (or small unaligned) copies use rep movsl
12551 followed by code copying the rest. For PentiumPro ensure 8 byte
12552 alignment to allow rep movsl acceleration. */
12554 else if (count != 0
12556 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12557 || optimize_size || count < (unsigned int) 64))
12559 unsigned HOST_WIDE_INT offset = 0;
12560 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12561 rtx srcmem, dstmem;
12563 emit_insn (gen_cld ());
12564 if (count & ~(size - 1))
12566 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12568 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12570 while (offset < (count & ~(size - 1)))
12572 srcmem = adjust_automodify_address_nv (src, movs_mode,
12574 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12576 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12582 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12583 & (TARGET_64BIT ? -1 : 0x3fffffff));
12584 countreg = copy_to_mode_reg (counter_mode, countreg);
12585 countreg = ix86_zero_extend_to_Pmode (countreg);
12587 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12588 GEN_INT (size == 4 ? 2 : 3));
12589 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12590 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12592 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12593 countreg, destexp, srcexp));
12594 offset = count & ~(size - 1);
12597 if (size == 8 && (count & 0x04))
12599 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12601 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12603 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12608 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12610 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12612 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12617 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12619 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12621 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12624 /* The generic code based on the glibc implementation:
12625 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12626 allowing accelerated copying there)
12627 - copy the data using rep movsl
12628 - copy the rest. */
12633 rtx srcmem, dstmem;
12634 int desired_alignment = (TARGET_PENTIUMPRO
12635 && (count == 0 || count >= (unsigned int) 260)
12636 ? 8 : UNITS_PER_WORD);
12637 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12638 dst = change_address (dst, BLKmode, destreg);
12639 src = change_address (src, BLKmode, srcreg);
12641 /* In case we don't know anything about the alignment, default to
12642 library version, since it is usually equally fast and result in
12645 Also emit call when we know that the count is large and call overhead
12646 will not be important. */
12647 if (!TARGET_INLINE_ALL_STRINGOPS
12648 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12651 if (TARGET_SINGLE_STRINGOP)
12652 emit_insn (gen_cld ());
12654 countreg2 = gen_reg_rtx (Pmode);
12655 countreg = copy_to_mode_reg (counter_mode, count_exp);
12657 /* We don't use loops to align destination and to copy parts smaller
12658 than 4 bytes, because gcc is able to optimize such code better (in
12659 the case the destination or the count really is aligned, gcc is often
12660 able to predict the branches) and also it is friendlier to the
12661 hardware branch prediction.
12663 Using loops is beneficial for generic case, because we can
12664 handle small counts using the loops. Many CPUs (such as Athlon)
12665 have large REP prefix setup costs.
12667 This is quite costly. Maybe we can revisit this decision later or
12668 add some customizability to this code. */
12670 if (count == 0 && align < desired_alignment)
12672 label = gen_label_rtx ();
12673 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12674 LEU, 0, counter_mode, 1, label);
12678 rtx label = ix86_expand_aligntest (destreg, 1);
12679 srcmem = change_address (src, QImode, srcreg);
12680 dstmem = change_address (dst, QImode, destreg);
12681 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12682 ix86_adjust_counter (countreg, 1);
12683 emit_label (label);
12684 LABEL_NUSES (label) = 1;
12688 rtx label = ix86_expand_aligntest (destreg, 2);
12689 srcmem = change_address (src, HImode, srcreg);
12690 dstmem = change_address (dst, HImode, destreg);
12691 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12692 ix86_adjust_counter (countreg, 2);
12693 emit_label (label);
12694 LABEL_NUSES (label) = 1;
12696 if (align <= 4 && desired_alignment > 4)
12698 rtx label = ix86_expand_aligntest (destreg, 4);
12699 srcmem = change_address (src, SImode, srcreg);
12700 dstmem = change_address (dst, SImode, destreg);
12701 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12702 ix86_adjust_counter (countreg, 4);
12703 emit_label (label);
12704 LABEL_NUSES (label) = 1;
12707 if (label && desired_alignment > 4 && !TARGET_64BIT)
12709 emit_label (label);
12710 LABEL_NUSES (label) = 1;
12713 if (!TARGET_SINGLE_STRINGOP)
12714 emit_insn (gen_cld ());
12717 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12719 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12723 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12724 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12726 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12727 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12728 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12729 countreg2, destexp, srcexp));
12733 emit_label (label);
12734 LABEL_NUSES (label) = 1;
12736 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12738 srcmem = change_address (src, SImode, srcreg);
12739 dstmem = change_address (dst, SImode, destreg);
12740 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12742 if ((align <= 4 || count == 0) && TARGET_64BIT)
12744 rtx label = ix86_expand_aligntest (countreg, 4);
12745 srcmem = change_address (src, SImode, srcreg);
12746 dstmem = change_address (dst, SImode, destreg);
12747 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12748 emit_label (label);
12749 LABEL_NUSES (label) = 1;
12751 if (align > 2 && count != 0 && (count & 2))
12753 srcmem = change_address (src, HImode, srcreg);
12754 dstmem = change_address (dst, HImode, destreg);
12755 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12757 if (align <= 2 || count == 0)
12759 rtx label = ix86_expand_aligntest (countreg, 2);
12760 srcmem = change_address (src, HImode, srcreg);
12761 dstmem = change_address (dst, HImode, destreg);
12762 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12763 emit_label (label);
12764 LABEL_NUSES (label) = 1;
12766 if (align > 1 && count != 0 && (count & 1))
12768 srcmem = change_address (src, QImode, srcreg);
12769 dstmem = change_address (dst, QImode, destreg);
12770 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12772 if (align <= 1 || count == 0)
12774 rtx label = ix86_expand_aligntest (countreg, 1);
12775 srcmem = change_address (src, QImode, srcreg);
12776 dstmem = change_address (dst, QImode, destreg);
12777 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12778 emit_label (label);
12779 LABEL_NUSES (label) = 1;
12786 /* Expand string clear operation (bzero). Use i386 string operations when
12787 profitable. expand_movmem contains similar code. */
12789 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12791 rtx destreg, zeroreg, countreg, destexp;
12792 enum machine_mode counter_mode;
12793 HOST_WIDE_INT align = 0;
12794 unsigned HOST_WIDE_INT count = 0;
12796 if (GET_CODE (align_exp) == CONST_INT)
12797 align = INTVAL (align_exp);
12799 /* Can't use any of this if the user has appropriated esi. */
12800 if (global_regs[4])
12803 /* This simple hack avoids all inlining code and simplifies code below. */
12804 if (!TARGET_ALIGN_STRINGOPS)
12807 if (GET_CODE (count_exp) == CONST_INT)
12809 count = INTVAL (count_exp);
12810 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12813 /* Figure out proper mode for counter. For 32bits it is always SImode,
12814 for 64bits use SImode when possible, otherwise DImode.
12815 Set count to number of bytes copied when known at compile time. */
12817 || GET_MODE (count_exp) == SImode
12818 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12819 counter_mode = SImode;
12821 counter_mode = DImode;
12823 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12824 if (destreg != XEXP (dst, 0))
12825 dst = replace_equiv_address_nv (dst, destreg);
12828 /* When optimizing for size emit simple rep ; movsb instruction for
12829 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12830 sequence is 7 bytes long, so if optimizing for size and count is
12831 small enough that some stosl, stosw and stosb instructions without
12832 rep are shorter, fall back into the next if. */
12834 if ((!optimize || optimize_size)
12837 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12839 emit_insn (gen_cld ());
12841 countreg = ix86_zero_extend_to_Pmode (count_exp);
12842 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12843 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12844 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12846 else if (count != 0
12848 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12849 || optimize_size || count < (unsigned int) 64))
12851 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12852 unsigned HOST_WIDE_INT offset = 0;
12854 emit_insn (gen_cld ());
12856 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12857 if (count & ~(size - 1))
12859 unsigned HOST_WIDE_INT repcount;
12860 unsigned int max_nonrep;
12862 repcount = count >> (size == 4 ? 2 : 3);
12864 repcount &= 0x3fffffff;
12866 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12867 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12868 bytes. In both cases the latter seems to be faster for small
12870 max_nonrep = size == 4 ? 7 : 4;
12871 if (!optimize_size)
12874 case PROCESSOR_PENTIUM4:
12875 case PROCESSOR_NOCONA:
12882 if (repcount <= max_nonrep)
12883 while (repcount-- > 0)
12885 rtx mem = adjust_automodify_address_nv (dst,
12886 GET_MODE (zeroreg),
12888 emit_insn (gen_strset (destreg, mem, zeroreg));
12893 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12894 countreg = ix86_zero_extend_to_Pmode (countreg);
12895 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12896 GEN_INT (size == 4 ? 2 : 3));
12897 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12898 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12900 offset = count & ~(size - 1);
12903 if (size == 8 && (count & 0x04))
12905 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12907 emit_insn (gen_strset (destreg, mem,
12908 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12913 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12915 emit_insn (gen_strset (destreg, mem,
12916 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12921 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12923 emit_insn (gen_strset (destreg, mem,
12924 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12931 /* Compute desired alignment of the string operation. */
12932 int desired_alignment = (TARGET_PENTIUMPRO
12933 && (count == 0 || count >= (unsigned int) 260)
12934 ? 8 : UNITS_PER_WORD);
12936 /* In case we don't know anything about the alignment, default to
12937 library version, since it is usually equally fast and result in
12940 Also emit call when we know that the count is large and call overhead
12941 will not be important. */
12942 if (!TARGET_INLINE_ALL_STRINGOPS
12943 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12946 if (TARGET_SINGLE_STRINGOP)
12947 emit_insn (gen_cld ());
12949 countreg2 = gen_reg_rtx (Pmode);
12950 countreg = copy_to_mode_reg (counter_mode, count_exp);
12951 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12952 /* Get rid of MEM_OFFSET, it won't be accurate. */
12953 dst = change_address (dst, BLKmode, destreg);
12955 if (count == 0 && align < desired_alignment)
12957 label = gen_label_rtx ();
12958 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12959 LEU, 0, counter_mode, 1, label);
12963 rtx label = ix86_expand_aligntest (destreg, 1);
12964 emit_insn (gen_strset (destreg, dst,
12965 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12966 ix86_adjust_counter (countreg, 1);
12967 emit_label (label);
12968 LABEL_NUSES (label) = 1;
12972 rtx label = ix86_expand_aligntest (destreg, 2);
12973 emit_insn (gen_strset (destreg, dst,
12974 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12975 ix86_adjust_counter (countreg, 2);
12976 emit_label (label);
12977 LABEL_NUSES (label) = 1;
12979 if (align <= 4 && desired_alignment > 4)
12981 rtx label = ix86_expand_aligntest (destreg, 4);
12982 emit_insn (gen_strset (destreg, dst,
12984 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
12986 ix86_adjust_counter (countreg, 4);
12987 emit_label (label);
12988 LABEL_NUSES (label) = 1;
12991 if (label && desired_alignment > 4 && !TARGET_64BIT)
12993 emit_label (label);
12994 LABEL_NUSES (label) = 1;
12998 if (!TARGET_SINGLE_STRINGOP)
12999 emit_insn (gen_cld ());
13002 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13004 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13008 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13009 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13011 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13012 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13016 emit_label (label);
13017 LABEL_NUSES (label) = 1;
13020 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13021 emit_insn (gen_strset (destreg, dst,
13022 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13023 if (TARGET_64BIT && (align <= 4 || count == 0))
13025 rtx label = ix86_expand_aligntest (countreg, 4);
13026 emit_insn (gen_strset (destreg, dst,
13027 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13028 emit_label (label);
13029 LABEL_NUSES (label) = 1;
13031 if (align > 2 && count != 0 && (count & 2))
13032 emit_insn (gen_strset (destreg, dst,
13033 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13034 if (align <= 2 || count == 0)
13036 rtx label = ix86_expand_aligntest (countreg, 2);
13037 emit_insn (gen_strset (destreg, dst,
13038 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13039 emit_label (label);
13040 LABEL_NUSES (label) = 1;
13042 if (align > 1 && count != 0 && (count & 1))
13043 emit_insn (gen_strset (destreg, dst,
13044 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13045 if (align <= 1 || count == 0)
13047 rtx label = ix86_expand_aligntest (countreg, 1);
13048 emit_insn (gen_strset (destreg, dst,
13049 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13050 emit_label (label);
13051 LABEL_NUSES (label) = 1;
13057 /* Expand strlen. */
13059 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13061 rtx addr, scratch1, scratch2, scratch3, scratch4;
13063 /* The generic case of strlen expander is long. Avoid it's
13064 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13066 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13067 && !TARGET_INLINE_ALL_STRINGOPS
13069 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13072 addr = force_reg (Pmode, XEXP (src, 0));
13073 scratch1 = gen_reg_rtx (Pmode);
13075 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13078 /* Well it seems that some optimizer does not combine a call like
13079 foo(strlen(bar), strlen(bar));
13080 when the move and the subtraction is done here. It does calculate
13081 the length just once when these instructions are done inside of
13082 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13083 often used and I use one fewer register for the lifetime of
13084 output_strlen_unroll() this is better. */
13086 emit_move_insn (out, addr);
13088 ix86_expand_strlensi_unroll_1 (out, src, align);
13090 /* strlensi_unroll_1 returns the address of the zero at the end of
13091 the string, like memchr(), so compute the length by subtracting
13092 the start address. */
13094 emit_insn (gen_subdi3 (out, out, addr));
13096 emit_insn (gen_subsi3 (out, out, addr));
13101 scratch2 = gen_reg_rtx (Pmode);
13102 scratch3 = gen_reg_rtx (Pmode);
13103 scratch4 = force_reg (Pmode, constm1_rtx);
13105 emit_move_insn (scratch3, addr);
13106 eoschar = force_reg (QImode, eoschar);
13108 emit_insn (gen_cld ());
13109 src = replace_equiv_address_nv (src, scratch3);
13111 /* If .md starts supporting :P, this can be done in .md. */
13112 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13113 scratch4), UNSPEC_SCAS);
13114 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13117 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13118 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13122 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13123 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13129 /* Expand the appropriate insns for doing strlen if not just doing
13132 out = result, initialized with the start address
13133 align_rtx = alignment of the address.
13134 scratch = scratch register, initialized with the startaddress when
13135 not aligned, otherwise undefined
13137 This is just the body. It needs the initializations mentioned above and
13138 some address computing at the end. These things are done in i386.md. */
13141 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13145 rtx align_2_label = NULL_RTX;
13146 rtx align_3_label = NULL_RTX;
13147 rtx align_4_label = gen_label_rtx ();
13148 rtx end_0_label = gen_label_rtx ();
13150 rtx tmpreg = gen_reg_rtx (SImode);
13151 rtx scratch = gen_reg_rtx (SImode);
13155 if (GET_CODE (align_rtx) == CONST_INT)
13156 align = INTVAL (align_rtx);
13158 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13160 /* Is there a known alignment and is it less than 4? */
13163 rtx scratch1 = gen_reg_rtx (Pmode);
13164 emit_move_insn (scratch1, out);
13165 /* Is there a known alignment and is it not 2? */
13168 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13169 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13171 /* Leave just the 3 lower bits. */
13172 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13173 NULL_RTX, 0, OPTAB_WIDEN);
13175 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13176 Pmode, 1, align_4_label);
13177 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13178 Pmode, 1, align_2_label);
13179 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13180 Pmode, 1, align_3_label);
13184 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13185 check if is aligned to 4 - byte. */
13187 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13188 NULL_RTX, 0, OPTAB_WIDEN);
13190 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13191 Pmode, 1, align_4_label);
13194 mem = change_address (src, QImode, out);
13196 /* Now compare the bytes. */
13198 /* Compare the first n unaligned byte on a byte per byte basis. */
13199 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13200 QImode, 1, end_0_label);
13202 /* Increment the address. */
13204 emit_insn (gen_adddi3 (out, out, const1_rtx));
13206 emit_insn (gen_addsi3 (out, out, const1_rtx));
13208 /* Not needed with an alignment of 2 */
13211 emit_label (align_2_label);
13213 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13217 emit_insn (gen_adddi3 (out, out, const1_rtx));
13219 emit_insn (gen_addsi3 (out, out, const1_rtx));
13221 emit_label (align_3_label);
13224 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13228 emit_insn (gen_adddi3 (out, out, const1_rtx));
13230 emit_insn (gen_addsi3 (out, out, const1_rtx));
13233 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13234 align this loop. It gives only huge programs, but does not help to
13236 emit_label (align_4_label);
13238 mem = change_address (src, SImode, out);
13239 emit_move_insn (scratch, mem);
13241 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13243 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13245 /* This formula yields a nonzero result iff one of the bytes is zero.
13246 This saves three branches inside loop and many cycles. */
13248 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13249 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13250 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13251 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13252 gen_int_mode (0x80808080, SImode)));
13253 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13258 rtx reg = gen_reg_rtx (SImode);
13259 rtx reg2 = gen_reg_rtx (Pmode);
13260 emit_move_insn (reg, tmpreg);
13261 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13263 /* If zero is not in the first two bytes, move two bytes forward. */
13264 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13265 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13266 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13267 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13268 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13271 /* Emit lea manually to avoid clobbering of flags. */
13272 emit_insn (gen_rtx_SET (SImode, reg2,
13273 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13275 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13276 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13277 emit_insn (gen_rtx_SET (VOIDmode, out,
13278 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13285 rtx end_2_label = gen_label_rtx ();
13286 /* Is zero in the first two bytes? */
13288 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13289 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13290 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13291 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13292 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13294 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13295 JUMP_LABEL (tmp) = end_2_label;
13297 /* Not in the first two. Move two bytes forward. */
13298 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13300 emit_insn (gen_adddi3 (out, out, const2_rtx));
13302 emit_insn (gen_addsi3 (out, out, const2_rtx));
13304 emit_label (end_2_label);
13308 /* Avoid branch in fixing the byte. */
13309 tmpreg = gen_lowpart (QImode, tmpreg);
13310 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13311 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13313 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13315 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13317 emit_label (end_0_label);
13321 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13322 rtx callarg2 ATTRIBUTE_UNUSED,
13323 rtx pop, int sibcall)
13325 rtx use = NULL, call;
13327 if (pop == const0_rtx)
13329 gcc_assert (!TARGET_64BIT || !pop);
13332 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13333 fnaddr = machopic_indirect_call_target (fnaddr);
13335 /* Static functions and indirect calls don't need the pic register. */
13336 if (! TARGET_64BIT && flag_pic
13337 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13338 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13339 use_reg (&use, pic_offset_table_rtx);
13341 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13343 rtx al = gen_rtx_REG (QImode, 0);
13344 emit_move_insn (al, callarg2);
13345 use_reg (&use, al);
13347 #endif /* TARGET_MACHO */
13349 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13351 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13352 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13354 if (sibcall && TARGET_64BIT
13355 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13358 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13359 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13360 emit_move_insn (fnaddr, addr);
13361 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13364 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13366 call = gen_rtx_SET (VOIDmode, retval, call);
13369 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13370 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13371 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13374 call = emit_call_insn (call);
13376 CALL_INSN_FUNCTION_USAGE (call) = use;
13380 /* Clear stack slot assignments remembered from previous functions.
13381 This is called from INIT_EXPANDERS once before RTL is emitted for each
13384 static struct machine_function *
13385 ix86_init_machine_status (void)
13387 struct machine_function *f;
13389 f = ggc_alloc_cleared (sizeof (struct machine_function));
13390 f->use_fast_prologue_epilogue_nregs = -1;
13391 f->tls_descriptor_call_expanded_p = 0;
13396 /* Return a MEM corresponding to a stack slot with mode MODE.
13397 Allocate a new slot if necessary.
13399 The RTL for a function can have several slots available: N is
13400 which slot to use. */
13403 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13405 struct stack_local_entry *s;
13407 gcc_assert (n < MAX_386_STACK_LOCALS);
13409 for (s = ix86_stack_locals; s; s = s->next)
13410 if (s->mode == mode && s->n == n)
13413 s = (struct stack_local_entry *)
13414 ggc_alloc (sizeof (struct stack_local_entry));
13417 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13419 s->next = ix86_stack_locals;
13420 ix86_stack_locals = s;
13424 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13426 static GTY(()) rtx ix86_tls_symbol;
13428 ix86_tls_get_addr (void)
13431 if (!ix86_tls_symbol)
13433 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13434 (TARGET_ANY_GNU_TLS
13436 ? "___tls_get_addr"
13437 : "__tls_get_addr");
13440 return ix86_tls_symbol;
13443 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13445 static GTY(()) rtx ix86_tls_module_base_symbol;
13447 ix86_tls_module_base (void)
13450 if (!ix86_tls_module_base_symbol)
13452 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13453 "_TLS_MODULE_BASE_");
13454 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13455 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13458 return ix86_tls_module_base_symbol;
13461 /* Calculate the length of the memory address in the instruction
13462 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13465 memory_address_length (rtx addr)
13467 struct ix86_address parts;
13468 rtx base, index, disp;
13472 if (GET_CODE (addr) == PRE_DEC
13473 || GET_CODE (addr) == POST_INC
13474 || GET_CODE (addr) == PRE_MODIFY
13475 || GET_CODE (addr) == POST_MODIFY)
13478 ok = ix86_decompose_address (addr, &parts);
13481 if (parts.base && GET_CODE (parts.base) == SUBREG)
13482 parts.base = SUBREG_REG (parts.base);
13483 if (parts.index && GET_CODE (parts.index) == SUBREG)
13484 parts.index = SUBREG_REG (parts.index);
13487 index = parts.index;
13492 - esp as the base always wants an index,
13493 - ebp as the base always wants a displacement. */
13495 /* Register Indirect. */
13496 if (base && !index && !disp)
13498 /* esp (for its index) and ebp (for its displacement) need
13499 the two-byte modrm form. */
13500 if (addr == stack_pointer_rtx
13501 || addr == arg_pointer_rtx
13502 || addr == frame_pointer_rtx
13503 || addr == hard_frame_pointer_rtx)
13507 /* Direct Addressing. */
13508 else if (disp && !base && !index)
13513 /* Find the length of the displacement constant. */
13516 if (base && satisfies_constraint_K (disp))
13521 /* ebp always wants a displacement. */
13522 else if (base == hard_frame_pointer_rtx)
13525 /* An index requires the two-byte modrm form.... */
13527 /* ...like esp, which always wants an index. */
13528 || base == stack_pointer_rtx
13529 || base == arg_pointer_rtx
13530 || base == frame_pointer_rtx)
13537 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13538 is set, expect that insn have 8bit immediate alternative. */
13540 ix86_attr_length_immediate_default (rtx insn, int shortform)
13544 extract_insn_cached (insn);
13545 for (i = recog_data.n_operands - 1; i >= 0; --i)
13546 if (CONSTANT_P (recog_data.operand[i]))
13549 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13553 switch (get_attr_mode (insn))
13564 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13569 fatal_insn ("unknown insn mode", insn);
13575 /* Compute default value for "length_address" attribute. */
13577 ix86_attr_length_address_default (rtx insn)
13581 if (get_attr_type (insn) == TYPE_LEA)
13583 rtx set = PATTERN (insn);
13585 if (GET_CODE (set) == PARALLEL)
13586 set = XVECEXP (set, 0, 0);
13588 gcc_assert (GET_CODE (set) == SET);
13590 return memory_address_length (SET_SRC (set));
13593 extract_insn_cached (insn);
13594 for (i = recog_data.n_operands - 1; i >= 0; --i)
13595 if (GET_CODE (recog_data.operand[i]) == MEM)
13597 return memory_address_length (XEXP (recog_data.operand[i], 0));
13603 /* Return the maximum number of instructions a cpu can issue. */
13606 ix86_issue_rate (void)
13610 case PROCESSOR_PENTIUM:
13614 case PROCESSOR_PENTIUMPRO:
13615 case PROCESSOR_PENTIUM4:
13616 case PROCESSOR_ATHLON:
13618 case PROCESSOR_NOCONA:
13619 case PROCESSOR_GENERIC32:
13620 case PROCESSOR_GENERIC64:
13628 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13629 by DEP_INSN and nothing set by DEP_INSN. */
13632 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13636 /* Simplify the test for uninteresting insns. */
13637 if (insn_type != TYPE_SETCC
13638 && insn_type != TYPE_ICMOV
13639 && insn_type != TYPE_FCMOV
13640 && insn_type != TYPE_IBR)
13643 if ((set = single_set (dep_insn)) != 0)
13645 set = SET_DEST (set);
13648 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13649 && XVECLEN (PATTERN (dep_insn), 0) == 2
13650 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13651 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13653 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13654 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13659 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13662 /* This test is true if the dependent insn reads the flags but
13663 not any other potentially set register. */
13664 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13667 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13673 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13674 address with operands set by DEP_INSN. */
13677 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13681 if (insn_type == TYPE_LEA
13684 addr = PATTERN (insn);
13686 if (GET_CODE (addr) == PARALLEL)
13687 addr = XVECEXP (addr, 0, 0);
13689 gcc_assert (GET_CODE (addr) == SET);
13691 addr = SET_SRC (addr);
13696 extract_insn_cached (insn);
13697 for (i = recog_data.n_operands - 1; i >= 0; --i)
13698 if (GET_CODE (recog_data.operand[i]) == MEM)
13700 addr = XEXP (recog_data.operand[i], 0);
13707 return modified_in_p (addr, dep_insn);
13711 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13713 enum attr_type insn_type, dep_insn_type;
13714 enum attr_memory memory;
13716 int dep_insn_code_number;
13718 /* Anti and output dependencies have zero cost on all CPUs. */
13719 if (REG_NOTE_KIND (link) != 0)
13722 dep_insn_code_number = recog_memoized (dep_insn);
13724 /* If we can't recognize the insns, we can't really do anything. */
13725 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13728 insn_type = get_attr_type (insn);
13729 dep_insn_type = get_attr_type (dep_insn);
13733 case PROCESSOR_PENTIUM:
13734 /* Address Generation Interlock adds a cycle of latency. */
13735 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13738 /* ??? Compares pair with jump/setcc. */
13739 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13742 /* Floating point stores require value to be ready one cycle earlier. */
13743 if (insn_type == TYPE_FMOV
13744 && get_attr_memory (insn) == MEMORY_STORE
13745 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13749 case PROCESSOR_PENTIUMPRO:
13750 memory = get_attr_memory (insn);
13752 /* INT->FP conversion is expensive. */
13753 if (get_attr_fp_int_src (dep_insn))
13756 /* There is one cycle extra latency between an FP op and a store. */
13757 if (insn_type == TYPE_FMOV
13758 && (set = single_set (dep_insn)) != NULL_RTX
13759 && (set2 = single_set (insn)) != NULL_RTX
13760 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13761 && GET_CODE (SET_DEST (set2)) == MEM)
13764 /* Show ability of reorder buffer to hide latency of load by executing
13765 in parallel with previous instruction in case
13766 previous instruction is not needed to compute the address. */
13767 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13768 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13770 /* Claim moves to take one cycle, as core can issue one load
13771 at time and the next load can start cycle later. */
13772 if (dep_insn_type == TYPE_IMOV
13773 || dep_insn_type == TYPE_FMOV)
13781 memory = get_attr_memory (insn);
13783 /* The esp dependency is resolved before the instruction is really
13785 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13786 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13789 /* INT->FP conversion is expensive. */
13790 if (get_attr_fp_int_src (dep_insn))
13793 /* Show ability of reorder buffer to hide latency of load by executing
13794 in parallel with previous instruction in case
13795 previous instruction is not needed to compute the address. */
13796 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13797 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13799 /* Claim moves to take one cycle, as core can issue one load
13800 at time and the next load can start cycle later. */
13801 if (dep_insn_type == TYPE_IMOV
13802 || dep_insn_type == TYPE_FMOV)
13811 case PROCESSOR_ATHLON:
13813 case PROCESSOR_GENERIC32:
13814 case PROCESSOR_GENERIC64:
13815 memory = get_attr_memory (insn);
13817 /* Show ability of reorder buffer to hide latency of load by executing
13818 in parallel with previous instruction in case
13819 previous instruction is not needed to compute the address. */
13820 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13821 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13823 enum attr_unit unit = get_attr_unit (insn);
13826 /* Because of the difference between the length of integer and
13827 floating unit pipeline preparation stages, the memory operands
13828 for floating point are cheaper.
13830 ??? For Athlon it the difference is most probably 2. */
13831 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13834 loadcost = TARGET_ATHLON ? 2 : 0;
13836 if (cost >= loadcost)
13849 /* How many alternative schedules to try. This should be as wide as the
13850 scheduling freedom in the DFA, but no wider. Making this value too
13851 large results extra work for the scheduler. */
13854 ia32_multipass_dfa_lookahead (void)
13856 if (ix86_tune == PROCESSOR_PENTIUM)
13859 if (ix86_tune == PROCESSOR_PENTIUMPRO
13860 || ix86_tune == PROCESSOR_K6)
13868 /* Compute the alignment given to a constant that is being placed in memory.
13869 EXP is the constant and ALIGN is the alignment that the object would
13871 The value of this function is used instead of that alignment to align
13875 ix86_constant_alignment (tree exp, int align)
13877 if (TREE_CODE (exp) == REAL_CST)
13879 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13881 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13884 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13885 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13886 return BITS_PER_WORD;
13891 /* Compute the alignment for a static variable.
13892 TYPE is the data type, and ALIGN is the alignment that
13893 the object would ordinarily have. The value of this function is used
13894 instead of that alignment to align the object. */
13897 ix86_data_alignment (tree type, int align)
13899 int max_align = optimize_size ? BITS_PER_WORD : 256;
13901 if (AGGREGATE_TYPE_P (type)
13902 && TYPE_SIZE (type)
13903 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13904 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13905 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13906 && align < max_align)
13909 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13910 to 16byte boundary. */
13913 if (AGGREGATE_TYPE_P (type)
13914 && TYPE_SIZE (type)
13915 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13916 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13917 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13921 if (TREE_CODE (type) == ARRAY_TYPE)
13923 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13925 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13928 else if (TREE_CODE (type) == COMPLEX_TYPE)
13931 if (TYPE_MODE (type) == DCmode && align < 64)
13933 if (TYPE_MODE (type) == XCmode && align < 128)
13936 else if ((TREE_CODE (type) == RECORD_TYPE
13937 || TREE_CODE (type) == UNION_TYPE
13938 || TREE_CODE (type) == QUAL_UNION_TYPE)
13939 && TYPE_FIELDS (type))
13941 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13943 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13946 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13947 || TREE_CODE (type) == INTEGER_TYPE)
13949 if (TYPE_MODE (type) == DFmode && align < 64)
13951 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13958 /* Compute the alignment for a local variable.
13959 TYPE is the data type, and ALIGN is the alignment that
13960 the object would ordinarily have. The value of this macro is used
13961 instead of that alignment to align the object. */
13964 ix86_local_alignment (tree type, int align)
13966 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13967 to 16byte boundary. */
13970 if (AGGREGATE_TYPE_P (type)
13971 && TYPE_SIZE (type)
13972 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13973 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13974 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13977 if (TREE_CODE (type) == ARRAY_TYPE)
13979 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13981 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13984 else if (TREE_CODE (type) == COMPLEX_TYPE)
13986 if (TYPE_MODE (type) == DCmode && align < 64)
13988 if (TYPE_MODE (type) == XCmode && align < 128)
13991 else if ((TREE_CODE (type) == RECORD_TYPE
13992 || TREE_CODE (type) == UNION_TYPE
13993 || TREE_CODE (type) == QUAL_UNION_TYPE)
13994 && TYPE_FIELDS (type))
13996 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13998 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14001 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14002 || TREE_CODE (type) == INTEGER_TYPE)
14005 if (TYPE_MODE (type) == DFmode && align < 64)
14007 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14013 /* Emit RTL insns to initialize the variable parts of a trampoline.
14014 FNADDR is an RTX for the address of the function's pure code.
14015 CXT is an RTX for the static chain value for the function. */
14017 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14021 /* Compute offset from the end of the jmp to the target function. */
14022 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14023 plus_constant (tramp, 10),
14024 NULL_RTX, 1, OPTAB_DIRECT);
14025 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14026 gen_int_mode (0xb9, QImode));
14027 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14028 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14029 gen_int_mode (0xe9, QImode));
14030 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14035 /* Try to load address using shorter movl instead of movabs.
14036 We may want to support movq for kernel mode, but kernel does not use
14037 trampolines at the moment. */
14038 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14040 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14041 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14042 gen_int_mode (0xbb41, HImode));
14043 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14044 gen_lowpart (SImode, fnaddr));
14049 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14050 gen_int_mode (0xbb49, HImode));
14051 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14055 /* Load static chain using movabs to r10. */
14056 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14057 gen_int_mode (0xba49, HImode));
14058 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14061 /* Jump to the r11 */
14062 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14063 gen_int_mode (0xff49, HImode));
14064 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14065 gen_int_mode (0xe3, QImode));
14067 gcc_assert (offset <= TRAMPOLINE_SIZE);
14070 #ifdef ENABLE_EXECUTE_STACK
14071 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14072 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14076 /* Codes for all the SSE/MMX builtins. */
14079 IX86_BUILTIN_ADDPS,
14080 IX86_BUILTIN_ADDSS,
14081 IX86_BUILTIN_DIVPS,
14082 IX86_BUILTIN_DIVSS,
14083 IX86_BUILTIN_MULPS,
14084 IX86_BUILTIN_MULSS,
14085 IX86_BUILTIN_SUBPS,
14086 IX86_BUILTIN_SUBSS,
14088 IX86_BUILTIN_CMPEQPS,
14089 IX86_BUILTIN_CMPLTPS,
14090 IX86_BUILTIN_CMPLEPS,
14091 IX86_BUILTIN_CMPGTPS,
14092 IX86_BUILTIN_CMPGEPS,
14093 IX86_BUILTIN_CMPNEQPS,
14094 IX86_BUILTIN_CMPNLTPS,
14095 IX86_BUILTIN_CMPNLEPS,
14096 IX86_BUILTIN_CMPNGTPS,
14097 IX86_BUILTIN_CMPNGEPS,
14098 IX86_BUILTIN_CMPORDPS,
14099 IX86_BUILTIN_CMPUNORDPS,
14100 IX86_BUILTIN_CMPEQSS,
14101 IX86_BUILTIN_CMPLTSS,
14102 IX86_BUILTIN_CMPLESS,
14103 IX86_BUILTIN_CMPNEQSS,
14104 IX86_BUILTIN_CMPNLTSS,
14105 IX86_BUILTIN_CMPNLESS,
14106 IX86_BUILTIN_CMPNGTSS,
14107 IX86_BUILTIN_CMPNGESS,
14108 IX86_BUILTIN_CMPORDSS,
14109 IX86_BUILTIN_CMPUNORDSS,
14111 IX86_BUILTIN_COMIEQSS,
14112 IX86_BUILTIN_COMILTSS,
14113 IX86_BUILTIN_COMILESS,
14114 IX86_BUILTIN_COMIGTSS,
14115 IX86_BUILTIN_COMIGESS,
14116 IX86_BUILTIN_COMINEQSS,
14117 IX86_BUILTIN_UCOMIEQSS,
14118 IX86_BUILTIN_UCOMILTSS,
14119 IX86_BUILTIN_UCOMILESS,
14120 IX86_BUILTIN_UCOMIGTSS,
14121 IX86_BUILTIN_UCOMIGESS,
14122 IX86_BUILTIN_UCOMINEQSS,
14124 IX86_BUILTIN_CVTPI2PS,
14125 IX86_BUILTIN_CVTPS2PI,
14126 IX86_BUILTIN_CVTSI2SS,
14127 IX86_BUILTIN_CVTSI642SS,
14128 IX86_BUILTIN_CVTSS2SI,
14129 IX86_BUILTIN_CVTSS2SI64,
14130 IX86_BUILTIN_CVTTPS2PI,
14131 IX86_BUILTIN_CVTTSS2SI,
14132 IX86_BUILTIN_CVTTSS2SI64,
14134 IX86_BUILTIN_MAXPS,
14135 IX86_BUILTIN_MAXSS,
14136 IX86_BUILTIN_MINPS,
14137 IX86_BUILTIN_MINSS,
14139 IX86_BUILTIN_LOADUPS,
14140 IX86_BUILTIN_STOREUPS,
14141 IX86_BUILTIN_MOVSS,
14143 IX86_BUILTIN_MOVHLPS,
14144 IX86_BUILTIN_MOVLHPS,
14145 IX86_BUILTIN_LOADHPS,
14146 IX86_BUILTIN_LOADLPS,
14147 IX86_BUILTIN_STOREHPS,
14148 IX86_BUILTIN_STORELPS,
14150 IX86_BUILTIN_MASKMOVQ,
14151 IX86_BUILTIN_MOVMSKPS,
14152 IX86_BUILTIN_PMOVMSKB,
14154 IX86_BUILTIN_MOVNTPS,
14155 IX86_BUILTIN_MOVNTQ,
14157 IX86_BUILTIN_LOADDQU,
14158 IX86_BUILTIN_STOREDQU,
14160 IX86_BUILTIN_PACKSSWB,
14161 IX86_BUILTIN_PACKSSDW,
14162 IX86_BUILTIN_PACKUSWB,
14164 IX86_BUILTIN_PADDB,
14165 IX86_BUILTIN_PADDW,
14166 IX86_BUILTIN_PADDD,
14167 IX86_BUILTIN_PADDQ,
14168 IX86_BUILTIN_PADDSB,
14169 IX86_BUILTIN_PADDSW,
14170 IX86_BUILTIN_PADDUSB,
14171 IX86_BUILTIN_PADDUSW,
14172 IX86_BUILTIN_PSUBB,
14173 IX86_BUILTIN_PSUBW,
14174 IX86_BUILTIN_PSUBD,
14175 IX86_BUILTIN_PSUBQ,
14176 IX86_BUILTIN_PSUBSB,
14177 IX86_BUILTIN_PSUBSW,
14178 IX86_BUILTIN_PSUBUSB,
14179 IX86_BUILTIN_PSUBUSW,
14182 IX86_BUILTIN_PANDN,
14186 IX86_BUILTIN_PAVGB,
14187 IX86_BUILTIN_PAVGW,
14189 IX86_BUILTIN_PCMPEQB,
14190 IX86_BUILTIN_PCMPEQW,
14191 IX86_BUILTIN_PCMPEQD,
14192 IX86_BUILTIN_PCMPGTB,
14193 IX86_BUILTIN_PCMPGTW,
14194 IX86_BUILTIN_PCMPGTD,
14196 IX86_BUILTIN_PMADDWD,
14198 IX86_BUILTIN_PMAXSW,
14199 IX86_BUILTIN_PMAXUB,
14200 IX86_BUILTIN_PMINSW,
14201 IX86_BUILTIN_PMINUB,
14203 IX86_BUILTIN_PMULHUW,
14204 IX86_BUILTIN_PMULHW,
14205 IX86_BUILTIN_PMULLW,
14207 IX86_BUILTIN_PSADBW,
14208 IX86_BUILTIN_PSHUFW,
14210 IX86_BUILTIN_PSLLW,
14211 IX86_BUILTIN_PSLLD,
14212 IX86_BUILTIN_PSLLQ,
14213 IX86_BUILTIN_PSRAW,
14214 IX86_BUILTIN_PSRAD,
14215 IX86_BUILTIN_PSRLW,
14216 IX86_BUILTIN_PSRLD,
14217 IX86_BUILTIN_PSRLQ,
14218 IX86_BUILTIN_PSLLWI,
14219 IX86_BUILTIN_PSLLDI,
14220 IX86_BUILTIN_PSLLQI,
14221 IX86_BUILTIN_PSRAWI,
14222 IX86_BUILTIN_PSRADI,
14223 IX86_BUILTIN_PSRLWI,
14224 IX86_BUILTIN_PSRLDI,
14225 IX86_BUILTIN_PSRLQI,
14227 IX86_BUILTIN_PUNPCKHBW,
14228 IX86_BUILTIN_PUNPCKHWD,
14229 IX86_BUILTIN_PUNPCKHDQ,
14230 IX86_BUILTIN_PUNPCKLBW,
14231 IX86_BUILTIN_PUNPCKLWD,
14232 IX86_BUILTIN_PUNPCKLDQ,
14234 IX86_BUILTIN_SHUFPS,
14236 IX86_BUILTIN_RCPPS,
14237 IX86_BUILTIN_RCPSS,
14238 IX86_BUILTIN_RSQRTPS,
14239 IX86_BUILTIN_RSQRTSS,
14240 IX86_BUILTIN_SQRTPS,
14241 IX86_BUILTIN_SQRTSS,
14243 IX86_BUILTIN_UNPCKHPS,
14244 IX86_BUILTIN_UNPCKLPS,
14246 IX86_BUILTIN_ANDPS,
14247 IX86_BUILTIN_ANDNPS,
14249 IX86_BUILTIN_XORPS,
14252 IX86_BUILTIN_LDMXCSR,
14253 IX86_BUILTIN_STMXCSR,
14254 IX86_BUILTIN_SFENCE,
14256 /* 3DNow! Original */
14257 IX86_BUILTIN_FEMMS,
14258 IX86_BUILTIN_PAVGUSB,
14259 IX86_BUILTIN_PF2ID,
14260 IX86_BUILTIN_PFACC,
14261 IX86_BUILTIN_PFADD,
14262 IX86_BUILTIN_PFCMPEQ,
14263 IX86_BUILTIN_PFCMPGE,
14264 IX86_BUILTIN_PFCMPGT,
14265 IX86_BUILTIN_PFMAX,
14266 IX86_BUILTIN_PFMIN,
14267 IX86_BUILTIN_PFMUL,
14268 IX86_BUILTIN_PFRCP,
14269 IX86_BUILTIN_PFRCPIT1,
14270 IX86_BUILTIN_PFRCPIT2,
14271 IX86_BUILTIN_PFRSQIT1,
14272 IX86_BUILTIN_PFRSQRT,
14273 IX86_BUILTIN_PFSUB,
14274 IX86_BUILTIN_PFSUBR,
14275 IX86_BUILTIN_PI2FD,
14276 IX86_BUILTIN_PMULHRW,
14278 /* 3DNow! Athlon Extensions */
14279 IX86_BUILTIN_PF2IW,
14280 IX86_BUILTIN_PFNACC,
14281 IX86_BUILTIN_PFPNACC,
14282 IX86_BUILTIN_PI2FW,
14283 IX86_BUILTIN_PSWAPDSI,
14284 IX86_BUILTIN_PSWAPDSF,
14287 IX86_BUILTIN_ADDPD,
14288 IX86_BUILTIN_ADDSD,
14289 IX86_BUILTIN_DIVPD,
14290 IX86_BUILTIN_DIVSD,
14291 IX86_BUILTIN_MULPD,
14292 IX86_BUILTIN_MULSD,
14293 IX86_BUILTIN_SUBPD,
14294 IX86_BUILTIN_SUBSD,
14296 IX86_BUILTIN_CMPEQPD,
14297 IX86_BUILTIN_CMPLTPD,
14298 IX86_BUILTIN_CMPLEPD,
14299 IX86_BUILTIN_CMPGTPD,
14300 IX86_BUILTIN_CMPGEPD,
14301 IX86_BUILTIN_CMPNEQPD,
14302 IX86_BUILTIN_CMPNLTPD,
14303 IX86_BUILTIN_CMPNLEPD,
14304 IX86_BUILTIN_CMPNGTPD,
14305 IX86_BUILTIN_CMPNGEPD,
14306 IX86_BUILTIN_CMPORDPD,
14307 IX86_BUILTIN_CMPUNORDPD,
14308 IX86_BUILTIN_CMPNEPD,
14309 IX86_BUILTIN_CMPEQSD,
14310 IX86_BUILTIN_CMPLTSD,
14311 IX86_BUILTIN_CMPLESD,
14312 IX86_BUILTIN_CMPNEQSD,
14313 IX86_BUILTIN_CMPNLTSD,
14314 IX86_BUILTIN_CMPNLESD,
14315 IX86_BUILTIN_CMPORDSD,
14316 IX86_BUILTIN_CMPUNORDSD,
14317 IX86_BUILTIN_CMPNESD,
14319 IX86_BUILTIN_COMIEQSD,
14320 IX86_BUILTIN_COMILTSD,
14321 IX86_BUILTIN_COMILESD,
14322 IX86_BUILTIN_COMIGTSD,
14323 IX86_BUILTIN_COMIGESD,
14324 IX86_BUILTIN_COMINEQSD,
14325 IX86_BUILTIN_UCOMIEQSD,
14326 IX86_BUILTIN_UCOMILTSD,
14327 IX86_BUILTIN_UCOMILESD,
14328 IX86_BUILTIN_UCOMIGTSD,
14329 IX86_BUILTIN_UCOMIGESD,
14330 IX86_BUILTIN_UCOMINEQSD,
14332 IX86_BUILTIN_MAXPD,
14333 IX86_BUILTIN_MAXSD,
14334 IX86_BUILTIN_MINPD,
14335 IX86_BUILTIN_MINSD,
14337 IX86_BUILTIN_ANDPD,
14338 IX86_BUILTIN_ANDNPD,
14340 IX86_BUILTIN_XORPD,
14342 IX86_BUILTIN_SQRTPD,
14343 IX86_BUILTIN_SQRTSD,
14345 IX86_BUILTIN_UNPCKHPD,
14346 IX86_BUILTIN_UNPCKLPD,
14348 IX86_BUILTIN_SHUFPD,
14350 IX86_BUILTIN_LOADUPD,
14351 IX86_BUILTIN_STOREUPD,
14352 IX86_BUILTIN_MOVSD,
14354 IX86_BUILTIN_LOADHPD,
14355 IX86_BUILTIN_LOADLPD,
14357 IX86_BUILTIN_CVTDQ2PD,
14358 IX86_BUILTIN_CVTDQ2PS,
14360 IX86_BUILTIN_CVTPD2DQ,
14361 IX86_BUILTIN_CVTPD2PI,
14362 IX86_BUILTIN_CVTPD2PS,
14363 IX86_BUILTIN_CVTTPD2DQ,
14364 IX86_BUILTIN_CVTTPD2PI,
14366 IX86_BUILTIN_CVTPI2PD,
14367 IX86_BUILTIN_CVTSI2SD,
14368 IX86_BUILTIN_CVTSI642SD,
14370 IX86_BUILTIN_CVTSD2SI,
14371 IX86_BUILTIN_CVTSD2SI64,
14372 IX86_BUILTIN_CVTSD2SS,
14373 IX86_BUILTIN_CVTSS2SD,
14374 IX86_BUILTIN_CVTTSD2SI,
14375 IX86_BUILTIN_CVTTSD2SI64,
14377 IX86_BUILTIN_CVTPS2DQ,
14378 IX86_BUILTIN_CVTPS2PD,
14379 IX86_BUILTIN_CVTTPS2DQ,
14381 IX86_BUILTIN_MOVNTI,
14382 IX86_BUILTIN_MOVNTPD,
14383 IX86_BUILTIN_MOVNTDQ,
14386 IX86_BUILTIN_MASKMOVDQU,
14387 IX86_BUILTIN_MOVMSKPD,
14388 IX86_BUILTIN_PMOVMSKB128,
14390 IX86_BUILTIN_PACKSSWB128,
14391 IX86_BUILTIN_PACKSSDW128,
14392 IX86_BUILTIN_PACKUSWB128,
14394 IX86_BUILTIN_PADDB128,
14395 IX86_BUILTIN_PADDW128,
14396 IX86_BUILTIN_PADDD128,
14397 IX86_BUILTIN_PADDQ128,
14398 IX86_BUILTIN_PADDSB128,
14399 IX86_BUILTIN_PADDSW128,
14400 IX86_BUILTIN_PADDUSB128,
14401 IX86_BUILTIN_PADDUSW128,
14402 IX86_BUILTIN_PSUBB128,
14403 IX86_BUILTIN_PSUBW128,
14404 IX86_BUILTIN_PSUBD128,
14405 IX86_BUILTIN_PSUBQ128,
14406 IX86_BUILTIN_PSUBSB128,
14407 IX86_BUILTIN_PSUBSW128,
14408 IX86_BUILTIN_PSUBUSB128,
14409 IX86_BUILTIN_PSUBUSW128,
14411 IX86_BUILTIN_PAND128,
14412 IX86_BUILTIN_PANDN128,
14413 IX86_BUILTIN_POR128,
14414 IX86_BUILTIN_PXOR128,
14416 IX86_BUILTIN_PAVGB128,
14417 IX86_BUILTIN_PAVGW128,
14419 IX86_BUILTIN_PCMPEQB128,
14420 IX86_BUILTIN_PCMPEQW128,
14421 IX86_BUILTIN_PCMPEQD128,
14422 IX86_BUILTIN_PCMPGTB128,
14423 IX86_BUILTIN_PCMPGTW128,
14424 IX86_BUILTIN_PCMPGTD128,
14426 IX86_BUILTIN_PMADDWD128,
14428 IX86_BUILTIN_PMAXSW128,
14429 IX86_BUILTIN_PMAXUB128,
14430 IX86_BUILTIN_PMINSW128,
14431 IX86_BUILTIN_PMINUB128,
14433 IX86_BUILTIN_PMULUDQ,
14434 IX86_BUILTIN_PMULUDQ128,
14435 IX86_BUILTIN_PMULHUW128,
14436 IX86_BUILTIN_PMULHW128,
14437 IX86_BUILTIN_PMULLW128,
14439 IX86_BUILTIN_PSADBW128,
14440 IX86_BUILTIN_PSHUFHW,
14441 IX86_BUILTIN_PSHUFLW,
14442 IX86_BUILTIN_PSHUFD,
14444 IX86_BUILTIN_PSLLW128,
14445 IX86_BUILTIN_PSLLD128,
14446 IX86_BUILTIN_PSLLQ128,
14447 IX86_BUILTIN_PSRAW128,
14448 IX86_BUILTIN_PSRAD128,
14449 IX86_BUILTIN_PSRLW128,
14450 IX86_BUILTIN_PSRLD128,
14451 IX86_BUILTIN_PSRLQ128,
14452 IX86_BUILTIN_PSLLDQI128,
14453 IX86_BUILTIN_PSLLWI128,
14454 IX86_BUILTIN_PSLLDI128,
14455 IX86_BUILTIN_PSLLQI128,
14456 IX86_BUILTIN_PSRAWI128,
14457 IX86_BUILTIN_PSRADI128,
14458 IX86_BUILTIN_PSRLDQI128,
14459 IX86_BUILTIN_PSRLWI128,
14460 IX86_BUILTIN_PSRLDI128,
14461 IX86_BUILTIN_PSRLQI128,
14463 IX86_BUILTIN_PUNPCKHBW128,
14464 IX86_BUILTIN_PUNPCKHWD128,
14465 IX86_BUILTIN_PUNPCKHDQ128,
14466 IX86_BUILTIN_PUNPCKHQDQ128,
14467 IX86_BUILTIN_PUNPCKLBW128,
14468 IX86_BUILTIN_PUNPCKLWD128,
14469 IX86_BUILTIN_PUNPCKLDQ128,
14470 IX86_BUILTIN_PUNPCKLQDQ128,
14472 IX86_BUILTIN_CLFLUSH,
14473 IX86_BUILTIN_MFENCE,
14474 IX86_BUILTIN_LFENCE,
14476 /* Prescott New Instructions. */
14477 IX86_BUILTIN_ADDSUBPS,
14478 IX86_BUILTIN_HADDPS,
14479 IX86_BUILTIN_HSUBPS,
14480 IX86_BUILTIN_MOVSHDUP,
14481 IX86_BUILTIN_MOVSLDUP,
14482 IX86_BUILTIN_ADDSUBPD,
14483 IX86_BUILTIN_HADDPD,
14484 IX86_BUILTIN_HSUBPD,
14485 IX86_BUILTIN_LDDQU,
14487 IX86_BUILTIN_MONITOR,
14488 IX86_BUILTIN_MWAIT,
14490 IX86_BUILTIN_VEC_INIT_V2SI,
14491 IX86_BUILTIN_VEC_INIT_V4HI,
14492 IX86_BUILTIN_VEC_INIT_V8QI,
14493 IX86_BUILTIN_VEC_EXT_V2DF,
14494 IX86_BUILTIN_VEC_EXT_V2DI,
14495 IX86_BUILTIN_VEC_EXT_V4SF,
14496 IX86_BUILTIN_VEC_EXT_V4SI,
14497 IX86_BUILTIN_VEC_EXT_V8HI,
14498 IX86_BUILTIN_VEC_EXT_V2SI,
14499 IX86_BUILTIN_VEC_EXT_V4HI,
14500 IX86_BUILTIN_VEC_SET_V8HI,
14501 IX86_BUILTIN_VEC_SET_V4HI,
14506 #define def_builtin(MASK, NAME, TYPE, CODE) \
14508 if ((MASK) & target_flags \
14509 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14510 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14511 NULL, NULL_TREE); \
14514 /* Bits for builtin_description.flag. */
14516 /* Set when we don't support the comparison natively, and should
14517 swap_comparison in order to support it. */
14518 #define BUILTIN_DESC_SWAP_OPERANDS 1
14520 struct builtin_description
14522 const unsigned int mask;
14523 const enum insn_code icode;
14524 const char *const name;
14525 const enum ix86_builtins code;
14526 const enum rtx_code comparison;
14527 const unsigned int flag;
14530 static const struct builtin_description bdesc_comi[] =
14532 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14533 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14534 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14535 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14536 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14537 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14538 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14539 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14540 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14541 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14542 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14543 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14544 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14545 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14546 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14547 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14548 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14549 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14550 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14551 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14552 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14553 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14554 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14555 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14558 static const struct builtin_description bdesc_2arg[] =
14561 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14562 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14563 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14564 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14565 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14566 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14567 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14568 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14570 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14571 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14572 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14573 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14574 BUILTIN_DESC_SWAP_OPERANDS },
14575 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14576 BUILTIN_DESC_SWAP_OPERANDS },
14577 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14578 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14579 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14580 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14581 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14582 BUILTIN_DESC_SWAP_OPERANDS },
14583 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14584 BUILTIN_DESC_SWAP_OPERANDS },
14585 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14586 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14587 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14588 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14589 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14590 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14591 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14592 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14593 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14594 BUILTIN_DESC_SWAP_OPERANDS },
14595 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14596 BUILTIN_DESC_SWAP_OPERANDS },
14597 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14599 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14600 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14601 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14602 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14604 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14605 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14606 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14607 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14609 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14610 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14611 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14612 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14613 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14616 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14617 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14618 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14619 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14620 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14621 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14622 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14623 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14625 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14626 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14627 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14628 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14629 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14630 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14631 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14632 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14634 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14635 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14636 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14638 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14639 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14640 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14641 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14643 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14644 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14646 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14647 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14648 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14649 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14650 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14651 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14653 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14654 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14655 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14656 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14658 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14659 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14660 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14661 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14662 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14663 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14666 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14667 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14668 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14670 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14671 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14672 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14674 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14675 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14676 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14677 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14678 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14679 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14681 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14682 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14683 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14684 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14685 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14686 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14688 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14689 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14690 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14691 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14693 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14694 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14697 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14698 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14699 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14700 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14701 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14702 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14703 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14704 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14706 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14707 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14708 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14709 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14710 BUILTIN_DESC_SWAP_OPERANDS },
14711 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14712 BUILTIN_DESC_SWAP_OPERANDS },
14713 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14714 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14715 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14716 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14717 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14718 BUILTIN_DESC_SWAP_OPERANDS },
14719 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14720 BUILTIN_DESC_SWAP_OPERANDS },
14721 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14722 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14723 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14724 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14725 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14726 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14727 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14728 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14729 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14731 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14732 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14733 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14734 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14736 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14737 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14738 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14739 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14741 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14742 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14743 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14746 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14747 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14748 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14749 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14750 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14751 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14752 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14753 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14755 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14756 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14757 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14758 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14759 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14760 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14761 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14762 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14764 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14765 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14767 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14768 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14769 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14770 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14772 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14773 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14775 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14776 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14777 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14778 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14779 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14780 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14782 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14783 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14784 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14785 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14787 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14788 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14789 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14790 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14791 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14792 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14793 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14794 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14796 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14797 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14798 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14800 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14801 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14803 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14804 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14806 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14807 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14808 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14810 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14811 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14812 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14814 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14815 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14817 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14819 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14820 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14821 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14822 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14825 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14826 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14827 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14828 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14829 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14830 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14833 static const struct builtin_description bdesc_1arg[] =
14835 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14836 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14838 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14839 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14840 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14842 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14843 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14844 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14845 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14846 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14847 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14849 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14850 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14852 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14854 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14855 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14857 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14858 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14859 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14860 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14861 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14863 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14865 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14866 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14867 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14868 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14870 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14871 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14872 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14875 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14876 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14880 ix86_init_builtins (void)
14883 ix86_init_mmx_sse_builtins ();
14886 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14887 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
14890 ix86_init_mmx_sse_builtins (void)
14892 const struct builtin_description * d;
14895 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14896 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14897 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14898 tree V2DI_type_node
14899 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14900 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14901 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14902 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14903 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14904 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14905 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14907 tree pchar_type_node = build_pointer_type (char_type_node);
14908 tree pcchar_type_node = build_pointer_type (
14909 build_type_variant (char_type_node, 1, 0));
14910 tree pfloat_type_node = build_pointer_type (float_type_node);
14911 tree pcfloat_type_node = build_pointer_type (
14912 build_type_variant (float_type_node, 1, 0));
14913 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14914 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14915 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14918 tree int_ftype_v4sf_v4sf
14919 = build_function_type_list (integer_type_node,
14920 V4SF_type_node, V4SF_type_node, NULL_TREE);
14921 tree v4si_ftype_v4sf_v4sf
14922 = build_function_type_list (V4SI_type_node,
14923 V4SF_type_node, V4SF_type_node, NULL_TREE);
14924 /* MMX/SSE/integer conversions. */
14925 tree int_ftype_v4sf
14926 = build_function_type_list (integer_type_node,
14927 V4SF_type_node, NULL_TREE);
14928 tree int64_ftype_v4sf
14929 = build_function_type_list (long_long_integer_type_node,
14930 V4SF_type_node, NULL_TREE);
14931 tree int_ftype_v8qi
14932 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14933 tree v4sf_ftype_v4sf_int
14934 = build_function_type_list (V4SF_type_node,
14935 V4SF_type_node, integer_type_node, NULL_TREE);
14936 tree v4sf_ftype_v4sf_int64
14937 = build_function_type_list (V4SF_type_node,
14938 V4SF_type_node, long_long_integer_type_node,
14940 tree v4sf_ftype_v4sf_v2si
14941 = build_function_type_list (V4SF_type_node,
14942 V4SF_type_node, V2SI_type_node, NULL_TREE);
14944 /* Miscellaneous. */
14945 tree v8qi_ftype_v4hi_v4hi
14946 = build_function_type_list (V8QI_type_node,
14947 V4HI_type_node, V4HI_type_node, NULL_TREE);
14948 tree v4hi_ftype_v2si_v2si
14949 = build_function_type_list (V4HI_type_node,
14950 V2SI_type_node, V2SI_type_node, NULL_TREE);
14951 tree v4sf_ftype_v4sf_v4sf_int
14952 = build_function_type_list (V4SF_type_node,
14953 V4SF_type_node, V4SF_type_node,
14954 integer_type_node, NULL_TREE);
14955 tree v2si_ftype_v4hi_v4hi
14956 = build_function_type_list (V2SI_type_node,
14957 V4HI_type_node, V4HI_type_node, NULL_TREE);
14958 tree v4hi_ftype_v4hi_int
14959 = build_function_type_list (V4HI_type_node,
14960 V4HI_type_node, integer_type_node, NULL_TREE);
14961 tree v4hi_ftype_v4hi_di
14962 = build_function_type_list (V4HI_type_node,
14963 V4HI_type_node, long_long_unsigned_type_node,
14965 tree v2si_ftype_v2si_di
14966 = build_function_type_list (V2SI_type_node,
14967 V2SI_type_node, long_long_unsigned_type_node,
14969 tree void_ftype_void
14970 = build_function_type (void_type_node, void_list_node);
14971 tree void_ftype_unsigned
14972 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
14973 tree void_ftype_unsigned_unsigned
14974 = build_function_type_list (void_type_node, unsigned_type_node,
14975 unsigned_type_node, NULL_TREE);
14976 tree void_ftype_pcvoid_unsigned_unsigned
14977 = build_function_type_list (void_type_node, const_ptr_type_node,
14978 unsigned_type_node, unsigned_type_node,
14980 tree unsigned_ftype_void
14981 = build_function_type (unsigned_type_node, void_list_node);
14982 tree v2si_ftype_v4sf
14983 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
14984 /* Loads/stores. */
14985 tree void_ftype_v8qi_v8qi_pchar
14986 = build_function_type_list (void_type_node,
14987 V8QI_type_node, V8QI_type_node,
14988 pchar_type_node, NULL_TREE);
14989 tree v4sf_ftype_pcfloat
14990 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
14991 /* @@@ the type is bogus */
14992 tree v4sf_ftype_v4sf_pv2si
14993 = build_function_type_list (V4SF_type_node,
14994 V4SF_type_node, pv2si_type_node, NULL_TREE);
14995 tree void_ftype_pv2si_v4sf
14996 = build_function_type_list (void_type_node,
14997 pv2si_type_node, V4SF_type_node, NULL_TREE);
14998 tree void_ftype_pfloat_v4sf
14999 = build_function_type_list (void_type_node,
15000 pfloat_type_node, V4SF_type_node, NULL_TREE);
15001 tree void_ftype_pdi_di
15002 = build_function_type_list (void_type_node,
15003 pdi_type_node, long_long_unsigned_type_node,
15005 tree void_ftype_pv2di_v2di
15006 = build_function_type_list (void_type_node,
15007 pv2di_type_node, V2DI_type_node, NULL_TREE);
15008 /* Normal vector unops. */
15009 tree v4sf_ftype_v4sf
15010 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15012 /* Normal vector binops. */
15013 tree v4sf_ftype_v4sf_v4sf
15014 = build_function_type_list (V4SF_type_node,
15015 V4SF_type_node, V4SF_type_node, NULL_TREE);
15016 tree v8qi_ftype_v8qi_v8qi
15017 = build_function_type_list (V8QI_type_node,
15018 V8QI_type_node, V8QI_type_node, NULL_TREE);
15019 tree v4hi_ftype_v4hi_v4hi
15020 = build_function_type_list (V4HI_type_node,
15021 V4HI_type_node, V4HI_type_node, NULL_TREE);
15022 tree v2si_ftype_v2si_v2si
15023 = build_function_type_list (V2SI_type_node,
15024 V2SI_type_node, V2SI_type_node, NULL_TREE);
15025 tree di_ftype_di_di
15026 = build_function_type_list (long_long_unsigned_type_node,
15027 long_long_unsigned_type_node,
15028 long_long_unsigned_type_node, NULL_TREE);
15030 tree v2si_ftype_v2sf
15031 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15032 tree v2sf_ftype_v2si
15033 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15034 tree v2si_ftype_v2si
15035 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15036 tree v2sf_ftype_v2sf
15037 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15038 tree v2sf_ftype_v2sf_v2sf
15039 = build_function_type_list (V2SF_type_node,
15040 V2SF_type_node, V2SF_type_node, NULL_TREE);
15041 tree v2si_ftype_v2sf_v2sf
15042 = build_function_type_list (V2SI_type_node,
15043 V2SF_type_node, V2SF_type_node, NULL_TREE);
15044 tree pint_type_node = build_pointer_type (integer_type_node);
15045 tree pdouble_type_node = build_pointer_type (double_type_node);
15046 tree pcdouble_type_node = build_pointer_type (
15047 build_type_variant (double_type_node, 1, 0));
15048 tree int_ftype_v2df_v2df
15049 = build_function_type_list (integer_type_node,
15050 V2DF_type_node, V2DF_type_node, NULL_TREE);
15052 tree void_ftype_pcvoid
15053 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15054 tree v4sf_ftype_v4si
15055 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15056 tree v4si_ftype_v4sf
15057 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15058 tree v2df_ftype_v4si
15059 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15060 tree v4si_ftype_v2df
15061 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15062 tree v2si_ftype_v2df
15063 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15064 tree v4sf_ftype_v2df
15065 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15066 tree v2df_ftype_v2si
15067 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15068 tree v2df_ftype_v4sf
15069 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15070 tree int_ftype_v2df
15071 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15072 tree int64_ftype_v2df
15073 = build_function_type_list (long_long_integer_type_node,
15074 V2DF_type_node, NULL_TREE);
15075 tree v2df_ftype_v2df_int
15076 = build_function_type_list (V2DF_type_node,
15077 V2DF_type_node, integer_type_node, NULL_TREE);
15078 tree v2df_ftype_v2df_int64
15079 = build_function_type_list (V2DF_type_node,
15080 V2DF_type_node, long_long_integer_type_node,
15082 tree v4sf_ftype_v4sf_v2df
15083 = build_function_type_list (V4SF_type_node,
15084 V4SF_type_node, V2DF_type_node, NULL_TREE);
15085 tree v2df_ftype_v2df_v4sf
15086 = build_function_type_list (V2DF_type_node,
15087 V2DF_type_node, V4SF_type_node, NULL_TREE);
15088 tree v2df_ftype_v2df_v2df_int
15089 = build_function_type_list (V2DF_type_node,
15090 V2DF_type_node, V2DF_type_node,
15093 tree v2df_ftype_v2df_pcdouble
15094 = build_function_type_list (V2DF_type_node,
15095 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15096 tree void_ftype_pdouble_v2df
15097 = build_function_type_list (void_type_node,
15098 pdouble_type_node, V2DF_type_node, NULL_TREE);
15099 tree void_ftype_pint_int
15100 = build_function_type_list (void_type_node,
15101 pint_type_node, integer_type_node, NULL_TREE);
15102 tree void_ftype_v16qi_v16qi_pchar
15103 = build_function_type_list (void_type_node,
15104 V16QI_type_node, V16QI_type_node,
15105 pchar_type_node, NULL_TREE);
15106 tree v2df_ftype_pcdouble
15107 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15108 tree v2df_ftype_v2df_v2df
15109 = build_function_type_list (V2DF_type_node,
15110 V2DF_type_node, V2DF_type_node, NULL_TREE);
15111 tree v16qi_ftype_v16qi_v16qi
15112 = build_function_type_list (V16QI_type_node,
15113 V16QI_type_node, V16QI_type_node, NULL_TREE);
15114 tree v8hi_ftype_v8hi_v8hi
15115 = build_function_type_list (V8HI_type_node,
15116 V8HI_type_node, V8HI_type_node, NULL_TREE);
15117 tree v4si_ftype_v4si_v4si
15118 = build_function_type_list (V4SI_type_node,
15119 V4SI_type_node, V4SI_type_node, NULL_TREE);
15120 tree v2di_ftype_v2di_v2di
15121 = build_function_type_list (V2DI_type_node,
15122 V2DI_type_node, V2DI_type_node, NULL_TREE);
15123 tree v2di_ftype_v2df_v2df
15124 = build_function_type_list (V2DI_type_node,
15125 V2DF_type_node, V2DF_type_node, NULL_TREE);
15126 tree v2df_ftype_v2df
15127 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15128 tree v2di_ftype_v2di_int
15129 = build_function_type_list (V2DI_type_node,
15130 V2DI_type_node, integer_type_node, NULL_TREE);
15131 tree v4si_ftype_v4si_int
15132 = build_function_type_list (V4SI_type_node,
15133 V4SI_type_node, integer_type_node, NULL_TREE);
15134 tree v8hi_ftype_v8hi_int
15135 = build_function_type_list (V8HI_type_node,
15136 V8HI_type_node, integer_type_node, NULL_TREE);
15137 tree v8hi_ftype_v8hi_v2di
15138 = build_function_type_list (V8HI_type_node,
15139 V8HI_type_node, V2DI_type_node, NULL_TREE);
15140 tree v4si_ftype_v4si_v2di
15141 = build_function_type_list (V4SI_type_node,
15142 V4SI_type_node, V2DI_type_node, NULL_TREE);
15143 tree v4si_ftype_v8hi_v8hi
15144 = build_function_type_list (V4SI_type_node,
15145 V8HI_type_node, V8HI_type_node, NULL_TREE);
15146 tree di_ftype_v8qi_v8qi
15147 = build_function_type_list (long_long_unsigned_type_node,
15148 V8QI_type_node, V8QI_type_node, NULL_TREE);
15149 tree di_ftype_v2si_v2si
15150 = build_function_type_list (long_long_unsigned_type_node,
15151 V2SI_type_node, V2SI_type_node, NULL_TREE);
15152 tree v2di_ftype_v16qi_v16qi
15153 = build_function_type_list (V2DI_type_node,
15154 V16QI_type_node, V16QI_type_node, NULL_TREE);
15155 tree v2di_ftype_v4si_v4si
15156 = build_function_type_list (V2DI_type_node,
15157 V4SI_type_node, V4SI_type_node, NULL_TREE);
15158 tree int_ftype_v16qi
15159 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15160 tree v16qi_ftype_pcchar
15161 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15162 tree void_ftype_pchar_v16qi
15163 = build_function_type_list (void_type_node,
15164 pchar_type_node, V16QI_type_node, NULL_TREE);
15167 tree float128_type;
15170 /* The __float80 type. */
15171 if (TYPE_MODE (long_double_type_node) == XFmode)
15172 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15176 /* The __float80 type. */
15177 float80_type = make_node (REAL_TYPE);
15178 TYPE_PRECISION (float80_type) = 80;
15179 layout_type (float80_type);
15180 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15185 float128_type = make_node (REAL_TYPE);
15186 TYPE_PRECISION (float128_type) = 128;
15187 layout_type (float128_type);
15188 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15191 /* Add all builtins that are more or less simple operations on two
15193 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15195 /* Use one of the operands; the target can have a different mode for
15196 mask-generating compares. */
15197 enum machine_mode mode;
15202 mode = insn_data[d->icode].operand[1].mode;
15207 type = v16qi_ftype_v16qi_v16qi;
15210 type = v8hi_ftype_v8hi_v8hi;
15213 type = v4si_ftype_v4si_v4si;
15216 type = v2di_ftype_v2di_v2di;
15219 type = v2df_ftype_v2df_v2df;
15222 type = v4sf_ftype_v4sf_v4sf;
15225 type = v8qi_ftype_v8qi_v8qi;
15228 type = v4hi_ftype_v4hi_v4hi;
15231 type = v2si_ftype_v2si_v2si;
15234 type = di_ftype_di_di;
15238 gcc_unreachable ();
15241 /* Override for comparisons. */
15242 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15243 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15244 type = v4si_ftype_v4sf_v4sf;
15246 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15247 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15248 type = v2di_ftype_v2df_v2df;
15250 def_builtin (d->mask, d->name, type, d->code);
15253 /* Add the remaining MMX insns with somewhat more complicated types. */
15254 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15255 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15256 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15257 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15259 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15260 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15261 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15263 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15264 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15266 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15267 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15269 /* comi/ucomi insns. */
15270 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15271 if (d->mask == MASK_SSE2)
15272 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15274 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15276 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15277 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15278 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15280 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15281 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15282 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15283 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15284 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15285 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15286 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15287 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15288 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15289 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15290 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15292 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15294 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15295 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15297 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15298 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15299 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15300 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15302 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15303 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15304 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15305 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15307 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15309 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15311 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15312 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15313 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15314 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15315 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15316 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15318 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15320 /* Original 3DNow! */
15321 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15322 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15323 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15324 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15325 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15326 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15327 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15328 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15329 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15330 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15331 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15332 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15333 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15334 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15335 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15336 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15337 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15338 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15339 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15340 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15342 /* 3DNow! extension as used in the Athlon CPU. */
15343 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15344 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15345 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15346 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15347 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15348 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15351 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15353 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15354 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15356 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15357 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15359 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15360 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15361 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15362 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15363 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15365 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15366 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15367 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15368 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15370 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15371 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15373 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15375 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15376 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15378 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15379 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15380 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15381 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15382 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15384 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15386 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15387 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15388 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15389 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15391 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15392 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15393 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15395 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15396 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15397 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15398 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15400 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15401 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15402 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15404 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15405 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15407 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15408 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15410 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15411 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15412 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15414 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15415 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15416 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15418 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15419 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15421 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15422 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15423 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15424 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15426 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15427 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15428 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15429 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15431 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15432 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15434 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15436 /* Prescott New Instructions. */
15437 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15438 void_ftype_pcvoid_unsigned_unsigned,
15439 IX86_BUILTIN_MONITOR);
15440 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15441 void_ftype_unsigned_unsigned,
15442 IX86_BUILTIN_MWAIT);
15443 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15445 IX86_BUILTIN_MOVSHDUP);
15446 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15448 IX86_BUILTIN_MOVSLDUP);
15449 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15450 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15452 /* Access to the vec_init patterns. */
15453 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15454 integer_type_node, NULL_TREE);
15455 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15456 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15458 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15459 short_integer_type_node,
15460 short_integer_type_node,
15461 short_integer_type_node, NULL_TREE);
15462 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15463 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15465 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15466 char_type_node, char_type_node,
15467 char_type_node, char_type_node,
15468 char_type_node, char_type_node,
15469 char_type_node, NULL_TREE);
15470 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15471 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15473 /* Access to the vec_extract patterns. */
15474 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15475 integer_type_node, NULL_TREE);
15476 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15477 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15479 ftype = build_function_type_list (long_long_integer_type_node,
15480 V2DI_type_node, integer_type_node,
15482 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15483 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15485 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15486 integer_type_node, NULL_TREE);
15487 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15488 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15490 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15491 integer_type_node, NULL_TREE);
15492 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15493 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15495 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15496 integer_type_node, NULL_TREE);
15497 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15498 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15500 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15501 integer_type_node, NULL_TREE);
15502 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15503 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15505 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15506 integer_type_node, NULL_TREE);
15507 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15508 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15510 /* Access to the vec_set patterns. */
15511 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15513 integer_type_node, NULL_TREE);
15514 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15515 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15517 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15519 integer_type_node, NULL_TREE);
15520 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15521 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15524 /* Errors in the source file can cause expand_expr to return const0_rtx
15525 where we expect a vector. To avoid crashing, use one of the vector
15526 clear instructions. */
15528 safe_vector_operand (rtx x, enum machine_mode mode)
15530 if (x == const0_rtx)
15531 x = CONST0_RTX (mode);
15535 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15538 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15541 tree arg0 = TREE_VALUE (arglist);
15542 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15543 rtx op0 = expand_normal (arg0);
15544 rtx op1 = expand_normal (arg1);
15545 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15546 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15547 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15549 if (VECTOR_MODE_P (mode0))
15550 op0 = safe_vector_operand (op0, mode0);
15551 if (VECTOR_MODE_P (mode1))
15552 op1 = safe_vector_operand (op1, mode1);
15554 if (optimize || !target
15555 || GET_MODE (target) != tmode
15556 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15557 target = gen_reg_rtx (tmode);
15559 if (GET_MODE (op1) == SImode && mode1 == TImode)
15561 rtx x = gen_reg_rtx (V4SImode);
15562 emit_insn (gen_sse2_loadd (x, op1));
15563 op1 = gen_lowpart (TImode, x);
15566 /* The insn must want input operands in the same modes as the
15568 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15569 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15571 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15572 op0 = copy_to_mode_reg (mode0, op0);
15573 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15574 op1 = copy_to_mode_reg (mode1, op1);
15576 /* ??? Using ix86_fixup_binary_operands is problematic when
15577 we've got mismatched modes. Fake it. */
15583 if (tmode == mode0 && tmode == mode1)
15585 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15589 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15591 op0 = force_reg (mode0, op0);
15592 op1 = force_reg (mode1, op1);
15593 target = gen_reg_rtx (tmode);
15596 pat = GEN_FCN (icode) (target, op0, op1);
15603 /* Subroutine of ix86_expand_builtin to take care of stores. */
15606 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15609 tree arg0 = TREE_VALUE (arglist);
15610 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15611 rtx op0 = expand_normal (arg0);
15612 rtx op1 = expand_normal (arg1);
15613 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15614 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15616 if (VECTOR_MODE_P (mode1))
15617 op1 = safe_vector_operand (op1, mode1);
15619 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15620 op1 = copy_to_mode_reg (mode1, op1);
15622 pat = GEN_FCN (icode) (op0, op1);
15628 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15631 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15632 rtx target, int do_load)
15635 tree arg0 = TREE_VALUE (arglist);
15636 rtx op0 = expand_normal (arg0);
15637 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15638 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15640 if (optimize || !target
15641 || GET_MODE (target) != tmode
15642 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15643 target = gen_reg_rtx (tmode);
15645 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15648 if (VECTOR_MODE_P (mode0))
15649 op0 = safe_vector_operand (op0, mode0);
15651 if ((optimize && !register_operand (op0, mode0))
15652 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15653 op0 = copy_to_mode_reg (mode0, op0);
15656 pat = GEN_FCN (icode) (target, op0);
15663 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15664 sqrtss, rsqrtss, rcpss. */
15667 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15670 tree arg0 = TREE_VALUE (arglist);
15671 rtx op1, op0 = expand_normal (arg0);
15672 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15673 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15675 if (optimize || !target
15676 || GET_MODE (target) != tmode
15677 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15678 target = gen_reg_rtx (tmode);
15680 if (VECTOR_MODE_P (mode0))
15681 op0 = safe_vector_operand (op0, mode0);
15683 if ((optimize && !register_operand (op0, mode0))
15684 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15685 op0 = copy_to_mode_reg (mode0, op0);
15688 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15689 op1 = copy_to_mode_reg (mode0, op1);
15691 pat = GEN_FCN (icode) (target, op0, op1);
15698 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15701 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15705 tree arg0 = TREE_VALUE (arglist);
15706 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15707 rtx op0 = expand_normal (arg0);
15708 rtx op1 = expand_normal (arg1);
15710 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15711 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15712 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15713 enum rtx_code comparison = d->comparison;
15715 if (VECTOR_MODE_P (mode0))
15716 op0 = safe_vector_operand (op0, mode0);
15717 if (VECTOR_MODE_P (mode1))
15718 op1 = safe_vector_operand (op1, mode1);
15720 /* Swap operands if we have a comparison that isn't available in
15722 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15724 rtx tmp = gen_reg_rtx (mode1);
15725 emit_move_insn (tmp, op1);
15730 if (optimize || !target
15731 || GET_MODE (target) != tmode
15732 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15733 target = gen_reg_rtx (tmode);
15735 if ((optimize && !register_operand (op0, mode0))
15736 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15737 op0 = copy_to_mode_reg (mode0, op0);
15738 if ((optimize && !register_operand (op1, mode1))
15739 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15740 op1 = copy_to_mode_reg (mode1, op1);
15742 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15743 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15750 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15753 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15757 tree arg0 = TREE_VALUE (arglist);
15758 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15759 rtx op0 = expand_normal (arg0);
15760 rtx op1 = expand_normal (arg1);
15762 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15763 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15764 enum rtx_code comparison = d->comparison;
15766 if (VECTOR_MODE_P (mode0))
15767 op0 = safe_vector_operand (op0, mode0);
15768 if (VECTOR_MODE_P (mode1))
15769 op1 = safe_vector_operand (op1, mode1);
15771 /* Swap operands if we have a comparison that isn't available in
15773 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15780 target = gen_reg_rtx (SImode);
15781 emit_move_insn (target, const0_rtx);
15782 target = gen_rtx_SUBREG (QImode, target, 0);
15784 if ((optimize && !register_operand (op0, mode0))
15785 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15786 op0 = copy_to_mode_reg (mode0, op0);
15787 if ((optimize && !register_operand (op1, mode1))
15788 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15789 op1 = copy_to_mode_reg (mode1, op1);
15791 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15792 pat = GEN_FCN (d->icode) (op0, op1);
15796 emit_insn (gen_rtx_SET (VOIDmode,
15797 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15798 gen_rtx_fmt_ee (comparison, QImode,
15802 return SUBREG_REG (target);
15805 /* Return the integer constant in ARG. Constrain it to be in the range
15806 of the subparts of VEC_TYPE; issue an error if not. */
15809 get_element_number (tree vec_type, tree arg)
15811 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15813 if (!host_integerp (arg, 1)
15814 || (elt = tree_low_cst (arg, 1), elt > max))
15816 error ("selector must be an integer constant in the range 0..%wi", max);
15823 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15824 ix86_expand_vector_init. We DO have language-level syntax for this, in
15825 the form of (type){ init-list }. Except that since we can't place emms
15826 instructions from inside the compiler, we can't allow the use of MMX
15827 registers unless the user explicitly asks for it. So we do *not* define
15828 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15829 we have builtins invoked by mmintrin.h that gives us license to emit
15830 these sorts of instructions. */
15833 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15835 enum machine_mode tmode = TYPE_MODE (type);
15836 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15837 int i, n_elt = GET_MODE_NUNITS (tmode);
15838 rtvec v = rtvec_alloc (n_elt);
15840 gcc_assert (VECTOR_MODE_P (tmode));
15842 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15844 rtx x = expand_normal (TREE_VALUE (arglist));
15845 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15848 gcc_assert (arglist == NULL);
15850 if (!target || !register_operand (target, tmode))
15851 target = gen_reg_rtx (tmode);
15853 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15857 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15858 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15859 had a language-level syntax for referencing vector elements. */
15862 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15864 enum machine_mode tmode, mode0;
15869 arg0 = TREE_VALUE (arglist);
15870 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15872 op0 = expand_normal (arg0);
15873 elt = get_element_number (TREE_TYPE (arg0), arg1);
15875 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15876 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15877 gcc_assert (VECTOR_MODE_P (mode0));
15879 op0 = force_reg (mode0, op0);
15881 if (optimize || !target || !register_operand (target, tmode))
15882 target = gen_reg_rtx (tmode);
15884 ix86_expand_vector_extract (true, target, op0, elt);
15889 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15890 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15891 a language-level syntax for referencing vector elements. */
15894 ix86_expand_vec_set_builtin (tree arglist)
15896 enum machine_mode tmode, mode1;
15897 tree arg0, arg1, arg2;
15901 arg0 = TREE_VALUE (arglist);
15902 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15903 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15905 tmode = TYPE_MODE (TREE_TYPE (arg0));
15906 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15907 gcc_assert (VECTOR_MODE_P (tmode));
15909 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15910 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15911 elt = get_element_number (TREE_TYPE (arg0), arg2);
15913 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15914 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15916 op0 = force_reg (tmode, op0);
15917 op1 = force_reg (mode1, op1);
15919 ix86_expand_vector_set (true, op0, op1, elt);
15924 /* Expand an expression EXP that calls a built-in function,
15925 with result going to TARGET if that's convenient
15926 (and in mode MODE if that's convenient).
15927 SUBTARGET may be used as the target for computing one of EXP's operands.
15928 IGNORE is nonzero if the value is to be ignored. */
15931 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15932 enum machine_mode mode ATTRIBUTE_UNUSED,
15933 int ignore ATTRIBUTE_UNUSED)
15935 const struct builtin_description *d;
15937 enum insn_code icode;
15938 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
15939 tree arglist = TREE_OPERAND (exp, 1);
15940 tree arg0, arg1, arg2;
15941 rtx op0, op1, op2, pat;
15942 enum machine_mode tmode, mode0, mode1, mode2;
15943 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15947 case IX86_BUILTIN_EMMS:
15948 emit_insn (gen_mmx_emms ());
15951 case IX86_BUILTIN_SFENCE:
15952 emit_insn (gen_sse_sfence ());
15955 case IX86_BUILTIN_MASKMOVQ:
15956 case IX86_BUILTIN_MASKMOVDQU:
15957 icode = (fcode == IX86_BUILTIN_MASKMOVQ
15958 ? CODE_FOR_mmx_maskmovq
15959 : CODE_FOR_sse2_maskmovdqu);
15960 /* Note the arg order is different from the operand order. */
15961 arg1 = TREE_VALUE (arglist);
15962 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
15963 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15964 op0 = expand_normal (arg0);
15965 op1 = expand_normal (arg1);
15966 op2 = expand_normal (arg2);
15967 mode0 = insn_data[icode].operand[0].mode;
15968 mode1 = insn_data[icode].operand[1].mode;
15969 mode2 = insn_data[icode].operand[2].mode;
15971 op0 = force_reg (Pmode, op0);
15972 op0 = gen_rtx_MEM (mode1, op0);
15974 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15975 op0 = copy_to_mode_reg (mode0, op0);
15976 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15977 op1 = copy_to_mode_reg (mode1, op1);
15978 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
15979 op2 = copy_to_mode_reg (mode2, op2);
15980 pat = GEN_FCN (icode) (op0, op1, op2);
15986 case IX86_BUILTIN_SQRTSS:
15987 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
15988 case IX86_BUILTIN_RSQRTSS:
15989 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
15990 case IX86_BUILTIN_RCPSS:
15991 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
15993 case IX86_BUILTIN_LOADUPS:
15994 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
15996 case IX86_BUILTIN_STOREUPS:
15997 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
15999 case IX86_BUILTIN_LOADHPS:
16000 case IX86_BUILTIN_LOADLPS:
16001 case IX86_BUILTIN_LOADHPD:
16002 case IX86_BUILTIN_LOADLPD:
16003 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16004 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16005 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16006 : CODE_FOR_sse2_loadlpd);
16007 arg0 = TREE_VALUE (arglist);
16008 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16009 op0 = expand_normal (arg0);
16010 op1 = expand_normal (arg1);
16011 tmode = insn_data[icode].operand[0].mode;
16012 mode0 = insn_data[icode].operand[1].mode;
16013 mode1 = insn_data[icode].operand[2].mode;
16015 op0 = force_reg (mode0, op0);
16016 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16017 if (optimize || target == 0
16018 || GET_MODE (target) != tmode
16019 || !register_operand (target, tmode))
16020 target = gen_reg_rtx (tmode);
16021 pat = GEN_FCN (icode) (target, op0, op1);
16027 case IX86_BUILTIN_STOREHPS:
16028 case IX86_BUILTIN_STORELPS:
16029 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16030 : CODE_FOR_sse_storelps);
16031 arg0 = TREE_VALUE (arglist);
16032 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16033 op0 = expand_normal (arg0);
16034 op1 = expand_normal (arg1);
16035 mode0 = insn_data[icode].operand[0].mode;
16036 mode1 = insn_data[icode].operand[1].mode;
16038 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16039 op1 = force_reg (mode1, op1);
16041 pat = GEN_FCN (icode) (op0, op1);
16047 case IX86_BUILTIN_MOVNTPS:
16048 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16049 case IX86_BUILTIN_MOVNTQ:
16050 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16052 case IX86_BUILTIN_LDMXCSR:
16053 op0 = expand_normal (TREE_VALUE (arglist));
16054 target = assign_386_stack_local (SImode, SLOT_TEMP);
16055 emit_move_insn (target, op0);
16056 emit_insn (gen_sse_ldmxcsr (target));
16059 case IX86_BUILTIN_STMXCSR:
16060 target = assign_386_stack_local (SImode, SLOT_TEMP);
16061 emit_insn (gen_sse_stmxcsr (target));
16062 return copy_to_mode_reg (SImode, target);
16064 case IX86_BUILTIN_SHUFPS:
16065 case IX86_BUILTIN_SHUFPD:
16066 icode = (fcode == IX86_BUILTIN_SHUFPS
16067 ? CODE_FOR_sse_shufps
16068 : CODE_FOR_sse2_shufpd);
16069 arg0 = TREE_VALUE (arglist);
16070 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16071 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16072 op0 = expand_normal (arg0);
16073 op1 = expand_normal (arg1);
16074 op2 = expand_normal (arg2);
16075 tmode = insn_data[icode].operand[0].mode;
16076 mode0 = insn_data[icode].operand[1].mode;
16077 mode1 = insn_data[icode].operand[2].mode;
16078 mode2 = insn_data[icode].operand[3].mode;
16080 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16081 op0 = copy_to_mode_reg (mode0, op0);
16082 if ((optimize && !register_operand (op1, mode1))
16083 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16084 op1 = copy_to_mode_reg (mode1, op1);
16085 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16087 /* @@@ better error message */
16088 error ("mask must be an immediate");
16089 return gen_reg_rtx (tmode);
16091 if (optimize || target == 0
16092 || GET_MODE (target) != tmode
16093 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16094 target = gen_reg_rtx (tmode);
16095 pat = GEN_FCN (icode) (target, op0, op1, op2);
16101 case IX86_BUILTIN_PSHUFW:
16102 case IX86_BUILTIN_PSHUFD:
16103 case IX86_BUILTIN_PSHUFHW:
16104 case IX86_BUILTIN_PSHUFLW:
16105 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16106 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16107 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16108 : CODE_FOR_mmx_pshufw);
16109 arg0 = TREE_VALUE (arglist);
16110 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16111 op0 = expand_normal (arg0);
16112 op1 = expand_normal (arg1);
16113 tmode = insn_data[icode].operand[0].mode;
16114 mode1 = insn_data[icode].operand[1].mode;
16115 mode2 = insn_data[icode].operand[2].mode;
16117 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16118 op0 = copy_to_mode_reg (mode1, op0);
16119 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16121 /* @@@ better error message */
16122 error ("mask must be an immediate");
16126 || GET_MODE (target) != tmode
16127 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16128 target = gen_reg_rtx (tmode);
16129 pat = GEN_FCN (icode) (target, op0, op1);
16135 case IX86_BUILTIN_PSLLDQI128:
16136 case IX86_BUILTIN_PSRLDQI128:
16137 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16138 : CODE_FOR_sse2_lshrti3);
16139 arg0 = TREE_VALUE (arglist);
16140 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16141 op0 = expand_normal (arg0);
16142 op1 = expand_normal (arg1);
16143 tmode = insn_data[icode].operand[0].mode;
16144 mode1 = insn_data[icode].operand[1].mode;
16145 mode2 = insn_data[icode].operand[2].mode;
16147 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16149 op0 = copy_to_reg (op0);
16150 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16152 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16154 error ("shift must be an immediate");
16157 target = gen_reg_rtx (V2DImode);
16158 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16164 case IX86_BUILTIN_FEMMS:
16165 emit_insn (gen_mmx_femms ());
16168 case IX86_BUILTIN_PAVGUSB:
16169 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16171 case IX86_BUILTIN_PF2ID:
16172 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16174 case IX86_BUILTIN_PFACC:
16175 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16177 case IX86_BUILTIN_PFADD:
16178 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16180 case IX86_BUILTIN_PFCMPEQ:
16181 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16183 case IX86_BUILTIN_PFCMPGE:
16184 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16186 case IX86_BUILTIN_PFCMPGT:
16187 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16189 case IX86_BUILTIN_PFMAX:
16190 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16192 case IX86_BUILTIN_PFMIN:
16193 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16195 case IX86_BUILTIN_PFMUL:
16196 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16198 case IX86_BUILTIN_PFRCP:
16199 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16201 case IX86_BUILTIN_PFRCPIT1:
16202 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16204 case IX86_BUILTIN_PFRCPIT2:
16205 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16207 case IX86_BUILTIN_PFRSQIT1:
16208 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16210 case IX86_BUILTIN_PFRSQRT:
16211 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16213 case IX86_BUILTIN_PFSUB:
16214 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16216 case IX86_BUILTIN_PFSUBR:
16217 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16219 case IX86_BUILTIN_PI2FD:
16220 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16222 case IX86_BUILTIN_PMULHRW:
16223 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16225 case IX86_BUILTIN_PF2IW:
16226 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16228 case IX86_BUILTIN_PFNACC:
16229 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16231 case IX86_BUILTIN_PFPNACC:
16232 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16234 case IX86_BUILTIN_PI2FW:
16235 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16237 case IX86_BUILTIN_PSWAPDSI:
16238 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16240 case IX86_BUILTIN_PSWAPDSF:
16241 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16243 case IX86_BUILTIN_SQRTSD:
16244 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16245 case IX86_BUILTIN_LOADUPD:
16246 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16247 case IX86_BUILTIN_STOREUPD:
16248 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16250 case IX86_BUILTIN_MFENCE:
16251 emit_insn (gen_sse2_mfence ());
16253 case IX86_BUILTIN_LFENCE:
16254 emit_insn (gen_sse2_lfence ());
16257 case IX86_BUILTIN_CLFLUSH:
16258 arg0 = TREE_VALUE (arglist);
16259 op0 = expand_normal (arg0);
16260 icode = CODE_FOR_sse2_clflush;
16261 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16262 op0 = copy_to_mode_reg (Pmode, op0);
16264 emit_insn (gen_sse2_clflush (op0));
16267 case IX86_BUILTIN_MOVNTPD:
16268 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16269 case IX86_BUILTIN_MOVNTDQ:
16270 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16271 case IX86_BUILTIN_MOVNTI:
16272 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16274 case IX86_BUILTIN_LOADDQU:
16275 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16276 case IX86_BUILTIN_STOREDQU:
16277 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16279 case IX86_BUILTIN_MONITOR:
16280 arg0 = TREE_VALUE (arglist);
16281 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16282 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16283 op0 = expand_normal (arg0);
16284 op1 = expand_normal (arg1);
16285 op2 = expand_normal (arg2);
16287 op0 = copy_to_mode_reg (Pmode, op0);
16289 op1 = copy_to_mode_reg (SImode, op1);
16291 op2 = copy_to_mode_reg (SImode, op2);
16293 emit_insn (gen_sse3_monitor (op0, op1, op2));
16295 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16298 case IX86_BUILTIN_MWAIT:
16299 arg0 = TREE_VALUE (arglist);
16300 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16301 op0 = expand_normal (arg0);
16302 op1 = expand_normal (arg1);
16304 op0 = copy_to_mode_reg (SImode, op0);
16306 op1 = copy_to_mode_reg (SImode, op1);
16307 emit_insn (gen_sse3_mwait (op0, op1));
16310 case IX86_BUILTIN_LDDQU:
16311 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16314 case IX86_BUILTIN_VEC_INIT_V2SI:
16315 case IX86_BUILTIN_VEC_INIT_V4HI:
16316 case IX86_BUILTIN_VEC_INIT_V8QI:
16317 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16319 case IX86_BUILTIN_VEC_EXT_V2DF:
16320 case IX86_BUILTIN_VEC_EXT_V2DI:
16321 case IX86_BUILTIN_VEC_EXT_V4SF:
16322 case IX86_BUILTIN_VEC_EXT_V4SI:
16323 case IX86_BUILTIN_VEC_EXT_V8HI:
16324 case IX86_BUILTIN_VEC_EXT_V2SI:
16325 case IX86_BUILTIN_VEC_EXT_V4HI:
16326 return ix86_expand_vec_ext_builtin (arglist, target);
16328 case IX86_BUILTIN_VEC_SET_V8HI:
16329 case IX86_BUILTIN_VEC_SET_V4HI:
16330 return ix86_expand_vec_set_builtin (arglist);
16336 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16337 if (d->code == fcode)
16339 /* Compares are treated specially. */
16340 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16341 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16342 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16343 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16344 return ix86_expand_sse_compare (d, arglist, target);
16346 return ix86_expand_binop_builtin (d->icode, arglist, target);
16349 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16350 if (d->code == fcode)
16351 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16353 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16354 if (d->code == fcode)
16355 return ix86_expand_sse_comi (d, arglist, target);
16357 gcc_unreachable ();
16360 /* Store OPERAND to the memory after reload is completed. This means
16361 that we can't easily use assign_stack_local. */
16363 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16367 gcc_assert (reload_completed);
16368 if (TARGET_RED_ZONE)
16370 result = gen_rtx_MEM (mode,
16371 gen_rtx_PLUS (Pmode,
16373 GEN_INT (-RED_ZONE_SIZE)));
16374 emit_move_insn (result, operand);
16376 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16382 operand = gen_lowpart (DImode, operand);
16386 gen_rtx_SET (VOIDmode,
16387 gen_rtx_MEM (DImode,
16388 gen_rtx_PRE_DEC (DImode,
16389 stack_pointer_rtx)),
16393 gcc_unreachable ();
16395 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16404 split_di (&operand, 1, operands, operands + 1);
16406 gen_rtx_SET (VOIDmode,
16407 gen_rtx_MEM (SImode,
16408 gen_rtx_PRE_DEC (Pmode,
16409 stack_pointer_rtx)),
16412 gen_rtx_SET (VOIDmode,
16413 gen_rtx_MEM (SImode,
16414 gen_rtx_PRE_DEC (Pmode,
16415 stack_pointer_rtx)),
16420 /* Store HImodes as SImodes. */
16421 operand = gen_lowpart (SImode, operand);
16425 gen_rtx_SET (VOIDmode,
16426 gen_rtx_MEM (GET_MODE (operand),
16427 gen_rtx_PRE_DEC (SImode,
16428 stack_pointer_rtx)),
16432 gcc_unreachable ();
16434 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16439 /* Free operand from the memory. */
16441 ix86_free_from_memory (enum machine_mode mode)
16443 if (!TARGET_RED_ZONE)
16447 if (mode == DImode || TARGET_64BIT)
16451 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16452 to pop or add instruction if registers are available. */
16453 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16454 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16459 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16460 QImode must go into class Q_REGS.
16461 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16462 movdf to do mem-to-mem moves through integer regs. */
16464 ix86_preferred_reload_class (rtx x, enum reg_class class)
16466 enum machine_mode mode = GET_MODE (x);
16468 /* We're only allowed to return a subclass of CLASS. Many of the
16469 following checks fail for NO_REGS, so eliminate that early. */
16470 if (class == NO_REGS)
16473 /* All classes can load zeros. */
16474 if (x == CONST0_RTX (mode))
16477 /* Force constants into memory if we are loading a (nonzero) constant into
16478 an MMX or SSE register. This is because there are no MMX/SSE instructions
16479 to load from a constant. */
16481 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16484 /* Prefer SSE regs only, if we can use them for math. */
16485 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16486 return SSE_CLASS_P (class) ? class : NO_REGS;
16488 /* Floating-point constants need more complex checks. */
16489 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16491 /* General regs can load everything. */
16492 if (reg_class_subset_p (class, GENERAL_REGS))
16495 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16496 zero above. We only want to wind up preferring 80387 registers if
16497 we plan on doing computation with them. */
16499 && standard_80387_constant_p (x))
16501 /* Limit class to non-sse. */
16502 if (class == FLOAT_SSE_REGS)
16504 if (class == FP_TOP_SSE_REGS)
16506 if (class == FP_SECOND_SSE_REGS)
16507 return FP_SECOND_REG;
16508 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16515 /* Generally when we see PLUS here, it's the function invariant
16516 (plus soft-fp const_int). Which can only be computed into general
16518 if (GET_CODE (x) == PLUS)
16519 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16521 /* QImode constants are easy to load, but non-constant QImode data
16522 must go into Q_REGS. */
16523 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16525 if (reg_class_subset_p (class, Q_REGS))
16527 if (reg_class_subset_p (Q_REGS, class))
16535 /* Discourage putting floating-point values in SSE registers unless
16536 SSE math is being used, and likewise for the 387 registers. */
16538 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16540 enum machine_mode mode = GET_MODE (x);
16542 /* Restrict the output reload class to the register bank that we are doing
16543 math on. If we would like not to return a subset of CLASS, reject this
16544 alternative: if reload cannot do this, it will still use its choice. */
16545 mode = GET_MODE (x);
16546 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16547 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16549 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16551 if (class == FP_TOP_SSE_REGS)
16553 else if (class == FP_SECOND_SSE_REGS)
16554 return FP_SECOND_REG;
16556 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16562 /* If we are copying between general and FP registers, we need a memory
16563 location. The same is true for SSE and MMX registers.
16565 The macro can't work reliably when one of the CLASSES is class containing
16566 registers from multiple units (SSE, MMX, integer). We avoid this by never
16567 combining those units in single alternative in the machine description.
16568 Ensure that this constraint holds to avoid unexpected surprises.
16570 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16571 enforce these sanity checks. */
16574 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16575 enum machine_mode mode, int strict)
16577 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16578 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16579 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16580 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16581 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16582 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16584 gcc_assert (!strict);
16588 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16591 /* ??? This is a lie. We do have moves between mmx/general, and for
16592 mmx/sse2. But by saying we need secondary memory we discourage the
16593 register allocator from using the mmx registers unless needed. */
16594 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16597 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16599 /* SSE1 doesn't have any direct moves from other classes. */
16603 /* If the target says that inter-unit moves are more expensive
16604 than moving through memory, then don't generate them. */
16605 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16608 /* Between SSE and general, we have moves no larger than word size. */
16609 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16612 /* ??? For the cost of one register reformat penalty, we could use
16613 the same instructions to move SFmode and DFmode data, but the
16614 relevant move patterns don't support those alternatives. */
16615 if (mode == SFmode || mode == DFmode)
16622 /* Return true if the registers in CLASS cannot represent the change from
16623 modes FROM to TO. */
16626 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16627 enum reg_class class)
16632 /* x87 registers can't do subreg at all, as all values are reformatted
16633 to extended precision. */
16634 if (MAYBE_FLOAT_CLASS_P (class))
16637 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16639 /* Vector registers do not support QI or HImode loads. If we don't
16640 disallow a change to these modes, reload will assume it's ok to
16641 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16642 the vec_dupv4hi pattern. */
16643 if (GET_MODE_SIZE (from) < 4)
16646 /* Vector registers do not support subreg with nonzero offsets, which
16647 are otherwise valid for integer registers. Since we can't see
16648 whether we have a nonzero offset from here, prohibit all
16649 nonparadoxical subregs changing size. */
16650 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16657 /* Return the cost of moving data from a register in class CLASS1 to
16658 one in class CLASS2.
16660 It is not required that the cost always equal 2 when FROM is the same as TO;
16661 on some machines it is expensive to move between registers if they are not
16662 general registers. */
16665 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16666 enum reg_class class2)
16668 /* In case we require secondary memory, compute cost of the store followed
16669 by load. In order to avoid bad register allocation choices, we need
16670 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16672 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16676 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16677 MEMORY_MOVE_COST (mode, class1, 1));
16678 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16679 MEMORY_MOVE_COST (mode, class2, 1));
16681 /* In case of copying from general_purpose_register we may emit multiple
16682 stores followed by single load causing memory size mismatch stall.
16683 Count this as arbitrarily high cost of 20. */
16684 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16687 /* In the case of FP/MMX moves, the registers actually overlap, and we
16688 have to switch modes in order to treat them differently. */
16689 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16690 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16696 /* Moves between SSE/MMX and integer unit are expensive. */
16697 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16698 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16699 return ix86_cost->mmxsse_to_integer;
16700 if (MAYBE_FLOAT_CLASS_P (class1))
16701 return ix86_cost->fp_move;
16702 if (MAYBE_SSE_CLASS_P (class1))
16703 return ix86_cost->sse_move;
16704 if (MAYBE_MMX_CLASS_P (class1))
16705 return ix86_cost->mmx_move;
16709 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16712 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16714 /* Flags and only flags can only hold CCmode values. */
16715 if (CC_REGNO_P (regno))
16716 return GET_MODE_CLASS (mode) == MODE_CC;
16717 if (GET_MODE_CLASS (mode) == MODE_CC
16718 || GET_MODE_CLASS (mode) == MODE_RANDOM
16719 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16721 if (FP_REGNO_P (regno))
16722 return VALID_FP_MODE_P (mode);
16723 if (SSE_REGNO_P (regno))
16725 /* We implement the move patterns for all vector modes into and
16726 out of SSE registers, even when no operation instructions
16728 return (VALID_SSE_REG_MODE (mode)
16729 || VALID_SSE2_REG_MODE (mode)
16730 || VALID_MMX_REG_MODE (mode)
16731 || VALID_MMX_REG_MODE_3DNOW (mode));
16733 if (MMX_REGNO_P (regno))
16735 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16736 so if the register is available at all, then we can move data of
16737 the given mode into or out of it. */
16738 return (VALID_MMX_REG_MODE (mode)
16739 || VALID_MMX_REG_MODE_3DNOW (mode));
16742 if (mode == QImode)
16744 /* Take care for QImode values - they can be in non-QI regs,
16745 but then they do cause partial register stalls. */
16746 if (regno < 4 || TARGET_64BIT)
16748 if (!TARGET_PARTIAL_REG_STALL)
16750 return reload_in_progress || reload_completed;
16752 /* We handle both integer and floats in the general purpose registers. */
16753 else if (VALID_INT_MODE_P (mode))
16755 else if (VALID_FP_MODE_P (mode))
16757 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16758 on to use that value in smaller contexts, this can easily force a
16759 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16760 supporting DImode, allow it. */
16761 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16767 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16768 tieable integer mode. */
16771 ix86_tieable_integer_mode_p (enum machine_mode mode)
16780 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16783 return TARGET_64BIT;
16790 /* Return true if MODE1 is accessible in a register that can hold MODE2
16791 without copying. That is, all register classes that can hold MODE2
16792 can also hold MODE1. */
16795 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16797 if (mode1 == mode2)
16800 if (ix86_tieable_integer_mode_p (mode1)
16801 && ix86_tieable_integer_mode_p (mode2))
16804 /* MODE2 being XFmode implies fp stack or general regs, which means we
16805 can tie any smaller floating point modes to it. Note that we do not
16806 tie this with TFmode. */
16807 if (mode2 == XFmode)
16808 return mode1 == SFmode || mode1 == DFmode;
16810 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16811 that we can tie it with SFmode. */
16812 if (mode2 == DFmode)
16813 return mode1 == SFmode;
16815 /* If MODE2 is only appropriate for an SSE register, then tie with
16816 any other mode acceptable to SSE registers. */
16817 if (GET_MODE_SIZE (mode2) >= 8
16818 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16819 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16821 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16822 with any other mode acceptable to MMX registers. */
16823 if (GET_MODE_SIZE (mode2) == 8
16824 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16825 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16830 /* Return the cost of moving data of mode M between a
16831 register and memory. A value of 2 is the default; this cost is
16832 relative to those in `REGISTER_MOVE_COST'.
16834 If moving between registers and memory is more expensive than
16835 between two registers, you should define this macro to express the
16838 Model also increased moving costs of QImode registers in non
16842 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
16844 if (FLOAT_CLASS_P (class))
16861 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16863 if (SSE_CLASS_P (class))
16866 switch (GET_MODE_SIZE (mode))
16880 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16882 if (MMX_CLASS_P (class))
16885 switch (GET_MODE_SIZE (mode))
16896 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
16898 switch (GET_MODE_SIZE (mode))
16902 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16903 : ix86_cost->movzbl_load);
16905 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16906 : ix86_cost->int_store[0] + 4);
16909 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16911 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
16912 if (mode == TFmode)
16914 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16915 * (((int) GET_MODE_SIZE (mode)
16916 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
16920 /* Compute a (partial) cost for rtx X. Return true if the complete
16921 cost has been computed, and false if subexpressions should be
16922 scanned. In either case, *TOTAL contains the cost result. */
16925 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16927 enum machine_mode mode = GET_MODE (x);
16935 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16937 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16939 else if (flag_pic && SYMBOLIC_CONST (x)
16941 || (!GET_CODE (x) != LABEL_REF
16942 && (GET_CODE (x) != SYMBOL_REF
16943 || !SYMBOL_REF_LOCAL_P (x)))))
16950 if (mode == VOIDmode)
16953 switch (standard_80387_constant_p (x))
16958 default: /* Other constants */
16963 /* Start with (MEM (SYMBOL_REF)), since that's where
16964 it'll probably end up. Add a penalty for size. */
16965 *total = (COSTS_N_INSNS (1)
16966 + (flag_pic != 0 && !TARGET_64BIT)
16967 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16973 /* The zero extensions is often completely free on x86_64, so make
16974 it as cheap as possible. */
16975 if (TARGET_64BIT && mode == DImode
16976 && GET_MODE (XEXP (x, 0)) == SImode)
16978 else if (TARGET_ZERO_EXTEND_WITH_AND)
16979 *total = ix86_cost->add;
16981 *total = ix86_cost->movzx;
16985 *total = ix86_cost->movsx;
16989 if (GET_CODE (XEXP (x, 1)) == CONST_INT
16990 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
16992 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16995 *total = ix86_cost->add;
16998 if ((value == 2 || value == 3)
16999 && ix86_cost->lea <= ix86_cost->shift_const)
17001 *total = ix86_cost->lea;
17011 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17013 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17015 if (INTVAL (XEXP (x, 1)) > 32)
17016 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17018 *total = ix86_cost->shift_const * 2;
17022 if (GET_CODE (XEXP (x, 1)) == AND)
17023 *total = ix86_cost->shift_var * 2;
17025 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17030 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17031 *total = ix86_cost->shift_const;
17033 *total = ix86_cost->shift_var;
17038 if (FLOAT_MODE_P (mode))
17040 *total = ix86_cost->fmul;
17045 rtx op0 = XEXP (x, 0);
17046 rtx op1 = XEXP (x, 1);
17048 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17050 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17051 for (nbits = 0; value != 0; value &= value - 1)
17055 /* This is arbitrary. */
17058 /* Compute costs correctly for widening multiplication. */
17059 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
17060 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17061 == GET_MODE_SIZE (mode))
17063 int is_mulwiden = 0;
17064 enum machine_mode inner_mode = GET_MODE (op0);
17066 if (GET_CODE (op0) == GET_CODE (op1))
17067 is_mulwiden = 1, op1 = XEXP (op1, 0);
17068 else if (GET_CODE (op1) == CONST_INT)
17070 if (GET_CODE (op0) == SIGN_EXTEND)
17071 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17074 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17078 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17081 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17082 + nbits * ix86_cost->mult_bit
17083 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17092 if (FLOAT_MODE_P (mode))
17093 *total = ix86_cost->fdiv;
17095 *total = ix86_cost->divide[MODE_INDEX (mode)];
17099 if (FLOAT_MODE_P (mode))
17100 *total = ix86_cost->fadd;
17101 else if (GET_MODE_CLASS (mode) == MODE_INT
17102 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17104 if (GET_CODE (XEXP (x, 0)) == PLUS
17105 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17106 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17107 && CONSTANT_P (XEXP (x, 1)))
17109 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17110 if (val == 2 || val == 4 || val == 8)
17112 *total = ix86_cost->lea;
17113 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17114 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17116 *total += rtx_cost (XEXP (x, 1), outer_code);
17120 else if (GET_CODE (XEXP (x, 0)) == MULT
17121 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17123 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17124 if (val == 2 || val == 4 || val == 8)
17126 *total = ix86_cost->lea;
17127 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17128 *total += rtx_cost (XEXP (x, 1), outer_code);
17132 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17134 *total = ix86_cost->lea;
17135 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17136 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17137 *total += rtx_cost (XEXP (x, 1), outer_code);
17144 if (FLOAT_MODE_P (mode))
17146 *total = ix86_cost->fadd;
17154 if (!TARGET_64BIT && mode == DImode)
17156 *total = (ix86_cost->add * 2
17157 + (rtx_cost (XEXP (x, 0), outer_code)
17158 << (GET_MODE (XEXP (x, 0)) != DImode))
17159 + (rtx_cost (XEXP (x, 1), outer_code)
17160 << (GET_MODE (XEXP (x, 1)) != DImode)));
17166 if (FLOAT_MODE_P (mode))
17168 *total = ix86_cost->fchs;
17174 if (!TARGET_64BIT && mode == DImode)
17175 *total = ix86_cost->add * 2;
17177 *total = ix86_cost->add;
17181 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17182 && XEXP (XEXP (x, 0), 1) == const1_rtx
17183 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17184 && XEXP (x, 1) == const0_rtx)
17186 /* This kind of construct is implemented using test[bwl].
17187 Treat it as if we had an AND. */
17188 *total = (ix86_cost->add
17189 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17190 + rtx_cost (const1_rtx, outer_code));
17196 if (!TARGET_SSE_MATH
17198 || (mode == DFmode && !TARGET_SSE2))
17199 /* For standard 80387 constants, raise the cost to prevent
17200 compress_float_constant() to generate load from memory. */
17201 switch (standard_80387_constant_p (XEXP (x, 0)))
17211 *total = (x86_ext_80387_constants & TUNEMASK
17218 if (FLOAT_MODE_P (mode))
17219 *total = ix86_cost->fabs;
17223 if (FLOAT_MODE_P (mode))
17224 *total = ix86_cost->fsqrt;
17228 if (XINT (x, 1) == UNSPEC_TP)
17239 static int current_machopic_label_num;
17241 /* Given a symbol name and its associated stub, write out the
17242 definition of the stub. */
17245 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17247 unsigned int length;
17248 char *binder_name, *symbol_name, lazy_ptr_name[32];
17249 int label = ++current_machopic_label_num;
17251 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17252 symb = (*targetm.strip_name_encoding) (symb);
17254 length = strlen (stub);
17255 binder_name = alloca (length + 32);
17256 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17258 length = strlen (symb);
17259 symbol_name = alloca (length + 32);
17260 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17262 sprintf (lazy_ptr_name, "L%d$lz", label);
17265 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17267 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17269 fprintf (file, "%s:\n", stub);
17270 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17274 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17275 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17276 fprintf (file, "\tjmp\t*%%edx\n");
17279 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17281 fprintf (file, "%s:\n", binder_name);
17285 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17286 fprintf (file, "\tpushl\t%%eax\n");
17289 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17291 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17293 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17294 fprintf (file, "%s:\n", lazy_ptr_name);
17295 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17296 fprintf (file, "\t.long %s\n", binder_name);
17300 darwin_x86_file_end (void)
17302 darwin_file_end ();
17305 #endif /* TARGET_MACHO */
17307 /* Order the registers for register allocator. */
17310 x86_order_regs_for_local_alloc (void)
17315 /* First allocate the local general purpose registers. */
17316 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17317 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17318 reg_alloc_order [pos++] = i;
17320 /* Global general purpose registers. */
17321 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17322 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17323 reg_alloc_order [pos++] = i;
17325 /* x87 registers come first in case we are doing FP math
17327 if (!TARGET_SSE_MATH)
17328 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17329 reg_alloc_order [pos++] = i;
17331 /* SSE registers. */
17332 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17333 reg_alloc_order [pos++] = i;
17334 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17335 reg_alloc_order [pos++] = i;
17337 /* x87 registers. */
17338 if (TARGET_SSE_MATH)
17339 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17340 reg_alloc_order [pos++] = i;
17342 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17343 reg_alloc_order [pos++] = i;
17345 /* Initialize the rest of array as we do not allocate some registers
17347 while (pos < FIRST_PSEUDO_REGISTER)
17348 reg_alloc_order [pos++] = 0;
17351 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17352 struct attribute_spec.handler. */
17354 ix86_handle_struct_attribute (tree *node, tree name,
17355 tree args ATTRIBUTE_UNUSED,
17356 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17359 if (DECL_P (*node))
17361 if (TREE_CODE (*node) == TYPE_DECL)
17362 type = &TREE_TYPE (*node);
17367 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17368 || TREE_CODE (*type) == UNION_TYPE)))
17370 warning (OPT_Wattributes, "%qs attribute ignored",
17371 IDENTIFIER_POINTER (name));
17372 *no_add_attrs = true;
17375 else if ((is_attribute_p ("ms_struct", name)
17376 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17377 || ((is_attribute_p ("gcc_struct", name)
17378 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17380 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17381 IDENTIFIER_POINTER (name));
17382 *no_add_attrs = true;
17389 ix86_ms_bitfield_layout_p (tree record_type)
17391 return (TARGET_MS_BITFIELD_LAYOUT &&
17392 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17393 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17396 /* Returns an expression indicating where the this parameter is
17397 located on entry to the FUNCTION. */
17400 x86_this_parameter (tree function)
17402 tree type = TREE_TYPE (function);
17406 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17407 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17410 if (ix86_function_regparm (type, function) > 0)
17414 parm = TYPE_ARG_TYPES (type);
17415 /* Figure out whether or not the function has a variable number of
17417 for (; parm; parm = TREE_CHAIN (parm))
17418 if (TREE_VALUE (parm) == void_type_node)
17420 /* If not, the this parameter is in the first argument. */
17424 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17426 return gen_rtx_REG (SImode, regno);
17430 if (aggregate_value_p (TREE_TYPE (type), type))
17431 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17433 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17436 /* Determine whether x86_output_mi_thunk can succeed. */
17439 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17440 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17441 HOST_WIDE_INT vcall_offset, tree function)
17443 /* 64-bit can handle anything. */
17447 /* For 32-bit, everything's fine if we have one free register. */
17448 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17451 /* Need a free register for vcall_offset. */
17455 /* Need a free register for GOT references. */
17456 if (flag_pic && !(*targetm.binds_local_p) (function))
17459 /* Otherwise ok. */
17463 /* Output the assembler code for a thunk function. THUNK_DECL is the
17464 declaration for the thunk function itself, FUNCTION is the decl for
17465 the target function. DELTA is an immediate constant offset to be
17466 added to THIS. If VCALL_OFFSET is nonzero, the word at
17467 *(*this + vcall_offset) should be added to THIS. */
17470 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17471 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17472 HOST_WIDE_INT vcall_offset, tree function)
17475 rtx this = x86_this_parameter (function);
17478 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17479 pull it in now and let DELTA benefit. */
17482 else if (vcall_offset)
17484 /* Put the this parameter into %eax. */
17486 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17487 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17490 this_reg = NULL_RTX;
17492 /* Adjust the this parameter by a fixed constant. */
17495 xops[0] = GEN_INT (delta);
17496 xops[1] = this_reg ? this_reg : this;
17499 if (!x86_64_general_operand (xops[0], DImode))
17501 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17503 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17507 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17510 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17513 /* Adjust the this parameter by a value stored in the vtable. */
17517 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17520 int tmp_regno = 2 /* ECX */;
17521 if (lookup_attribute ("fastcall",
17522 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17523 tmp_regno = 0 /* EAX */;
17524 tmp = gen_rtx_REG (SImode, tmp_regno);
17527 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17530 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17532 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17534 /* Adjust the this parameter. */
17535 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17536 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17538 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17539 xops[0] = GEN_INT (vcall_offset);
17541 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17542 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17544 xops[1] = this_reg;
17546 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17548 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17551 /* If necessary, drop THIS back to its stack slot. */
17552 if (this_reg && this_reg != this)
17554 xops[0] = this_reg;
17556 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17559 xops[0] = XEXP (DECL_RTL (function), 0);
17562 if (!flag_pic || (*targetm.binds_local_p) (function))
17563 output_asm_insn ("jmp\t%P0", xops);
17566 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17567 tmp = gen_rtx_CONST (Pmode, tmp);
17568 tmp = gen_rtx_MEM (QImode, tmp);
17570 output_asm_insn ("jmp\t%A0", xops);
17575 if (!flag_pic || (*targetm.binds_local_p) (function))
17576 output_asm_insn ("jmp\t%P0", xops);
17581 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17582 tmp = (gen_rtx_SYMBOL_REF
17584 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17585 tmp = gen_rtx_MEM (QImode, tmp);
17587 output_asm_insn ("jmp\t%0", xops);
17590 #endif /* TARGET_MACHO */
17592 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17593 output_set_got (tmp, NULL_RTX);
17596 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17597 output_asm_insn ("jmp\t{*}%1", xops);
17603 x86_file_start (void)
17605 default_file_start ();
17607 darwin_file_start ();
17609 if (X86_FILE_START_VERSION_DIRECTIVE)
17610 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17611 if (X86_FILE_START_FLTUSED)
17612 fputs ("\t.global\t__fltused\n", asm_out_file);
17613 if (ix86_asm_dialect == ASM_INTEL)
17614 fputs ("\t.intel_syntax\n", asm_out_file);
17618 x86_field_alignment (tree field, int computed)
17620 enum machine_mode mode;
17621 tree type = TREE_TYPE (field);
17623 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17625 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17626 ? get_inner_array_type (type) : type);
17627 if (mode == DFmode || mode == DCmode
17628 || GET_MODE_CLASS (mode) == MODE_INT
17629 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17630 return MIN (32, computed);
17634 /* Output assembler code to FILE to increment profiler label # LABELNO
17635 for profiling a function entry. */
17637 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17642 #ifndef NO_PROFILE_COUNTERS
17643 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17645 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17649 #ifndef NO_PROFILE_COUNTERS
17650 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17652 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17656 #ifndef NO_PROFILE_COUNTERS
17657 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17658 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17660 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17664 #ifndef NO_PROFILE_COUNTERS
17665 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17666 PROFILE_COUNT_REGISTER);
17668 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17672 /* We don't have exact information about the insn sizes, but we may assume
17673 quite safely that we are informed about all 1 byte insns and memory
17674 address sizes. This is enough to eliminate unnecessary padding in
17678 min_insn_size (rtx insn)
17682 if (!INSN_P (insn) || !active_insn_p (insn))
17685 /* Discard alignments we've emit and jump instructions. */
17686 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17687 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17689 if (GET_CODE (insn) == JUMP_INSN
17690 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17691 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17694 /* Important case - calls are always 5 bytes.
17695 It is common to have many calls in the row. */
17696 if (GET_CODE (insn) == CALL_INSN
17697 && symbolic_reference_mentioned_p (PATTERN (insn))
17698 && !SIBLING_CALL_P (insn))
17700 if (get_attr_length (insn) <= 1)
17703 /* For normal instructions we may rely on the sizes of addresses
17704 and the presence of symbol to require 4 bytes of encoding.
17705 This is not the case for jumps where references are PC relative. */
17706 if (GET_CODE (insn) != JUMP_INSN)
17708 l = get_attr_length_address (insn);
17709 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17718 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
17722 ix86_avoid_jump_misspredicts (void)
17724 rtx insn, start = get_insns ();
17725 int nbytes = 0, njumps = 0;
17728 /* Look for all minimal intervals of instructions containing 4 jumps.
17729 The intervals are bounded by START and INSN. NBYTES is the total
17730 size of instructions in the interval including INSN and not including
17731 START. When the NBYTES is smaller than 16 bytes, it is possible
17732 that the end of START and INSN ends up in the same 16byte page.
17734 The smallest offset in the page INSN can start is the case where START
17735 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
17736 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
17738 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17741 nbytes += min_insn_size (insn);
17743 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17744 INSN_UID (insn), min_insn_size (insn));
17745 if ((GET_CODE (insn) == JUMP_INSN
17746 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17747 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17748 || GET_CODE (insn) == CALL_INSN)
17755 start = NEXT_INSN (start);
17756 if ((GET_CODE (start) == JUMP_INSN
17757 && GET_CODE (PATTERN (start)) != ADDR_VEC
17758 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17759 || GET_CODE (start) == CALL_INSN)
17760 njumps--, isjump = 1;
17763 nbytes -= min_insn_size (start);
17765 gcc_assert (njumps >= 0);
17767 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17768 INSN_UID (start), INSN_UID (insn), nbytes);
17770 if (njumps == 3 && isjump && nbytes < 16)
17772 int padsize = 15 - nbytes + min_insn_size (insn);
17775 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17776 INSN_UID (insn), padsize);
17777 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17782 /* AMD Athlon works faster
17783 when RET is not destination of conditional jump or directly preceded
17784 by other jump instruction. We avoid the penalty by inserting NOP just
17785 before the RET instructions in such cases. */
17787 ix86_pad_returns (void)
17792 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17794 basic_block bb = e->src;
17795 rtx ret = BB_END (bb);
17797 bool replace = false;
17799 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17800 || !maybe_hot_bb_p (bb))
17802 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17803 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17805 if (prev && GET_CODE (prev) == CODE_LABEL)
17810 FOR_EACH_EDGE (e, ei, bb->preds)
17811 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17812 && !(e->flags & EDGE_FALLTHRU))
17817 prev = prev_active_insn (ret);
17819 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17820 || GET_CODE (prev) == CALL_INSN))
17822 /* Empty functions get branch mispredict even when the jump destination
17823 is not visible to us. */
17824 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17829 emit_insn_before (gen_return_internal_long (), ret);
17835 /* Implement machine specific optimizations. We implement padding of returns
17836 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
17840 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
17841 ix86_pad_returns ();
17842 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17843 ix86_avoid_jump_misspredicts ();
17846 /* Return nonzero when QImode register that must be represented via REX prefix
17849 x86_extended_QIreg_mentioned_p (rtx insn)
17852 extract_insn_cached (insn);
17853 for (i = 0; i < recog_data.n_operands; i++)
17854 if (REG_P (recog_data.operand[i])
17855 && REGNO (recog_data.operand[i]) >= 4)
17860 /* Return nonzero when P points to register encoded via REX prefix.
17861 Called via for_each_rtx. */
17863 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17865 unsigned int regno;
17868 regno = REGNO (*p);
17869 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17872 /* Return true when INSN mentions register that must be encoded using REX
17875 x86_extended_reg_mentioned_p (rtx insn)
17877 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17880 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17881 optabs would emit if we didn't have TFmode patterns. */
17884 x86_emit_floatuns (rtx operands[2])
17886 rtx neglab, donelab, i0, i1, f0, in, out;
17887 enum machine_mode mode, inmode;
17889 inmode = GET_MODE (operands[1]);
17890 gcc_assert (inmode == SImode || inmode == DImode);
17893 in = force_reg (inmode, operands[1]);
17894 mode = GET_MODE (out);
17895 neglab = gen_label_rtx ();
17896 donelab = gen_label_rtx ();
17897 i1 = gen_reg_rtx (Pmode);
17898 f0 = gen_reg_rtx (mode);
17900 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17902 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17903 emit_jump_insn (gen_jump (donelab));
17906 emit_label (neglab);
17908 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17909 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17910 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17911 expand_float (f0, i0, 0);
17912 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17914 emit_label (donelab);
17917 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17918 with all elements equal to VAR. Return true if successful. */
17921 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17922 rtx target, rtx val)
17924 enum machine_mode smode, wsmode, wvmode;
17939 val = force_reg (GET_MODE_INNER (mode), val);
17940 x = gen_rtx_VEC_DUPLICATE (mode, val);
17941 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17947 if (TARGET_SSE || TARGET_3DNOW_A)
17949 val = gen_lowpart (SImode, val);
17950 x = gen_rtx_TRUNCATE (HImode, val);
17951 x = gen_rtx_VEC_DUPLICATE (mode, x);
17952 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17974 /* Extend HImode to SImode using a paradoxical SUBREG. */
17975 tmp1 = gen_reg_rtx (SImode);
17976 emit_move_insn (tmp1, gen_lowpart (SImode, val));
17977 /* Insert the SImode value as low element of V4SImode vector. */
17978 tmp2 = gen_reg_rtx (V4SImode);
17979 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
17980 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
17981 CONST0_RTX (V4SImode),
17983 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
17984 /* Cast the V4SImode vector back to a V8HImode vector. */
17985 tmp1 = gen_reg_rtx (V8HImode);
17986 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
17987 /* Duplicate the low short through the whole low SImode word. */
17988 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
17989 /* Cast the V8HImode vector back to a V4SImode vector. */
17990 tmp2 = gen_reg_rtx (V4SImode);
17991 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
17992 /* Replicate the low element of the V4SImode vector. */
17993 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
17994 /* Cast the V2SImode back to V8HImode, and store in target. */
17995 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18006 /* Extend QImode to SImode using a paradoxical SUBREG. */
18007 tmp1 = gen_reg_rtx (SImode);
18008 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18009 /* Insert the SImode value as low element of V4SImode vector. */
18010 tmp2 = gen_reg_rtx (V4SImode);
18011 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18012 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18013 CONST0_RTX (V4SImode),
18015 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18016 /* Cast the V4SImode vector back to a V16QImode vector. */
18017 tmp1 = gen_reg_rtx (V16QImode);
18018 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18019 /* Duplicate the low byte through the whole low SImode word. */
18020 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18021 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18022 /* Cast the V16QImode vector back to a V4SImode vector. */
18023 tmp2 = gen_reg_rtx (V4SImode);
18024 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18025 /* Replicate the low element of the V4SImode vector. */
18026 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18027 /* Cast the V2SImode back to V16QImode, and store in target. */
18028 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18036 /* Replicate the value once into the next wider mode and recurse. */
18037 val = convert_modes (wsmode, smode, val, true);
18038 x = expand_simple_binop (wsmode, ASHIFT, val,
18039 GEN_INT (GET_MODE_BITSIZE (smode)),
18040 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18041 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18043 x = gen_reg_rtx (wvmode);
18044 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18045 gcc_unreachable ();
18046 emit_move_insn (target, gen_lowpart (mode, x));
18054 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18055 whose ONE_VAR element is VAR, and other elements are zero. Return true
18059 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18060 rtx target, rtx var, int one_var)
18062 enum machine_mode vsimode;
18078 var = force_reg (GET_MODE_INNER (mode), var);
18079 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18080 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18085 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18086 new_target = gen_reg_rtx (mode);
18088 new_target = target;
18089 var = force_reg (GET_MODE_INNER (mode), var);
18090 x = gen_rtx_VEC_DUPLICATE (mode, var);
18091 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18092 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18095 /* We need to shuffle the value to the correct position, so
18096 create a new pseudo to store the intermediate result. */
18098 /* With SSE2, we can use the integer shuffle insns. */
18099 if (mode != V4SFmode && TARGET_SSE2)
18101 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18103 GEN_INT (one_var == 1 ? 0 : 1),
18104 GEN_INT (one_var == 2 ? 0 : 1),
18105 GEN_INT (one_var == 3 ? 0 : 1)));
18106 if (target != new_target)
18107 emit_move_insn (target, new_target);
18111 /* Otherwise convert the intermediate result to V4SFmode and
18112 use the SSE1 shuffle instructions. */
18113 if (mode != V4SFmode)
18115 tmp = gen_reg_rtx (V4SFmode);
18116 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18121 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18123 GEN_INT (one_var == 1 ? 0 : 1),
18124 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18125 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18127 if (mode != V4SFmode)
18128 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18129 else if (tmp != target)
18130 emit_move_insn (target, tmp);
18132 else if (target != new_target)
18133 emit_move_insn (target, new_target);
18138 vsimode = V4SImode;
18144 vsimode = V2SImode;
18150 /* Zero extend the variable element to SImode and recurse. */
18151 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18153 x = gen_reg_rtx (vsimode);
18154 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18156 gcc_unreachable ();
18158 emit_move_insn (target, gen_lowpart (mode, x));
18166 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18167 consisting of the values in VALS. It is known that all elements
18168 except ONE_VAR are constants. Return true if successful. */
18171 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18172 rtx target, rtx vals, int one_var)
18174 rtx var = XVECEXP (vals, 0, one_var);
18175 enum machine_mode wmode;
18178 const_vec = copy_rtx (vals);
18179 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18180 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18188 /* For the two element vectors, it's just as easy to use
18189 the general case. */
18205 /* There's no way to set one QImode entry easily. Combine
18206 the variable value with its adjacent constant value, and
18207 promote to an HImode set. */
18208 x = XVECEXP (vals, 0, one_var ^ 1);
18211 var = convert_modes (HImode, QImode, var, true);
18212 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18213 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18214 x = GEN_INT (INTVAL (x) & 0xff);
18218 var = convert_modes (HImode, QImode, var, true);
18219 x = gen_int_mode (INTVAL (x) << 8, HImode);
18221 if (x != const0_rtx)
18222 var = expand_simple_binop (HImode, IOR, var, x, var,
18223 1, OPTAB_LIB_WIDEN);
18225 x = gen_reg_rtx (wmode);
18226 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18227 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18229 emit_move_insn (target, gen_lowpart (mode, x));
18236 emit_move_insn (target, const_vec);
18237 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18241 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18242 all values variable, and none identical. */
18245 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18246 rtx target, rtx vals)
18248 enum machine_mode half_mode = GET_MODE_INNER (mode);
18249 rtx op0 = NULL, op1 = NULL;
18250 bool use_vec_concat = false;
18256 if (!mmx_ok && !TARGET_SSE)
18262 /* For the two element vectors, we always implement VEC_CONCAT. */
18263 op0 = XVECEXP (vals, 0, 0);
18264 op1 = XVECEXP (vals, 0, 1);
18265 use_vec_concat = true;
18269 half_mode = V2SFmode;
18272 half_mode = V2SImode;
18278 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18279 Recurse to load the two halves. */
18281 op0 = gen_reg_rtx (half_mode);
18282 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18283 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18285 op1 = gen_reg_rtx (half_mode);
18286 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18287 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18289 use_vec_concat = true;
18300 gcc_unreachable ();
18303 if (use_vec_concat)
18305 if (!register_operand (op0, half_mode))
18306 op0 = force_reg (half_mode, op0);
18307 if (!register_operand (op1, half_mode))
18308 op1 = force_reg (half_mode, op1);
18310 emit_insn (gen_rtx_SET (VOIDmode, target,
18311 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18315 int i, j, n_elts, n_words, n_elt_per_word;
18316 enum machine_mode inner_mode;
18317 rtx words[4], shift;
18319 inner_mode = GET_MODE_INNER (mode);
18320 n_elts = GET_MODE_NUNITS (mode);
18321 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18322 n_elt_per_word = n_elts / n_words;
18323 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18325 for (i = 0; i < n_words; ++i)
18327 rtx word = NULL_RTX;
18329 for (j = 0; j < n_elt_per_word; ++j)
18331 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18332 elt = convert_modes (word_mode, inner_mode, elt, true);
18338 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18339 word, 1, OPTAB_LIB_WIDEN);
18340 word = expand_simple_binop (word_mode, IOR, word, elt,
18341 word, 1, OPTAB_LIB_WIDEN);
18349 emit_move_insn (target, gen_lowpart (mode, words[0]));
18350 else if (n_words == 2)
18352 rtx tmp = gen_reg_rtx (mode);
18353 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18354 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18355 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18356 emit_move_insn (target, tmp);
18358 else if (n_words == 4)
18360 rtx tmp = gen_reg_rtx (V4SImode);
18361 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18362 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18363 emit_move_insn (target, gen_lowpart (mode, tmp));
18366 gcc_unreachable ();
18370 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18371 instructions unless MMX_OK is true. */
18374 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18376 enum machine_mode mode = GET_MODE (target);
18377 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18378 int n_elts = GET_MODE_NUNITS (mode);
18379 int n_var = 0, one_var = -1;
18380 bool all_same = true, all_const_zero = true;
18384 for (i = 0; i < n_elts; ++i)
18386 x = XVECEXP (vals, 0, i);
18387 if (!CONSTANT_P (x))
18388 n_var++, one_var = i;
18389 else if (x != CONST0_RTX (inner_mode))
18390 all_const_zero = false;
18391 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18395 /* Constants are best loaded from the constant pool. */
18398 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18402 /* If all values are identical, broadcast the value. */
18404 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18405 XVECEXP (vals, 0, 0)))
18408 /* Values where only one field is non-constant are best loaded from
18409 the pool and overwritten via move later. */
18413 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18414 XVECEXP (vals, 0, one_var),
18418 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18422 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18426 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18428 enum machine_mode mode = GET_MODE (target);
18429 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18430 bool use_vec_merge = false;
18439 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18440 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18442 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18444 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18445 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18455 /* For the two element vectors, we implement a VEC_CONCAT with
18456 the extraction of the other element. */
18458 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18459 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18462 op0 = val, op1 = tmp;
18464 op0 = tmp, op1 = val;
18466 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18467 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18475 use_vec_merge = true;
18479 /* tmp = target = A B C D */
18480 tmp = copy_to_reg (target);
18481 /* target = A A B B */
18482 emit_insn (gen_sse_unpcklps (target, target, target));
18483 /* target = X A B B */
18484 ix86_expand_vector_set (false, target, val, 0);
18485 /* target = A X C D */
18486 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18487 GEN_INT (1), GEN_INT (0),
18488 GEN_INT (2+4), GEN_INT (3+4)));
18492 /* tmp = target = A B C D */
18493 tmp = copy_to_reg (target);
18494 /* tmp = X B C D */
18495 ix86_expand_vector_set (false, tmp, val, 0);
18496 /* target = A B X D */
18497 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18498 GEN_INT (0), GEN_INT (1),
18499 GEN_INT (0+4), GEN_INT (3+4)));
18503 /* tmp = target = A B C D */
18504 tmp = copy_to_reg (target);
18505 /* tmp = X B C D */
18506 ix86_expand_vector_set (false, tmp, val, 0);
18507 /* target = A B X D */
18508 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18509 GEN_INT (0), GEN_INT (1),
18510 GEN_INT (2+4), GEN_INT (0+4)));
18514 gcc_unreachable ();
18519 /* Element 0 handled by vec_merge below. */
18522 use_vec_merge = true;
18528 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18529 store into element 0, then shuffle them back. */
18533 order[0] = GEN_INT (elt);
18534 order[1] = const1_rtx;
18535 order[2] = const2_rtx;
18536 order[3] = GEN_INT (3);
18537 order[elt] = const0_rtx;
18539 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18540 order[1], order[2], order[3]));
18542 ix86_expand_vector_set (false, target, val, 0);
18544 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18545 order[1], order[2], order[3]));
18549 /* For SSE1, we have to reuse the V4SF code. */
18550 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18551 gen_lowpart (SFmode, val), elt);
18556 use_vec_merge = TARGET_SSE2;
18559 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18570 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18571 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18572 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18576 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18578 emit_move_insn (mem, target);
18580 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18581 emit_move_insn (tmp, val);
18583 emit_move_insn (target, mem);
18588 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18590 enum machine_mode mode = GET_MODE (vec);
18591 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18592 bool use_vec_extr = false;
18605 use_vec_extr = true;
18617 tmp = gen_reg_rtx (mode);
18618 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18619 GEN_INT (elt), GEN_INT (elt),
18620 GEN_INT (elt+4), GEN_INT (elt+4)));
18624 tmp = gen_reg_rtx (mode);
18625 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18629 gcc_unreachable ();
18632 use_vec_extr = true;
18647 tmp = gen_reg_rtx (mode);
18648 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18649 GEN_INT (elt), GEN_INT (elt),
18650 GEN_INT (elt), GEN_INT (elt)));
18654 tmp = gen_reg_rtx (mode);
18655 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18659 gcc_unreachable ();
18662 use_vec_extr = true;
18667 /* For SSE1, we have to reuse the V4SF code. */
18668 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18669 gen_lowpart (V4SFmode, vec), elt);
18675 use_vec_extr = TARGET_SSE2;
18678 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18683 /* ??? Could extract the appropriate HImode element and shift. */
18690 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18691 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18693 /* Let the rtl optimizers know about the zero extension performed. */
18694 if (inner_mode == HImode)
18696 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18697 target = gen_lowpart (SImode, target);
18700 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18704 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18706 emit_move_insn (mem, vec);
18708 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18709 emit_move_insn (target, tmp);
18713 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18714 pattern to reduce; DEST is the destination; IN is the input vector. */
18717 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18719 rtx tmp1, tmp2, tmp3;
18721 tmp1 = gen_reg_rtx (V4SFmode);
18722 tmp2 = gen_reg_rtx (V4SFmode);
18723 tmp3 = gen_reg_rtx (V4SFmode);
18725 emit_insn (gen_sse_movhlps (tmp1, in, in));
18726 emit_insn (fn (tmp2, tmp1, in));
18728 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18729 GEN_INT (1), GEN_INT (1),
18730 GEN_INT (1+4), GEN_INT (1+4)));
18731 emit_insn (fn (dest, tmp2, tmp3));
18734 /* Target hook for scalar_mode_supported_p. */
18736 ix86_scalar_mode_supported_p (enum machine_mode mode)
18738 if (DECIMAL_FLOAT_MODE_P (mode))
18741 return default_scalar_mode_supported_p (mode);
18744 /* Implements target hook vector_mode_supported_p. */
18746 ix86_vector_mode_supported_p (enum machine_mode mode)
18748 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18750 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18752 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18754 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18759 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18761 We do this in the new i386 backend to maintain source compatibility
18762 with the old cc0-based compiler. */
18765 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18766 tree inputs ATTRIBUTE_UNUSED,
18769 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18771 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18773 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18778 /* Return true if this goes in small data/bss. */
18781 ix86_in_large_data_p (tree exp)
18783 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18786 /* Functions are never large data. */
18787 if (TREE_CODE (exp) == FUNCTION_DECL)
18790 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18792 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18793 if (strcmp (section, ".ldata") == 0
18794 || strcmp (section, ".lbss") == 0)
18800 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18802 /* If this is an incomplete type with size 0, then we can't put it
18803 in data because it might be too big when completed. */
18804 if (!size || size > ix86_section_threshold)
18811 ix86_encode_section_info (tree decl, rtx rtl, int first)
18813 default_encode_section_info (decl, rtl, first);
18815 if (TREE_CODE (decl) == VAR_DECL
18816 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18817 && ix86_in_large_data_p (decl))
18818 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
18821 /* Worker function for REVERSE_CONDITION. */
18824 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
18826 return (mode != CCFPmode && mode != CCFPUmode
18827 ? reverse_condition (code)
18828 : reverse_condition_maybe_unordered (code));
18831 /* Output code to perform an x87 FP register move, from OPERANDS[1]
18835 output_387_reg_move (rtx insn, rtx *operands)
18837 if (REG_P (operands[1])
18838 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18840 if (REGNO (operands[0]) == FIRST_STACK_REG)
18841 return output_387_ffreep (operands, 0);
18842 return "fstp\t%y0";
18844 if (STACK_TOP_P (operands[0]))
18845 return "fld%z1\t%y1";
18849 /* Output code to perform a conditional jump to LABEL, if C2 flag in
18850 FP status register is set. */
18853 ix86_emit_fp_unordered_jump (rtx label)
18855 rtx reg = gen_reg_rtx (HImode);
18858 emit_insn (gen_x86_fnstsw_1 (reg));
18860 if (TARGET_USE_SAHF)
18862 emit_insn (gen_x86_sahf_1 (reg));
18864 temp = gen_rtx_REG (CCmode, FLAGS_REG);
18865 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
18869 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
18871 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18872 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
18875 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
18876 gen_rtx_LABEL_REF (VOIDmode, label),
18878 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
18879 emit_jump_insn (temp);
18882 /* Output code to perform a log1p XFmode calculation. */
18884 void ix86_emit_i387_log1p (rtx op0, rtx op1)
18886 rtx label1 = gen_label_rtx ();
18887 rtx label2 = gen_label_rtx ();
18889 rtx tmp = gen_reg_rtx (XFmode);
18890 rtx tmp2 = gen_reg_rtx (XFmode);
18892 emit_insn (gen_absxf2 (tmp, op1));
18893 emit_insn (gen_cmpxf (tmp,
18894 CONST_DOUBLE_FROM_REAL_VALUE (
18895 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
18897 emit_jump_insn (gen_bge (label1));
18899 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18900 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
18901 emit_jump (label2);
18903 emit_label (label1);
18904 emit_move_insn (tmp, CONST1_RTX (XFmode));
18905 emit_insn (gen_addxf3 (tmp, op1, tmp));
18906 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18907 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
18909 emit_label (label2);
18912 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
18915 i386_solaris_elf_named_section (const char *name, unsigned int flags,
18918 /* With Binutils 2.15, the "@unwind" marker must be specified on
18919 every occurrence of the ".eh_frame" section, not just the first
18922 && strcmp (name, ".eh_frame") == 0)
18924 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18925 flags & SECTION_WRITE ? "aw" : "a");
18928 default_elf_asm_named_section (name, flags, decl);
18931 /* Return the mangling of TYPE if it is an extended fundamental type. */
18933 static const char *
18934 ix86_mangle_fundamental_type (tree type)
18936 switch (TYPE_MODE (type))
18939 /* __float128 is "g". */
18942 /* "long double" or __float80 is "e". */
18949 /* For 32-bit code we can save PIC register setup by using
18950 __stack_chk_fail_local hidden function instead of calling
18951 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
18952 register, so it is better to call __stack_chk_fail directly. */
18955 ix86_stack_protect_fail (void)
18957 return TARGET_64BIT
18958 ? default_external_stack_protect_fail ()
18959 : default_hidden_stack_protect_fail ();
18962 /* Select a format to encode pointers in exception handling data. CODE
18963 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
18964 true if the symbol may be affected by dynamic relocations.
18966 ??? All x86 object file formats are capable of representing this.
18967 After all, the relocation needed is the same as for the call insn.
18968 Whether or not a particular assembler allows us to enter such, I
18969 guess we'll have to see. */
18971 asm_preferred_eh_data_format (int code, int global)
18975 int type = DW_EH_PE_sdata8;
18977 || ix86_cmodel == CM_SMALL_PIC
18978 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
18979 type = DW_EH_PE_sdata4;
18980 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
18982 if (ix86_cmodel == CM_SMALL
18983 || (ix86_cmodel == CM_MEDIUM && code))
18984 return DW_EH_PE_udata4;
18985 return DW_EH_PE_absptr;
18988 #include "gt-i386.h"