1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
54 #ifndef CHECK_STACK_LIMIT
55 #define CHECK_STACK_LIMIT (-1)
58 /* Return index of given mode in mult and division cost tables. */
59 #define MODE_INDEX(mode) \
60 ((mode) == QImode ? 0 \
61 : (mode) == HImode ? 1 \
62 : (mode) == SImode ? 2 \
63 : (mode) == DImode ? 3 \
66 /* Processor costs (relative to an add) */
67 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
68 #define COSTS_N_BYTES(N) ((N) * 2)
71 struct processor_costs size_cost = { /* costs for tuning for size */
72 COSTS_N_BYTES (2), /* cost of an add instruction */
73 COSTS_N_BYTES (3), /* cost of a lea instruction */
74 COSTS_N_BYTES (2), /* variable shift costs */
75 COSTS_N_BYTES (3), /* constant shift costs */
76 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
77 COSTS_N_BYTES (3), /* HI */
78 COSTS_N_BYTES (3), /* SI */
79 COSTS_N_BYTES (3), /* DI */
80 COSTS_N_BYTES (5)}, /* other */
81 0, /* cost of multiply per each bit set */
82 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
83 COSTS_N_BYTES (3), /* HI */
84 COSTS_N_BYTES (3), /* SI */
85 COSTS_N_BYTES (3), /* DI */
86 COSTS_N_BYTES (5)}, /* other */
87 COSTS_N_BYTES (3), /* cost of movsx */
88 COSTS_N_BYTES (3), /* cost of movzx */
91 2, /* cost for loading QImode using movzbl */
92 {2, 2, 2}, /* cost of loading integer registers
93 in QImode, HImode and SImode.
94 Relative to reg-reg move (2). */
95 {2, 2, 2}, /* cost of storing integer registers */
96 2, /* cost of reg,reg fld/fst */
97 {2, 2, 2}, /* cost of loading fp registers
98 in SFmode, DFmode and XFmode */
99 {2, 2, 2}, /* cost of storing fp registers
100 in SFmode, DFmode and XFmode */
101 3, /* cost of moving MMX register */
102 {3, 3}, /* cost of loading MMX registers
103 in SImode and DImode */
104 {3, 3}, /* cost of storing MMX registers
105 in SImode and DImode */
106 3, /* cost of moving SSE register */
107 {3, 3, 3}, /* cost of loading SSE registers
108 in SImode, DImode and TImode */
109 {3, 3, 3}, /* cost of storing SSE registers
110 in SImode, DImode and TImode */
111 3, /* MMX or SSE register to integer */
112 0, /* size of prefetch block */
113 0, /* number of parallel prefetches */
115 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
116 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
117 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
118 COSTS_N_BYTES (2), /* cost of FABS instruction. */
119 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
120 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
123 /* Processor costs (relative to an add) */
125 struct processor_costs i386_cost = { /* 386 specific costs */
126 COSTS_N_INSNS (1), /* cost of an add instruction */
127 COSTS_N_INSNS (1), /* cost of a lea instruction */
128 COSTS_N_INSNS (3), /* variable shift costs */
129 COSTS_N_INSNS (2), /* constant shift costs */
130 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
131 COSTS_N_INSNS (6), /* HI */
132 COSTS_N_INSNS (6), /* SI */
133 COSTS_N_INSNS (6), /* DI */
134 COSTS_N_INSNS (6)}, /* other */
135 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
136 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
137 COSTS_N_INSNS (23), /* HI */
138 COSTS_N_INSNS (23), /* SI */
139 COSTS_N_INSNS (23), /* DI */
140 COSTS_N_INSNS (23)}, /* other */
141 COSTS_N_INSNS (3), /* cost of movsx */
142 COSTS_N_INSNS (2), /* cost of movzx */
143 15, /* "large" insn */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, /* cost of moving SSE register */
161 {4, 8, 16}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {4, 8, 16}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of prefetch block */
167 0, /* number of parallel prefetches */
169 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
170 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
171 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
172 COSTS_N_INSNS (22), /* cost of FABS instruction. */
173 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
174 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
178 struct processor_costs i486_cost = { /* 486 specific costs */
179 COSTS_N_INSNS (1), /* cost of an add instruction */
180 COSTS_N_INSNS (1), /* cost of a lea instruction */
181 COSTS_N_INSNS (3), /* variable shift costs */
182 COSTS_N_INSNS (2), /* constant shift costs */
183 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
184 COSTS_N_INSNS (12), /* HI */
185 COSTS_N_INSNS (12), /* SI */
186 COSTS_N_INSNS (12), /* DI */
187 COSTS_N_INSNS (12)}, /* other */
188 1, /* cost of multiply per each bit set */
189 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
190 COSTS_N_INSNS (40), /* HI */
191 COSTS_N_INSNS (40), /* SI */
192 COSTS_N_INSNS (40), /* DI */
193 COSTS_N_INSNS (40)}, /* other */
194 COSTS_N_INSNS (3), /* cost of movsx */
195 COSTS_N_INSNS (2), /* cost of movzx */
196 15, /* "large" insn */
198 4, /* cost for loading QImode using movzbl */
199 {2, 4, 2}, /* cost of loading integer registers
200 in QImode, HImode and SImode.
201 Relative to reg-reg move (2). */
202 {2, 4, 2}, /* cost of storing integer registers */
203 2, /* cost of reg,reg fld/fst */
204 {8, 8, 8}, /* cost of loading fp registers
205 in SFmode, DFmode and XFmode */
206 {8, 8, 8}, /* cost of storing fp registers
207 in SFmode, DFmode and XFmode */
208 2, /* cost of moving MMX register */
209 {4, 8}, /* cost of loading MMX registers
210 in SImode and DImode */
211 {4, 8}, /* cost of storing MMX registers
212 in SImode and DImode */
213 2, /* cost of moving SSE register */
214 {4, 8, 16}, /* cost of loading SSE registers
215 in SImode, DImode and TImode */
216 {4, 8, 16}, /* cost of storing SSE registers
217 in SImode, DImode and TImode */
218 3, /* MMX or SSE register to integer */
219 0, /* size of prefetch block */
220 0, /* number of parallel prefetches */
222 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
223 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
224 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
225 COSTS_N_INSNS (3), /* cost of FABS instruction. */
226 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
227 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
231 struct processor_costs pentium_cost = {
232 COSTS_N_INSNS (1), /* cost of an add instruction */
233 COSTS_N_INSNS (1), /* cost of a lea instruction */
234 COSTS_N_INSNS (4), /* variable shift costs */
235 COSTS_N_INSNS (1), /* constant shift costs */
236 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
237 COSTS_N_INSNS (11), /* HI */
238 COSTS_N_INSNS (11), /* SI */
239 COSTS_N_INSNS (11), /* DI */
240 COSTS_N_INSNS (11)}, /* other */
241 0, /* cost of multiply per each bit set */
242 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
243 COSTS_N_INSNS (25), /* HI */
244 COSTS_N_INSNS (25), /* SI */
245 COSTS_N_INSNS (25), /* DI */
246 COSTS_N_INSNS (25)}, /* other */
247 COSTS_N_INSNS (3), /* cost of movsx */
248 COSTS_N_INSNS (2), /* cost of movzx */
249 8, /* "large" insn */
251 6, /* cost for loading QImode using movzbl */
252 {2, 4, 2}, /* cost of loading integer registers
253 in QImode, HImode and SImode.
254 Relative to reg-reg move (2). */
255 {2, 4, 2}, /* cost of storing integer registers */
256 2, /* cost of reg,reg fld/fst */
257 {2, 2, 6}, /* cost of loading fp registers
258 in SFmode, DFmode and XFmode */
259 {4, 4, 6}, /* cost of storing fp registers
260 in SFmode, DFmode and XFmode */
261 8, /* cost of moving MMX register */
262 {8, 8}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {8, 8}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {4, 8, 16}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {4, 8, 16}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 0, /* size of prefetch block */
273 0, /* number of parallel prefetches */
275 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
276 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
277 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
278 COSTS_N_INSNS (1), /* cost of FABS instruction. */
279 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
280 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
284 struct processor_costs pentiumpro_cost = {
285 COSTS_N_INSNS (1), /* cost of an add instruction */
286 COSTS_N_INSNS (1), /* cost of a lea instruction */
287 COSTS_N_INSNS (1), /* variable shift costs */
288 COSTS_N_INSNS (1), /* constant shift costs */
289 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
290 COSTS_N_INSNS (4), /* HI */
291 COSTS_N_INSNS (4), /* SI */
292 COSTS_N_INSNS (4), /* DI */
293 COSTS_N_INSNS (4)}, /* other */
294 0, /* cost of multiply per each bit set */
295 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
296 COSTS_N_INSNS (17), /* HI */
297 COSTS_N_INSNS (17), /* SI */
298 COSTS_N_INSNS (17), /* DI */
299 COSTS_N_INSNS (17)}, /* other */
300 COSTS_N_INSNS (1), /* cost of movsx */
301 COSTS_N_INSNS (1), /* cost of movzx */
302 8, /* "large" insn */
304 2, /* cost for loading QImode using movzbl */
305 {4, 4, 4}, /* cost of loading integer registers
306 in QImode, HImode and SImode.
307 Relative to reg-reg move (2). */
308 {2, 2, 2}, /* cost of storing integer registers */
309 2, /* cost of reg,reg fld/fst */
310 {2, 2, 6}, /* cost of loading fp registers
311 in SFmode, DFmode and XFmode */
312 {4, 4, 6}, /* cost of storing fp registers
313 in SFmode, DFmode and XFmode */
314 2, /* cost of moving MMX register */
315 {2, 2}, /* cost of loading MMX registers
316 in SImode and DImode */
317 {2, 2}, /* cost of storing MMX registers
318 in SImode and DImode */
319 2, /* cost of moving SSE register */
320 {2, 2, 8}, /* cost of loading SSE registers
321 in SImode, DImode and TImode */
322 {2, 2, 8}, /* cost of storing SSE registers
323 in SImode, DImode and TImode */
324 3, /* MMX or SSE register to integer */
325 32, /* size of prefetch block */
326 6, /* number of parallel prefetches */
328 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (2), /* cost of FABS instruction. */
332 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
337 struct processor_costs geode_cost = {
338 COSTS_N_INSNS (1), /* cost of an add instruction */
339 COSTS_N_INSNS (1), /* cost of a lea instruction */
340 COSTS_N_INSNS (2), /* variable shift costs */
341 COSTS_N_INSNS (1), /* constant shift costs */
342 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
343 COSTS_N_INSNS (4), /* HI */
344 COSTS_N_INSNS (7), /* SI */
345 COSTS_N_INSNS (7), /* DI */
346 COSTS_N_INSNS (7)}, /* other */
347 0, /* cost of multiply per each bit set */
348 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
349 COSTS_N_INSNS (23), /* HI */
350 COSTS_N_INSNS (39), /* SI */
351 COSTS_N_INSNS (39), /* DI */
352 COSTS_N_INSNS (39)}, /* other */
353 COSTS_N_INSNS (1), /* cost of movsx */
354 COSTS_N_INSNS (1), /* cost of movzx */
355 8, /* "large" insn */
357 1, /* cost for loading QImode using movzbl */
358 {1, 1, 1}, /* cost of loading integer registers
359 in QImode, HImode and SImode.
360 Relative to reg-reg move (2). */
361 {1, 1, 1}, /* cost of storing integer registers */
362 1, /* cost of reg,reg fld/fst */
363 {1, 1, 1}, /* cost of loading fp registers
364 in SFmode, DFmode and XFmode */
365 {4, 6, 6}, /* cost of storing fp registers
366 in SFmode, DFmode and XFmode */
368 1, /* cost of moving MMX register */
369 {1, 1}, /* cost of loading MMX registers
370 in SImode and DImode */
371 {1, 1}, /* cost of storing MMX registers
372 in SImode and DImode */
373 1, /* cost of moving SSE register */
374 {1, 1, 1}, /* cost of loading SSE registers
375 in SImode, DImode and TImode */
376 {1, 1, 1}, /* cost of storing SSE registers
377 in SImode, DImode and TImode */
378 1, /* MMX or SSE register to integer */
379 32, /* size of prefetch block */
380 1, /* number of parallel prefetches */
382 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
383 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
384 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
385 COSTS_N_INSNS (1), /* cost of FABS instruction. */
386 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
387 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
391 struct processor_costs k6_cost = {
392 COSTS_N_INSNS (1), /* cost of an add instruction */
393 COSTS_N_INSNS (2), /* cost of a lea instruction */
394 COSTS_N_INSNS (1), /* variable shift costs */
395 COSTS_N_INSNS (1), /* constant shift costs */
396 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
397 COSTS_N_INSNS (3), /* HI */
398 COSTS_N_INSNS (3), /* SI */
399 COSTS_N_INSNS (3), /* DI */
400 COSTS_N_INSNS (3)}, /* other */
401 0, /* cost of multiply per each bit set */
402 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
403 COSTS_N_INSNS (18), /* HI */
404 COSTS_N_INSNS (18), /* SI */
405 COSTS_N_INSNS (18), /* DI */
406 COSTS_N_INSNS (18)}, /* other */
407 COSTS_N_INSNS (2), /* cost of movsx */
408 COSTS_N_INSNS (2), /* cost of movzx */
409 8, /* "large" insn */
411 3, /* cost for loading QImode using movzbl */
412 {4, 5, 4}, /* cost of loading integer registers
413 in QImode, HImode and SImode.
414 Relative to reg-reg move (2). */
415 {2, 3, 2}, /* cost of storing integer registers */
416 4, /* cost of reg,reg fld/fst */
417 {6, 6, 6}, /* cost of loading fp registers
418 in SFmode, DFmode and XFmode */
419 {4, 4, 4}, /* cost of storing fp registers
420 in SFmode, DFmode and XFmode */
421 2, /* cost of moving MMX register */
422 {2, 2}, /* cost of loading MMX registers
423 in SImode and DImode */
424 {2, 2}, /* cost of storing MMX registers
425 in SImode and DImode */
426 2, /* cost of moving SSE register */
427 {2, 2, 8}, /* cost of loading SSE registers
428 in SImode, DImode and TImode */
429 {2, 2, 8}, /* cost of storing SSE registers
430 in SImode, DImode and TImode */
431 6, /* MMX or SSE register to integer */
432 32, /* size of prefetch block */
433 1, /* number of parallel prefetches */
435 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
436 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
437 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
438 COSTS_N_INSNS (2), /* cost of FABS instruction. */
439 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
440 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
444 struct processor_costs athlon_cost = {
445 COSTS_N_INSNS (1), /* cost of an add instruction */
446 COSTS_N_INSNS (2), /* cost of a lea instruction */
447 COSTS_N_INSNS (1), /* variable shift costs */
448 COSTS_N_INSNS (1), /* constant shift costs */
449 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
450 COSTS_N_INSNS (5), /* HI */
451 COSTS_N_INSNS (5), /* SI */
452 COSTS_N_INSNS (5), /* DI */
453 COSTS_N_INSNS (5)}, /* other */
454 0, /* cost of multiply per each bit set */
455 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
456 COSTS_N_INSNS (26), /* HI */
457 COSTS_N_INSNS (42), /* SI */
458 COSTS_N_INSNS (74), /* DI */
459 COSTS_N_INSNS (74)}, /* other */
460 COSTS_N_INSNS (1), /* cost of movsx */
461 COSTS_N_INSNS (1), /* cost of movzx */
462 8, /* "large" insn */
464 4, /* cost for loading QImode using movzbl */
465 {3, 4, 3}, /* cost of loading integer registers
466 in QImode, HImode and SImode.
467 Relative to reg-reg move (2). */
468 {3, 4, 3}, /* cost of storing integer registers */
469 4, /* cost of reg,reg fld/fst */
470 {4, 4, 12}, /* cost of loading fp registers
471 in SFmode, DFmode and XFmode */
472 {6, 6, 8}, /* cost of storing fp registers
473 in SFmode, DFmode and XFmode */
474 2, /* cost of moving MMX register */
475 {4, 4}, /* cost of loading MMX registers
476 in SImode and DImode */
477 {4, 4}, /* cost of storing MMX registers
478 in SImode and DImode */
479 2, /* cost of moving SSE register */
480 {4, 4, 6}, /* cost of loading SSE registers
481 in SImode, DImode and TImode */
482 {4, 4, 5}, /* cost of storing SSE registers
483 in SImode, DImode and TImode */
484 5, /* MMX or SSE register to integer */
485 64, /* size of prefetch block */
486 6, /* number of parallel prefetches */
488 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
489 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
490 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
491 COSTS_N_INSNS (2), /* cost of FABS instruction. */
492 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
493 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
497 struct processor_costs k8_cost = {
498 COSTS_N_INSNS (1), /* cost of an add instruction */
499 COSTS_N_INSNS (2), /* cost of a lea instruction */
500 COSTS_N_INSNS (1), /* variable shift costs */
501 COSTS_N_INSNS (1), /* constant shift costs */
502 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
503 COSTS_N_INSNS (4), /* HI */
504 COSTS_N_INSNS (3), /* SI */
505 COSTS_N_INSNS (4), /* DI */
506 COSTS_N_INSNS (5)}, /* other */
507 0, /* cost of multiply per each bit set */
508 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
509 COSTS_N_INSNS (26), /* HI */
510 COSTS_N_INSNS (42), /* SI */
511 COSTS_N_INSNS (74), /* DI */
512 COSTS_N_INSNS (74)}, /* other */
513 COSTS_N_INSNS (1), /* cost of movsx */
514 COSTS_N_INSNS (1), /* cost of movzx */
515 8, /* "large" insn */
517 4, /* cost for loading QImode using movzbl */
518 {3, 4, 3}, /* cost of loading integer registers
519 in QImode, HImode and SImode.
520 Relative to reg-reg move (2). */
521 {3, 4, 3}, /* cost of storing integer registers */
522 4, /* cost of reg,reg fld/fst */
523 {4, 4, 12}, /* cost of loading fp registers
524 in SFmode, DFmode and XFmode */
525 {6, 6, 8}, /* cost of storing fp registers
526 in SFmode, DFmode and XFmode */
527 2, /* cost of moving MMX register */
528 {3, 3}, /* cost of loading MMX registers
529 in SImode and DImode */
530 {4, 4}, /* cost of storing MMX registers
531 in SImode and DImode */
532 2, /* cost of moving SSE register */
533 {4, 3, 6}, /* cost of loading SSE registers
534 in SImode, DImode and TImode */
535 {4, 4, 5}, /* cost of storing SSE registers
536 in SImode, DImode and TImode */
537 5, /* MMX or SSE register to integer */
538 64, /* size of prefetch block */
539 6, /* number of parallel prefetches */
541 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
542 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
543 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
544 COSTS_N_INSNS (2), /* cost of FABS instruction. */
545 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
546 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
550 struct processor_costs pentium4_cost = {
551 COSTS_N_INSNS (1), /* cost of an add instruction */
552 COSTS_N_INSNS (3), /* cost of a lea instruction */
553 COSTS_N_INSNS (4), /* variable shift costs */
554 COSTS_N_INSNS (4), /* constant shift costs */
555 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
556 COSTS_N_INSNS (15), /* HI */
557 COSTS_N_INSNS (15), /* SI */
558 COSTS_N_INSNS (15), /* DI */
559 COSTS_N_INSNS (15)}, /* other */
560 0, /* cost of multiply per each bit set */
561 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
562 COSTS_N_INSNS (56), /* HI */
563 COSTS_N_INSNS (56), /* SI */
564 COSTS_N_INSNS (56), /* DI */
565 COSTS_N_INSNS (56)}, /* other */
566 COSTS_N_INSNS (1), /* cost of movsx */
567 COSTS_N_INSNS (1), /* cost of movzx */
568 16, /* "large" insn */
570 2, /* cost for loading QImode using movzbl */
571 {4, 5, 4}, /* cost of loading integer registers
572 in QImode, HImode and SImode.
573 Relative to reg-reg move (2). */
574 {2, 3, 2}, /* cost of storing integer registers */
575 2, /* cost of reg,reg fld/fst */
576 {2, 2, 6}, /* cost of loading fp registers
577 in SFmode, DFmode and XFmode */
578 {4, 4, 6}, /* cost of storing fp registers
579 in SFmode, DFmode and XFmode */
580 2, /* cost of moving MMX register */
581 {2, 2}, /* cost of loading MMX registers
582 in SImode and DImode */
583 {2, 2}, /* cost of storing MMX registers
584 in SImode and DImode */
585 12, /* cost of moving SSE register */
586 {12, 12, 12}, /* cost of loading SSE registers
587 in SImode, DImode and TImode */
588 {2, 2, 8}, /* cost of storing SSE registers
589 in SImode, DImode and TImode */
590 10, /* MMX or SSE register to integer */
591 64, /* size of prefetch block */
592 6, /* number of parallel prefetches */
594 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
595 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
596 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
597 COSTS_N_INSNS (2), /* cost of FABS instruction. */
598 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
599 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
603 struct processor_costs nocona_cost = {
604 COSTS_N_INSNS (1), /* cost of an add instruction */
605 COSTS_N_INSNS (1), /* cost of a lea instruction */
606 COSTS_N_INSNS (1), /* variable shift costs */
607 COSTS_N_INSNS (1), /* constant shift costs */
608 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
609 COSTS_N_INSNS (10), /* HI */
610 COSTS_N_INSNS (10), /* SI */
611 COSTS_N_INSNS (10), /* DI */
612 COSTS_N_INSNS (10)}, /* other */
613 0, /* cost of multiply per each bit set */
614 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
615 COSTS_N_INSNS (66), /* HI */
616 COSTS_N_INSNS (66), /* SI */
617 COSTS_N_INSNS (66), /* DI */
618 COSTS_N_INSNS (66)}, /* other */
619 COSTS_N_INSNS (1), /* cost of movsx */
620 COSTS_N_INSNS (1), /* cost of movzx */
621 16, /* "large" insn */
623 4, /* cost for loading QImode using movzbl */
624 {4, 4, 4}, /* cost of loading integer registers
625 in QImode, HImode and SImode.
626 Relative to reg-reg move (2). */
627 {4, 4, 4}, /* cost of storing integer registers */
628 3, /* cost of reg,reg fld/fst */
629 {12, 12, 12}, /* cost of loading fp registers
630 in SFmode, DFmode and XFmode */
631 {4, 4, 4}, /* cost of storing fp registers
632 in SFmode, DFmode and XFmode */
633 6, /* cost of moving MMX register */
634 {12, 12}, /* cost of loading MMX registers
635 in SImode and DImode */
636 {12, 12}, /* cost of storing MMX registers
637 in SImode and DImode */
638 6, /* cost of moving SSE register */
639 {12, 12, 12}, /* cost of loading SSE registers
640 in SImode, DImode and TImode */
641 {12, 12, 12}, /* cost of storing SSE registers
642 in SImode, DImode and TImode */
643 8, /* MMX or SSE register to integer */
644 128, /* size of prefetch block */
645 8, /* number of parallel prefetches */
647 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
648 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
649 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
650 COSTS_N_INSNS (3), /* cost of FABS instruction. */
651 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
652 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
655 /* Generic64 should produce code tuned for Nocona and K8. */
657 struct processor_costs generic64_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 /* On all chips taken into consideration lea is 2 cycles and more. With
660 this cost however our current implementation of synth_mult results in
661 use of unnecessary temporary registers causing regression on several
662 SPECfp benchmarks. */
663 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
664 COSTS_N_INSNS (1), /* variable shift costs */
665 COSTS_N_INSNS (1), /* constant shift costs */
666 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
667 COSTS_N_INSNS (4), /* HI */
668 COSTS_N_INSNS (3), /* SI */
669 COSTS_N_INSNS (4), /* DI */
670 COSTS_N_INSNS (2)}, /* other */
671 0, /* cost of multiply per each bit set */
672 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
673 COSTS_N_INSNS (26), /* HI */
674 COSTS_N_INSNS (42), /* SI */
675 COSTS_N_INSNS (74), /* DI */
676 COSTS_N_INSNS (74)}, /* other */
677 COSTS_N_INSNS (1), /* cost of movsx */
678 COSTS_N_INSNS (1), /* cost of movzx */
679 8, /* "large" insn */
681 4, /* cost for loading QImode using movzbl */
682 {4, 4, 4}, /* cost of loading integer registers
683 in QImode, HImode and SImode.
684 Relative to reg-reg move (2). */
685 {4, 4, 4}, /* cost of storing integer registers */
686 4, /* cost of reg,reg fld/fst */
687 {12, 12, 12}, /* cost of loading fp registers
688 in SFmode, DFmode and XFmode */
689 {6, 6, 8}, /* cost of storing fp registers
690 in SFmode, DFmode and XFmode */
691 2, /* cost of moving MMX register */
692 {8, 8}, /* cost of loading MMX registers
693 in SImode and DImode */
694 {8, 8}, /* cost of storing MMX registers
695 in SImode and DImode */
696 2, /* cost of moving SSE register */
697 {8, 8, 8}, /* cost of loading SSE registers
698 in SImode, DImode and TImode */
699 {8, 8, 8}, /* cost of storing SSE registers
700 in SImode, DImode and TImode */
701 5, /* MMX or SSE register to integer */
702 64, /* size of prefetch block */
703 6, /* number of parallel prefetches */
704 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
705 is increased to perhaps more appropriate value of 5. */
707 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (8), /* cost of FABS instruction. */
711 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
715 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
717 struct processor_costs generic32_cost = {
718 COSTS_N_INSNS (1), /* cost of an add instruction */
719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
720 COSTS_N_INSNS (1), /* variable shift costs */
721 COSTS_N_INSNS (1), /* constant shift costs */
722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
723 COSTS_N_INSNS (4), /* HI */
724 COSTS_N_INSNS (3), /* SI */
725 COSTS_N_INSNS (4), /* DI */
726 COSTS_N_INSNS (2)}, /* other */
727 0, /* cost of multiply per each bit set */
728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
729 COSTS_N_INSNS (26), /* HI */
730 COSTS_N_INSNS (42), /* SI */
731 COSTS_N_INSNS (74), /* DI */
732 COSTS_N_INSNS (74)}, /* other */
733 COSTS_N_INSNS (1), /* cost of movsx */
734 COSTS_N_INSNS (1), /* cost of movzx */
735 8, /* "large" insn */
737 4, /* cost for loading QImode using movzbl */
738 {4, 4, 4}, /* cost of loading integer registers
739 in QImode, HImode and SImode.
740 Relative to reg-reg move (2). */
741 {4, 4, 4}, /* cost of storing integer registers */
742 4, /* cost of reg,reg fld/fst */
743 {12, 12, 12}, /* cost of loading fp registers
744 in SFmode, DFmode and XFmode */
745 {6, 6, 8}, /* cost of storing fp registers
746 in SFmode, DFmode and XFmode */
747 2, /* cost of moving MMX register */
748 {8, 8}, /* cost of loading MMX registers
749 in SImode and DImode */
750 {8, 8}, /* cost of storing MMX registers
751 in SImode and DImode */
752 2, /* cost of moving SSE register */
753 {8, 8, 8}, /* cost of loading SSE registers
754 in SImode, DImode and TImode */
755 {8, 8, 8}, /* cost of storing SSE registers
756 in SImode, DImode and TImode */
757 5, /* MMX or SSE register to integer */
758 64, /* size of prefetch block */
759 6, /* number of parallel prefetches */
761 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
762 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
763 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
764 COSTS_N_INSNS (8), /* cost of FABS instruction. */
765 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
766 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
769 const struct processor_costs *ix86_cost = &pentium_cost;
771 /* Processor feature/optimization bitmasks. */
772 #define m_386 (1<<PROCESSOR_I386)
773 #define m_486 (1<<PROCESSOR_I486)
774 #define m_PENT (1<<PROCESSOR_PENTIUM)
775 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
776 #define m_GEODE (1<<PROCESSOR_GEODE)
777 #define m_K6_GEODE (m_K6 | m_GEODE)
778 #define m_K6 (1<<PROCESSOR_K6)
779 #define m_ATHLON (1<<PROCESSOR_ATHLON)
780 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
781 #define m_K8 (1<<PROCESSOR_K8)
782 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
783 #define m_NOCONA (1<<PROCESSOR_NOCONA)
784 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
785 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
786 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
788 /* Generic instruction choice should be common subset of supported CPUs
789 (PPro/PENT4/NOCONA/Athlon/K8). */
791 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
792 Generic64 seems like good code size tradeoff. We can't enable it for 32bit
793 generic because it is not working well with PPro base chips. */
794 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC64;
795 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
796 const int x86_zero_extend_with_and = m_486 | m_PENT;
797 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
798 const int x86_double_with_add = ~m_386;
799 const int x86_use_bit_test = m_386;
800 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
801 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
802 const int x86_3dnow_a = m_ATHLON_K8;
803 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
804 /* Branch hints were put in P4 based on simulation result. But
805 after P4 was made, no performance benefit was observed with
806 branch hints. It also increases the code size. As the result,
807 icc never generates branch hints. */
808 const int x86_branch_hints = 0;
809 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
810 /* We probably ought to watch for partial register stalls on Generic32
811 compilation setting as well. However in current implementation the
812 partial register stalls are not eliminated very well - they can
813 be introduced via subregs synthesized by combine and can happen
814 in caller/callee saving sequences.
815 Because this option pays back little on PPro based chips and is in conflict
816 with partial reg. dependencies used by Athlon/P4 based chips, it is better
817 to leave it off for generic32 for now. */
818 const int x86_partial_reg_stall = m_PPRO;
819 const int x86_partial_flag_reg_stall = m_GENERIC;
820 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
821 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
822 const int x86_use_mov0 = m_K6;
823 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
824 const int x86_read_modify_write = ~m_PENT;
825 const int x86_read_modify = ~(m_PENT | m_PPRO);
826 const int x86_split_long_moves = m_PPRO;
827 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
828 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
829 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
830 const int x86_qimode_math = ~(0);
831 const int x86_promote_qi_regs = 0;
832 /* On PPro this flag is meant to avoid partial register stalls. Just like
833 the x86_partial_reg_stall this option might be considered for Generic32
834 if our scheme for avoiding partial stalls was more effective. */
835 const int x86_himode_math = ~(m_PPRO);
836 const int x86_promote_hi_regs = m_PPRO;
837 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
838 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
839 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC;
840 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
841 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC | m_GEODE);
842 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
843 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
844 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
845 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
846 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
847 const int x86_shift1 = ~m_486;
848 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
849 /* In Generic model we have an conflict here in between PPro/Pentium4 based chips
850 that thread 128bit SSE registers as single units versus K8 based chips that
851 divide SSE registers to two 64bit halves.
852 x86_sse_partial_reg_dependency promote all store destinations to be 128bit
853 to allow register renaming on 128bit SSE units, but usually results in one
854 extra microop on 64bit SSE units. Experimental results shows that disabling
855 this option on P4 brings over 20% SPECfp regression, while enabling it on
856 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
858 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
859 /* Set for machines where the type and dependencies are resolved on SSE
860 register parts instead of whole registers, so we may maintain just
861 lower part of scalar values in proper format leaving the upper part
863 const int x86_sse_split_regs = m_ATHLON_K8;
864 const int x86_sse_typeless_stores = m_ATHLON_K8;
865 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
866 const int x86_use_ffreep = m_ATHLON_K8;
867 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE;
868 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
870 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
871 integer data in xmm registers. Which results in pretty abysmal code. */
872 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
874 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
875 /* Some CPU cores are not able to predict more than 4 branch instructions in
876 the 16 byte window. */
877 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
878 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_GENERIC;
879 const int x86_use_bt = m_ATHLON_K8;
880 /* Compare and exchange was added for 80486. */
881 const int x86_cmpxchg = ~m_386;
882 /* Compare and exchange 8 bytes was added for pentium. */
883 const int x86_cmpxchg8b = ~(m_386 | m_486);
884 /* Compare and exchange 16 bytes was added for nocona. */
885 const int x86_cmpxchg16b = m_NOCONA;
886 /* Exchange and add was added for 80486. */
887 const int x86_xadd = ~m_386;
888 /* Byteswap was added for 80486. */
889 const int x86_bswap = ~m_386;
890 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
892 /* In case the average insn count for single function invocation is
893 lower than this constant, emit fast (but longer) prologue and
895 #define FAST_PROLOGUE_INSN_COUNT 20
897 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
898 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
899 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
900 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
902 /* Array of the smallest class containing reg number REGNO, indexed by
903 REGNO. Used by REGNO_REG_CLASS in i386.h. */
905 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
908 AREG, DREG, CREG, BREG,
910 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
912 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
913 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
916 /* flags, fpsr, fpcr, dirflag, frame */
917 NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
918 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
920 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
922 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
923 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
924 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
928 /* The "default" register map used in 32bit mode. */
930 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
932 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
933 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
934 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
935 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
936 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
937 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
938 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
941 static int const x86_64_int_parameter_registers[6] =
943 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
944 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
947 static int const x86_64_int_return_registers[4] =
949 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
952 /* The "default" register map used in 64bit mode. */
953 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
955 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
956 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
957 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
958 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
959 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
960 8,9,10,11,12,13,14,15, /* extended integer registers */
961 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
964 /* Define the register numbers to be used in Dwarf debugging information.
965 The SVR4 reference port C compiler uses the following register numbers
966 in its Dwarf output code:
967 0 for %eax (gcc regno = 0)
968 1 for %ecx (gcc regno = 2)
969 2 for %edx (gcc regno = 1)
970 3 for %ebx (gcc regno = 3)
971 4 for %esp (gcc regno = 7)
972 5 for %ebp (gcc regno = 6)
973 6 for %esi (gcc regno = 4)
974 7 for %edi (gcc regno = 5)
975 The following three DWARF register numbers are never generated by
976 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
977 believes these numbers have these meanings.
978 8 for %eip (no gcc equivalent)
979 9 for %eflags (gcc regno = 17)
980 10 for %trapno (no gcc equivalent)
981 It is not at all clear how we should number the FP stack registers
982 for the x86 architecture. If the version of SDB on x86/svr4 were
983 a bit less brain dead with respect to floating-point then we would
984 have a precedent to follow with respect to DWARF register numbers
985 for x86 FP registers, but the SDB on x86/svr4 is so completely
986 broken with respect to FP registers that it is hardly worth thinking
987 of it as something to strive for compatibility with.
988 The version of x86/svr4 SDB I have at the moment does (partially)
989 seem to believe that DWARF register number 11 is associated with
990 the x86 register %st(0), but that's about all. Higher DWARF
991 register numbers don't seem to be associated with anything in
992 particular, and even for DWARF regno 11, SDB only seems to under-
993 stand that it should say that a variable lives in %st(0) (when
994 asked via an `=' command) if we said it was in DWARF regno 11,
995 but SDB still prints garbage when asked for the value of the
996 variable in question (via a `/' command).
997 (Also note that the labels SDB prints for various FP stack regs
998 when doing an `x' command are all wrong.)
999 Note that these problems generally don't affect the native SVR4
1000 C compiler because it doesn't allow the use of -O with -g and
1001 because when it is *not* optimizing, it allocates a memory
1002 location for each floating-point variable, and the memory
1003 location is what gets described in the DWARF AT_location
1004 attribute for the variable in question.
1005 Regardless of the severe mental illness of the x86/svr4 SDB, we
1006 do something sensible here and we use the following DWARF
1007 register numbers. Note that these are all stack-top-relative
1009 11 for %st(0) (gcc regno = 8)
1010 12 for %st(1) (gcc regno = 9)
1011 13 for %st(2) (gcc regno = 10)
1012 14 for %st(3) (gcc regno = 11)
1013 15 for %st(4) (gcc regno = 12)
1014 16 for %st(5) (gcc regno = 13)
1015 17 for %st(6) (gcc regno = 14)
1016 18 for %st(7) (gcc regno = 15)
1018 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1020 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1021 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1022 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1023 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1024 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1025 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1026 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1029 /* Test and compare insns in i386.md store the information needed to
1030 generate branch and scc insns here. */
1032 rtx ix86_compare_op0 = NULL_RTX;
1033 rtx ix86_compare_op1 = NULL_RTX;
1034 rtx ix86_compare_emitted = NULL_RTX;
1036 /* Size of the register save area. */
1037 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1039 /* Define the structure for the machine field in struct function. */
1041 struct stack_local_entry GTY(())
1043 unsigned short mode;
1046 struct stack_local_entry *next;
1049 /* Structure describing stack frame layout.
1050 Stack grows downward:
1056 saved frame pointer if frame_pointer_needed
1057 <- HARD_FRAME_POINTER
1062 [va_arg registers] (
1063 > to_allocate <- FRAME_POINTER
1073 HOST_WIDE_INT frame;
1075 int outgoing_arguments_size;
1078 HOST_WIDE_INT to_allocate;
1079 /* The offsets relative to ARG_POINTER. */
1080 HOST_WIDE_INT frame_pointer_offset;
1081 HOST_WIDE_INT hard_frame_pointer_offset;
1082 HOST_WIDE_INT stack_pointer_offset;
1084 /* When save_regs_using_mov is set, emit prologue using
1085 move instead of push instructions. */
1086 bool save_regs_using_mov;
1089 /* Code model option. */
1090 enum cmodel ix86_cmodel;
1092 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1094 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1096 /* Which unit we are generating floating point math for. */
1097 enum fpmath_unit ix86_fpmath;
1099 /* Which cpu are we scheduling for. */
1100 enum processor_type ix86_tune;
1101 /* Which instruction set architecture to use. */
1102 enum processor_type ix86_arch;
1104 /* true if sse prefetch instruction is not NOOP. */
1105 int x86_prefetch_sse;
1107 /* ix86_regparm_string as a number */
1108 static int ix86_regparm;
1110 /* -mstackrealign option */
1111 extern int ix86_force_align_arg_pointer;
1112 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1114 /* Preferred alignment for stack boundary in bits. */
1115 unsigned int ix86_preferred_stack_boundary;
1117 /* Values 1-5: see jump.c */
1118 int ix86_branch_cost;
1120 /* Variables which are this size or smaller are put in the data/bss
1121 or ldata/lbss sections. */
1123 int ix86_section_threshold = 65536;
1125 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1126 char internal_label_prefix[16];
1127 int internal_label_prefix_len;
1129 static bool ix86_handle_option (size_t, const char *, int);
1130 static void output_pic_addr_const (FILE *, rtx, int);
1131 static void put_condition_code (enum rtx_code, enum machine_mode,
1133 static const char *get_some_local_dynamic_name (void);
1134 static int get_some_local_dynamic_name_1 (rtx *, void *);
1135 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1136 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1138 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1139 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1141 static rtx get_thread_pointer (int);
1142 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1143 static void get_pc_thunk_name (char [32], unsigned int);
1144 static rtx gen_push (rtx);
1145 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1146 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1147 static struct machine_function * ix86_init_machine_status (void);
1148 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1149 static int ix86_nsaved_regs (void);
1150 static void ix86_emit_save_regs (void);
1151 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1152 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1153 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1154 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1155 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1156 static rtx ix86_expand_aligntest (rtx, int);
1157 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1158 static int ix86_issue_rate (void);
1159 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1160 static int ia32_multipass_dfa_lookahead (void);
1161 static void ix86_init_mmx_sse_builtins (void);
1162 static rtx x86_this_parameter (tree);
1163 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1164 HOST_WIDE_INT, tree);
1165 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1166 static void x86_file_start (void);
1167 static void ix86_reorg (void);
1168 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1169 static tree ix86_build_builtin_va_list (void);
1170 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1172 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1173 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1174 static bool ix86_vector_mode_supported_p (enum machine_mode);
1176 static int ix86_address_cost (rtx);
1177 static bool ix86_cannot_force_const_mem (rtx);
1178 static rtx ix86_delegitimize_address (rtx);
1180 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1182 struct builtin_description;
1183 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1185 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1187 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1188 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1189 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1190 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1191 static rtx safe_vector_operand (rtx, enum machine_mode);
1192 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1193 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1194 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1195 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1196 static int ix86_fp_comparison_cost (enum rtx_code code);
1197 static unsigned int ix86_select_alt_pic_regnum (void);
1198 static int ix86_save_reg (unsigned int, int);
1199 static void ix86_compute_frame_layout (struct ix86_frame *);
1200 static int ix86_comp_type_attributes (tree, tree);
1201 static int ix86_function_regparm (tree, tree);
1202 const struct attribute_spec ix86_attribute_table[];
1203 static bool ix86_function_ok_for_sibcall (tree, tree);
1204 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1205 static int ix86_value_regno (enum machine_mode, tree, tree);
1206 static bool contains_128bit_aligned_vector_p (tree);
1207 static rtx ix86_struct_value_rtx (tree, int);
1208 static bool ix86_ms_bitfield_layout_p (tree);
1209 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1210 static int extended_reg_mentioned_1 (rtx *, void *);
1211 static bool ix86_rtx_costs (rtx, int, int, int *);
1212 static int min_insn_size (rtx);
1213 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1214 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1215 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1217 static void ix86_init_builtins (void);
1218 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1219 static const char *ix86_mangle_fundamental_type (tree);
1220 static tree ix86_stack_protect_fail (void);
1221 static rtx ix86_internal_arg_pointer (void);
1222 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1224 /* This function is only used on Solaris. */
1225 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1228 /* Register class used for passing given 64bit part of the argument.
1229 These represent classes as documented by the PS ABI, with the exception
1230 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1231 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1233 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1234 whenever possible (upper half does contain padding).
1236 enum x86_64_reg_class
1239 X86_64_INTEGER_CLASS,
1240 X86_64_INTEGERSI_CLASS,
1247 X86_64_COMPLEX_X87_CLASS,
1250 static const char * const x86_64_reg_class_name[] = {
1251 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1252 "sseup", "x87", "x87up", "cplx87", "no"
1255 #define MAX_CLASSES 4
1257 /* Table of constants used by fldpi, fldln2, etc.... */
1258 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1259 static bool ext_80387_constants_init = 0;
1260 static void init_ext_80387_constants (void);
1261 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1262 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1263 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1264 static section *x86_64_elf_select_section (tree decl, int reloc,
1265 unsigned HOST_WIDE_INT align)
1268 /* Initialize the GCC target structure. */
1269 #undef TARGET_ATTRIBUTE_TABLE
1270 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1271 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1272 # undef TARGET_MERGE_DECL_ATTRIBUTES
1273 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1276 #undef TARGET_COMP_TYPE_ATTRIBUTES
1277 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1279 #undef TARGET_INIT_BUILTINS
1280 #define TARGET_INIT_BUILTINS ix86_init_builtins
1281 #undef TARGET_EXPAND_BUILTIN
1282 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1284 #undef TARGET_ASM_FUNCTION_EPILOGUE
1285 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1287 #undef TARGET_ENCODE_SECTION_INFO
1288 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1289 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1291 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1294 #undef TARGET_ASM_OPEN_PAREN
1295 #define TARGET_ASM_OPEN_PAREN ""
1296 #undef TARGET_ASM_CLOSE_PAREN
1297 #define TARGET_ASM_CLOSE_PAREN ""
1299 #undef TARGET_ASM_ALIGNED_HI_OP
1300 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1301 #undef TARGET_ASM_ALIGNED_SI_OP
1302 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1304 #undef TARGET_ASM_ALIGNED_DI_OP
1305 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1308 #undef TARGET_ASM_UNALIGNED_HI_OP
1309 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1310 #undef TARGET_ASM_UNALIGNED_SI_OP
1311 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1312 #undef TARGET_ASM_UNALIGNED_DI_OP
1313 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1315 #undef TARGET_SCHED_ADJUST_COST
1316 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1317 #undef TARGET_SCHED_ISSUE_RATE
1318 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1319 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1320 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1321 ia32_multipass_dfa_lookahead
1323 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1324 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1327 #undef TARGET_HAVE_TLS
1328 #define TARGET_HAVE_TLS true
1330 #undef TARGET_CANNOT_FORCE_CONST_MEM
1331 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1332 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1333 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1335 #undef TARGET_DELEGITIMIZE_ADDRESS
1336 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1338 #undef TARGET_MS_BITFIELD_LAYOUT_P
1339 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1342 #undef TARGET_BINDS_LOCAL_P
1343 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1346 #undef TARGET_ASM_OUTPUT_MI_THUNK
1347 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1348 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1349 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1351 #undef TARGET_ASM_FILE_START
1352 #define TARGET_ASM_FILE_START x86_file_start
1354 #undef TARGET_DEFAULT_TARGET_FLAGS
1355 #define TARGET_DEFAULT_TARGET_FLAGS \
1357 | TARGET_64BIT_DEFAULT \
1358 | TARGET_SUBTARGET_DEFAULT \
1359 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1361 #undef TARGET_HANDLE_OPTION
1362 #define TARGET_HANDLE_OPTION ix86_handle_option
1364 #undef TARGET_RTX_COSTS
1365 #define TARGET_RTX_COSTS ix86_rtx_costs
1366 #undef TARGET_ADDRESS_COST
1367 #define TARGET_ADDRESS_COST ix86_address_cost
1369 #undef TARGET_FIXED_CONDITION_CODE_REGS
1370 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1371 #undef TARGET_CC_MODES_COMPATIBLE
1372 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1374 #undef TARGET_MACHINE_DEPENDENT_REORG
1375 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1377 #undef TARGET_BUILD_BUILTIN_VA_LIST
1378 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1380 #undef TARGET_MD_ASM_CLOBBERS
1381 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1383 #undef TARGET_PROMOTE_PROTOTYPES
1384 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1385 #undef TARGET_STRUCT_VALUE_RTX
1386 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1387 #undef TARGET_SETUP_INCOMING_VARARGS
1388 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1389 #undef TARGET_MUST_PASS_IN_STACK
1390 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1391 #undef TARGET_PASS_BY_REFERENCE
1392 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1393 #undef TARGET_INTERNAL_ARG_POINTER
1394 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1395 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1396 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1398 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1399 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1401 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1402 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1404 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1405 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1408 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1409 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1412 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1413 #undef TARGET_INSERT_ATTRIBUTES
1414 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1417 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1418 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1420 #undef TARGET_STACK_PROTECT_FAIL
1421 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1423 #undef TARGET_FUNCTION_VALUE
1424 #define TARGET_FUNCTION_VALUE ix86_function_value
1426 struct gcc_target targetm = TARGET_INITIALIZER;
1429 /* The svr4 ABI for the i386 says that records and unions are returned
1431 #ifndef DEFAULT_PCC_STRUCT_RETURN
1432 #define DEFAULT_PCC_STRUCT_RETURN 1
1435 /* Implement TARGET_HANDLE_OPTION. */
1438 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1445 target_flags &= ~MASK_3DNOW_A;
1446 target_flags_explicit |= MASK_3DNOW_A;
1453 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1454 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1461 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1462 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1469 target_flags &= ~MASK_SSE3;
1470 target_flags_explicit |= MASK_SSE3;
1479 /* Sometimes certain combinations of command options do not make
1480 sense on a particular target machine. You can define a macro
1481 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1482 defined, is executed once just after all the command options have
1485 Don't use this macro to turn on various extra optimizations for
1486 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1489 override_options (void)
1492 int ix86_tune_defaulted = 0;
1494 /* Comes from final.c -- no real reason to change it. */
1495 #define MAX_CODE_ALIGN 16
1499 const struct processor_costs *cost; /* Processor costs */
1500 const int target_enable; /* Target flags to enable. */
1501 const int target_disable; /* Target flags to disable. */
1502 const int align_loop; /* Default alignments. */
1503 const int align_loop_max_skip;
1504 const int align_jump;
1505 const int align_jump_max_skip;
1506 const int align_func;
1508 const processor_target_table[PROCESSOR_max] =
1510 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1511 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1512 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1513 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1514 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1515 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1516 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1517 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1518 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1519 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1520 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1521 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1524 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1527 const char *const name; /* processor name or nickname. */
1528 const enum processor_type processor;
1529 const enum pta_flags
1535 PTA_PREFETCH_SSE = 16,
1542 const processor_alias_table[] =
1544 {"i386", PROCESSOR_I386, 0},
1545 {"i486", PROCESSOR_I486, 0},
1546 {"i586", PROCESSOR_PENTIUM, 0},
1547 {"pentium", PROCESSOR_PENTIUM, 0},
1548 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1549 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1550 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1551 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1552 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1553 {"i686", PROCESSOR_PENTIUMPRO, 0},
1554 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1555 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1556 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1557 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1558 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1559 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1560 | PTA_MMX | PTA_PREFETCH_SSE},
1561 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1562 | PTA_MMX | PTA_PREFETCH_SSE},
1563 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1564 | PTA_MMX | PTA_PREFETCH_SSE},
1565 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1566 | PTA_MMX | PTA_PREFETCH_SSE},
1567 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1569 {"k6", PROCESSOR_K6, PTA_MMX},
1570 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1571 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1572 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1574 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1575 | PTA_3DNOW | PTA_3DNOW_A},
1576 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1577 | PTA_3DNOW_A | PTA_SSE},
1578 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1579 | PTA_3DNOW_A | PTA_SSE},
1580 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1581 | PTA_3DNOW_A | PTA_SSE},
1582 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1583 | PTA_SSE | PTA_SSE2 },
1584 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1585 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1586 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1587 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1588 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1589 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1590 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1591 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1592 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1593 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1596 int const pta_size = ARRAY_SIZE (processor_alias_table);
1598 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1599 SUBTARGET_OVERRIDE_OPTIONS;
1602 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1603 SUBSUBTARGET_OVERRIDE_OPTIONS;
1606 /* -fPIC is the default for x86_64. */
1607 if (TARGET_MACHO && TARGET_64BIT)
1610 /* Set the default values for switches whose default depends on TARGET_64BIT
1611 in case they weren't overwritten by command line options. */
1614 /* Mach-O doesn't support omitting the frame pointer for now. */
1615 if (flag_omit_frame_pointer == 2)
1616 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1617 if (flag_asynchronous_unwind_tables == 2)
1618 flag_asynchronous_unwind_tables = 1;
1619 if (flag_pcc_struct_return == 2)
1620 flag_pcc_struct_return = 0;
1624 if (flag_omit_frame_pointer == 2)
1625 flag_omit_frame_pointer = 0;
1626 if (flag_asynchronous_unwind_tables == 2)
1627 flag_asynchronous_unwind_tables = 0;
1628 if (flag_pcc_struct_return == 2)
1629 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1632 /* Need to check -mtune=generic first. */
1633 if (ix86_tune_string)
1635 if (!strcmp (ix86_tune_string, "generic")
1636 || !strcmp (ix86_tune_string, "i686")
1637 /* As special support for cross compilers we read -mtune=native
1638 as -mtune=generic. With native compilers we won't see the
1639 -mtune=native, as it was changed by the driver. */
1640 || !strcmp (ix86_tune_string, "native"))
1643 ix86_tune_string = "generic64";
1645 ix86_tune_string = "generic32";
1647 else if (!strncmp (ix86_tune_string, "generic", 7))
1648 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1652 if (ix86_arch_string)
1653 ix86_tune_string = ix86_arch_string;
1654 if (!ix86_tune_string)
1656 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1657 ix86_tune_defaulted = 1;
1660 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1661 need to use a sensible tune option. */
1662 if (!strcmp (ix86_tune_string, "generic")
1663 || !strcmp (ix86_tune_string, "x86-64")
1664 || !strcmp (ix86_tune_string, "i686"))
1667 ix86_tune_string = "generic64";
1669 ix86_tune_string = "generic32";
1672 if (!strcmp (ix86_tune_string, "x86-64"))
1673 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1674 "-mtune=generic instead as appropriate.");
1676 if (!ix86_arch_string)
1677 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1678 if (!strcmp (ix86_arch_string, "generic"))
1679 error ("generic CPU can be used only for -mtune= switch");
1680 if (!strncmp (ix86_arch_string, "generic", 7))
1681 error ("bad value (%s) for -march= switch", ix86_arch_string);
1683 if (ix86_cmodel_string != 0)
1685 if (!strcmp (ix86_cmodel_string, "small"))
1686 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1687 else if (!strcmp (ix86_cmodel_string, "medium"))
1688 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1690 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1691 else if (!strcmp (ix86_cmodel_string, "32"))
1692 ix86_cmodel = CM_32;
1693 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1694 ix86_cmodel = CM_KERNEL;
1695 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1696 ix86_cmodel = CM_LARGE;
1698 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1702 ix86_cmodel = CM_32;
1704 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1706 if (ix86_asm_string != 0)
1709 && !strcmp (ix86_asm_string, "intel"))
1710 ix86_asm_dialect = ASM_INTEL;
1711 else if (!strcmp (ix86_asm_string, "att"))
1712 ix86_asm_dialect = ASM_ATT;
1714 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1716 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1717 error ("code model %qs not supported in the %s bit mode",
1718 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1719 if (ix86_cmodel == CM_LARGE)
1720 sorry ("code model %<large%> not supported yet");
1721 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1722 sorry ("%i-bit mode not compiled in",
1723 (target_flags & MASK_64BIT) ? 64 : 32);
1725 for (i = 0; i < pta_size; i++)
1726 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1728 ix86_arch = processor_alias_table[i].processor;
1729 /* Default cpu tuning to the architecture. */
1730 ix86_tune = ix86_arch;
1731 if (processor_alias_table[i].flags & PTA_MMX
1732 && !(target_flags_explicit & MASK_MMX))
1733 target_flags |= MASK_MMX;
1734 if (processor_alias_table[i].flags & PTA_3DNOW
1735 && !(target_flags_explicit & MASK_3DNOW))
1736 target_flags |= MASK_3DNOW;
1737 if (processor_alias_table[i].flags & PTA_3DNOW_A
1738 && !(target_flags_explicit & MASK_3DNOW_A))
1739 target_flags |= MASK_3DNOW_A;
1740 if (processor_alias_table[i].flags & PTA_SSE
1741 && !(target_flags_explicit & MASK_SSE))
1742 target_flags |= MASK_SSE;
1743 if (processor_alias_table[i].flags & PTA_SSE2
1744 && !(target_flags_explicit & MASK_SSE2))
1745 target_flags |= MASK_SSE2;
1746 if (processor_alias_table[i].flags & PTA_SSE3
1747 && !(target_flags_explicit & MASK_SSE3))
1748 target_flags |= MASK_SSE3;
1749 if (processor_alias_table[i].flags & PTA_SSSE3
1750 && !(target_flags_explicit & MASK_SSSE3))
1751 target_flags |= MASK_SSSE3;
1752 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1753 x86_prefetch_sse = true;
1754 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1755 error ("CPU you selected does not support x86-64 "
1761 error ("bad value (%s) for -march= switch", ix86_arch_string);
1763 for (i = 0; i < pta_size; i++)
1764 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1766 ix86_tune = processor_alias_table[i].processor;
1767 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1769 if (ix86_tune_defaulted)
1771 ix86_tune_string = "x86-64";
1772 for (i = 0; i < pta_size; i++)
1773 if (! strcmp (ix86_tune_string,
1774 processor_alias_table[i].name))
1776 ix86_tune = processor_alias_table[i].processor;
1779 error ("CPU you selected does not support x86-64 "
1782 /* Intel CPUs have always interpreted SSE prefetch instructions as
1783 NOPs; so, we can enable SSE prefetch instructions even when
1784 -mtune (rather than -march) points us to a processor that has them.
1785 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1786 higher processors. */
1787 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1788 x86_prefetch_sse = true;
1792 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1795 ix86_cost = &size_cost;
1797 ix86_cost = processor_target_table[ix86_tune].cost;
1798 target_flags |= processor_target_table[ix86_tune].target_enable;
1799 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1801 /* Arrange to set up i386_stack_locals for all functions. */
1802 init_machine_status = ix86_init_machine_status;
1804 /* Validate -mregparm= value. */
1805 if (ix86_regparm_string)
1807 i = atoi (ix86_regparm_string);
1808 if (i < 0 || i > REGPARM_MAX)
1809 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1815 ix86_regparm = REGPARM_MAX;
1817 /* If the user has provided any of the -malign-* options,
1818 warn and use that value only if -falign-* is not set.
1819 Remove this code in GCC 3.2 or later. */
1820 if (ix86_align_loops_string)
1822 warning (0, "-malign-loops is obsolete, use -falign-loops");
1823 if (align_loops == 0)
1825 i = atoi (ix86_align_loops_string);
1826 if (i < 0 || i > MAX_CODE_ALIGN)
1827 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1829 align_loops = 1 << i;
1833 if (ix86_align_jumps_string)
1835 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1836 if (align_jumps == 0)
1838 i = atoi (ix86_align_jumps_string);
1839 if (i < 0 || i > MAX_CODE_ALIGN)
1840 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1842 align_jumps = 1 << i;
1846 if (ix86_align_funcs_string)
1848 warning (0, "-malign-functions is obsolete, use -falign-functions");
1849 if (align_functions == 0)
1851 i = atoi (ix86_align_funcs_string);
1852 if (i < 0 || i > MAX_CODE_ALIGN)
1853 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1855 align_functions = 1 << i;
1859 /* Default align_* from the processor table. */
1860 if (align_loops == 0)
1862 align_loops = processor_target_table[ix86_tune].align_loop;
1863 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1865 if (align_jumps == 0)
1867 align_jumps = processor_target_table[ix86_tune].align_jump;
1868 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1870 if (align_functions == 0)
1872 align_functions = processor_target_table[ix86_tune].align_func;
1875 /* Validate -mbranch-cost= value, or provide default. */
1876 ix86_branch_cost = ix86_cost->branch_cost;
1877 if (ix86_branch_cost_string)
1879 i = atoi (ix86_branch_cost_string);
1881 error ("-mbranch-cost=%d is not between 0 and 5", i);
1883 ix86_branch_cost = i;
1885 if (ix86_section_threshold_string)
1887 i = atoi (ix86_section_threshold_string);
1889 error ("-mlarge-data-threshold=%d is negative", i);
1891 ix86_section_threshold = i;
1894 if (ix86_tls_dialect_string)
1896 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1897 ix86_tls_dialect = TLS_DIALECT_GNU;
1898 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1899 ix86_tls_dialect = TLS_DIALECT_GNU2;
1900 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1901 ix86_tls_dialect = TLS_DIALECT_SUN;
1903 error ("bad value (%s) for -mtls-dialect= switch",
1904 ix86_tls_dialect_string);
1907 /* Keep nonleaf frame pointers. */
1908 if (flag_omit_frame_pointer)
1909 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1910 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1911 flag_omit_frame_pointer = 1;
1913 /* If we're doing fast math, we don't care about comparison order
1914 wrt NaNs. This lets us use a shorter comparison sequence. */
1915 if (flag_finite_math_only)
1916 target_flags &= ~MASK_IEEE_FP;
1918 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1919 since the insns won't need emulation. */
1920 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1921 target_flags &= ~MASK_NO_FANCY_MATH_387;
1923 /* Likewise, if the target doesn't have a 387, or we've specified
1924 software floating point, don't use 387 inline intrinsics. */
1926 target_flags |= MASK_NO_FANCY_MATH_387;
1928 /* Turn on SSE3 builtins for -mssse3. */
1930 target_flags |= MASK_SSE3;
1932 /* Turn on SSE2 builtins for -msse3. */
1934 target_flags |= MASK_SSE2;
1936 /* Turn on SSE builtins for -msse2. */
1938 target_flags |= MASK_SSE;
1940 /* Turn on MMX builtins for -msse. */
1943 target_flags |= MASK_MMX & ~target_flags_explicit;
1944 x86_prefetch_sse = true;
1947 /* Turn on MMX builtins for 3Dnow. */
1949 target_flags |= MASK_MMX;
1953 if (TARGET_ALIGN_DOUBLE)
1954 error ("-malign-double makes no sense in the 64bit mode");
1956 error ("-mrtd calling convention not supported in the 64bit mode");
1958 /* Enable by default the SSE and MMX builtins. Do allow the user to
1959 explicitly disable any of these. In particular, disabling SSE and
1960 MMX for kernel code is extremely useful. */
1962 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1963 & ~target_flags_explicit);
1967 /* i386 ABI does not specify red zone. It still makes sense to use it
1968 when programmer takes care to stack from being destroyed. */
1969 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1970 target_flags |= MASK_NO_RED_ZONE;
1973 /* Validate -mpreferred-stack-boundary= value, or provide default.
1974 The default of 128 bits is for Pentium III's SSE __m128. We can't
1975 change it because of optimize_size. Otherwise, we can't mix object
1976 files compiled with -Os and -On. */
1977 ix86_preferred_stack_boundary = 128;
1978 if (ix86_preferred_stack_boundary_string)
1980 i = atoi (ix86_preferred_stack_boundary_string);
1981 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1982 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1983 TARGET_64BIT ? 4 : 2);
1985 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1988 /* Accept -msseregparm only if at least SSE support is enabled. */
1989 if (TARGET_SSEREGPARM
1991 error ("-msseregparm used without SSE enabled");
1993 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1995 if (ix86_fpmath_string != 0)
1997 if (! strcmp (ix86_fpmath_string, "387"))
1998 ix86_fpmath = FPMATH_387;
1999 else if (! strcmp (ix86_fpmath_string, "sse"))
2003 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2004 ix86_fpmath = FPMATH_387;
2007 ix86_fpmath = FPMATH_SSE;
2009 else if (! strcmp (ix86_fpmath_string, "387,sse")
2010 || ! strcmp (ix86_fpmath_string, "sse,387"))
2014 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2015 ix86_fpmath = FPMATH_387;
2017 else if (!TARGET_80387)
2019 warning (0, "387 instruction set disabled, using SSE arithmetics");
2020 ix86_fpmath = FPMATH_SSE;
2023 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2026 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2029 /* If the i387 is disabled, then do not return values in it. */
2031 target_flags &= ~MASK_FLOAT_RETURNS;
2033 if ((x86_accumulate_outgoing_args & TUNEMASK)
2034 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2036 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2038 /* ??? Unwind info is not correct around the CFG unless either a frame
2039 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2040 unwind info generation to be aware of the CFG and propagating states
2042 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2043 || flag_exceptions || flag_non_call_exceptions)
2044 && flag_omit_frame_pointer
2045 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2047 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2048 warning (0, "unwind tables currently require either a frame pointer "
2049 "or -maccumulate-outgoing-args for correctness");
2050 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2053 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2056 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2057 p = strchr (internal_label_prefix, 'X');
2058 internal_label_prefix_len = p - internal_label_prefix;
2062 /* When scheduling description is not available, disable scheduler pass
2063 so it won't slow down the compilation and make x87 code slower. */
2064 if (!TARGET_SCHEDULE)
2065 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2068 /* switch to the appropriate section for output of DECL.
2069 DECL is either a `VAR_DECL' node or a constant of some sort.
2070 RELOC indicates whether forming the initial value of DECL requires
2071 link-time relocations. */
2074 x86_64_elf_select_section (tree decl, int reloc,
2075 unsigned HOST_WIDE_INT align)
2077 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2078 && ix86_in_large_data_p (decl))
2080 const char *sname = NULL;
2081 unsigned int flags = SECTION_WRITE;
2082 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2087 case SECCAT_DATA_REL:
2088 sname = ".ldata.rel";
2090 case SECCAT_DATA_REL_LOCAL:
2091 sname = ".ldata.rel.local";
2093 case SECCAT_DATA_REL_RO:
2094 sname = ".ldata.rel.ro";
2096 case SECCAT_DATA_REL_RO_LOCAL:
2097 sname = ".ldata.rel.ro.local";
2101 flags |= SECTION_BSS;
2104 case SECCAT_RODATA_MERGE_STR:
2105 case SECCAT_RODATA_MERGE_STR_INIT:
2106 case SECCAT_RODATA_MERGE_CONST:
2110 case SECCAT_SRODATA:
2117 /* We don't split these for medium model. Place them into
2118 default sections and hope for best. */
2123 /* We might get called with string constants, but get_named_section
2124 doesn't like them as they are not DECLs. Also, we need to set
2125 flags in that case. */
2127 return get_section (sname, flags, NULL);
2128 return get_named_section (decl, sname, reloc);
2131 return default_elf_select_section (decl, reloc, align);
2134 /* Build up a unique section name, expressed as a
2135 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2136 RELOC indicates whether the initial value of EXP requires
2137 link-time relocations. */
2140 x86_64_elf_unique_section (tree decl, int reloc)
2142 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2143 && ix86_in_large_data_p (decl))
2145 const char *prefix = NULL;
2146 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2147 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2149 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2152 case SECCAT_DATA_REL:
2153 case SECCAT_DATA_REL_LOCAL:
2154 case SECCAT_DATA_REL_RO:
2155 case SECCAT_DATA_REL_RO_LOCAL:
2156 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2159 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2162 case SECCAT_RODATA_MERGE_STR:
2163 case SECCAT_RODATA_MERGE_STR_INIT:
2164 case SECCAT_RODATA_MERGE_CONST:
2165 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2167 case SECCAT_SRODATA:
2174 /* We don't split these for medium model. Place them into
2175 default sections and hope for best. */
2183 plen = strlen (prefix);
2185 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2186 name = targetm.strip_name_encoding (name);
2187 nlen = strlen (name);
2189 string = alloca (nlen + plen + 1);
2190 memcpy (string, prefix, plen);
2191 memcpy (string + plen, name, nlen + 1);
2193 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2197 default_unique_section (decl, reloc);
2200 #ifdef COMMON_ASM_OP
2201 /* This says how to output assembler code to declare an
2202 uninitialized external linkage data object.
2204 For medium model x86-64 we need to use .largecomm opcode for
2207 x86_elf_aligned_common (FILE *file,
2208 const char *name, unsigned HOST_WIDE_INT size,
2211 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2212 && size > (unsigned int)ix86_section_threshold)
2213 fprintf (file, ".largecomm\t");
2215 fprintf (file, "%s", COMMON_ASM_OP);
2216 assemble_name (file, name);
2217 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2218 size, align / BITS_PER_UNIT);
2221 /* Utility function for targets to use in implementing
2222 ASM_OUTPUT_ALIGNED_BSS. */
2225 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2226 const char *name, unsigned HOST_WIDE_INT size,
2229 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2230 && size > (unsigned int)ix86_section_threshold)
2231 switch_to_section (get_named_section (decl, ".lbss", 0));
2233 switch_to_section (bss_section);
2234 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2235 #ifdef ASM_DECLARE_OBJECT_NAME
2236 last_assemble_variable_decl = decl;
2237 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2239 /* Standard thing is just output label for the object. */
2240 ASM_OUTPUT_LABEL (file, name);
2241 #endif /* ASM_DECLARE_OBJECT_NAME */
2242 ASM_OUTPUT_SKIP (file, size ? size : 1);
2247 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2249 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2250 make the problem with not enough registers even worse. */
2251 #ifdef INSN_SCHEDULING
2253 flag_schedule_insns = 0;
2257 /* The Darwin libraries never set errno, so we might as well
2258 avoid calling them when that's the only reason we would. */
2259 flag_errno_math = 0;
2261 /* The default values of these switches depend on the TARGET_64BIT
2262 that is not known at this moment. Mark these values with 2 and
2263 let user the to override these. In case there is no command line option
2264 specifying them, we will set the defaults in override_options. */
2266 flag_omit_frame_pointer = 2;
2267 flag_pcc_struct_return = 2;
2268 flag_asynchronous_unwind_tables = 2;
2269 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2270 SUBTARGET_OPTIMIZATION_OPTIONS;
2274 /* Table of valid machine attributes. */
2275 const struct attribute_spec ix86_attribute_table[] =
2277 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2278 /* Stdcall attribute says callee is responsible for popping arguments
2279 if they are not variable. */
2280 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2281 /* Fastcall attribute says callee is responsible for popping arguments
2282 if they are not variable. */
2283 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2284 /* Cdecl attribute says the callee is a normal C declaration */
2285 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2286 /* Regparm attribute specifies how many integer arguments are to be
2287 passed in registers. */
2288 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2289 /* Sseregparm attribute says we are using x86_64 calling conventions
2290 for FP arguments. */
2291 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2292 /* force_align_arg_pointer says this function realigns the stack at entry. */
2293 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2294 false, true, true, ix86_handle_cconv_attribute },
2295 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2296 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2297 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2298 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2300 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2301 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2302 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2303 SUBTARGET_ATTRIBUTE_TABLE,
2305 { NULL, 0, 0, false, false, false, NULL }
2308 /* Decide whether we can make a sibling call to a function. DECL is the
2309 declaration of the function being targeted by the call and EXP is the
2310 CALL_EXPR representing the call. */
2313 ix86_function_ok_for_sibcall (tree decl, tree exp)
2318 /* If we are generating position-independent code, we cannot sibcall
2319 optimize any indirect call, or a direct call to a global function,
2320 as the PLT requires %ebx be live. */
2321 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2328 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2329 if (POINTER_TYPE_P (func))
2330 func = TREE_TYPE (func);
2333 /* Check that the return value locations are the same. Like
2334 if we are returning floats on the 80387 register stack, we cannot
2335 make a sibcall from a function that doesn't return a float to a
2336 function that does or, conversely, from a function that does return
2337 a float to a function that doesn't; the necessary stack adjustment
2338 would not be executed. This is also the place we notice
2339 differences in the return value ABI. Note that it is ok for one
2340 of the functions to have void return type as long as the return
2341 value of the other is passed in a register. */
2342 a = ix86_function_value (TREE_TYPE (exp), func, false);
2343 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2345 if (STACK_REG_P (a) || STACK_REG_P (b))
2347 if (!rtx_equal_p (a, b))
2350 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2352 else if (!rtx_equal_p (a, b))
2355 /* If this call is indirect, we'll need to be able to use a call-clobbered
2356 register for the address of the target function. Make sure that all
2357 such registers are not used for passing parameters. */
2358 if (!decl && !TARGET_64BIT)
2362 /* We're looking at the CALL_EXPR, we need the type of the function. */
2363 type = TREE_OPERAND (exp, 0); /* pointer expression */
2364 type = TREE_TYPE (type); /* pointer type */
2365 type = TREE_TYPE (type); /* function type */
2367 if (ix86_function_regparm (type, NULL) >= 3)
2369 /* ??? Need to count the actual number of registers to be used,
2370 not the possible number of registers. Fix later. */
2375 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2376 /* Dllimport'd functions are also called indirectly. */
2377 if (decl && DECL_DLLIMPORT_P (decl)
2378 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2382 /* If we forced aligned the stack, then sibcalling would unalign the
2383 stack, which may break the called function. */
2384 if (cfun->machine->force_align_arg_pointer)
2387 /* Otherwise okay. That also includes certain types of indirect calls. */
2391 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2392 calling convention attributes;
2393 arguments as in struct attribute_spec.handler. */
2396 ix86_handle_cconv_attribute (tree *node, tree name,
2398 int flags ATTRIBUTE_UNUSED,
2401 if (TREE_CODE (*node) != FUNCTION_TYPE
2402 && TREE_CODE (*node) != METHOD_TYPE
2403 && TREE_CODE (*node) != FIELD_DECL
2404 && TREE_CODE (*node) != TYPE_DECL)
2406 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2407 IDENTIFIER_POINTER (name));
2408 *no_add_attrs = true;
2412 /* Can combine regparm with all attributes but fastcall. */
2413 if (is_attribute_p ("regparm", name))
2417 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2419 error ("fastcall and regparm attributes are not compatible");
2422 cst = TREE_VALUE (args);
2423 if (TREE_CODE (cst) != INTEGER_CST)
2425 warning (OPT_Wattributes,
2426 "%qs attribute requires an integer constant argument",
2427 IDENTIFIER_POINTER (name));
2428 *no_add_attrs = true;
2430 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2432 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2433 IDENTIFIER_POINTER (name), REGPARM_MAX);
2434 *no_add_attrs = true;
2438 && lookup_attribute (ix86_force_align_arg_pointer_string,
2439 TYPE_ATTRIBUTES (*node))
2440 && compare_tree_int (cst, REGPARM_MAX-1))
2442 error ("%s functions limited to %d register parameters",
2443 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2451 warning (OPT_Wattributes, "%qs attribute ignored",
2452 IDENTIFIER_POINTER (name));
2453 *no_add_attrs = true;
2457 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2458 if (is_attribute_p ("fastcall", name))
2460 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2462 error ("fastcall and cdecl attributes are not compatible");
2464 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2466 error ("fastcall and stdcall attributes are not compatible");
2468 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2470 error ("fastcall and regparm attributes are not compatible");
2474 /* Can combine stdcall with fastcall (redundant), regparm and
2476 else if (is_attribute_p ("stdcall", name))
2478 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2480 error ("stdcall and cdecl attributes are not compatible");
2482 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2484 error ("stdcall and fastcall attributes are not compatible");
2488 /* Can combine cdecl with regparm and sseregparm. */
2489 else if (is_attribute_p ("cdecl", name))
2491 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2493 error ("stdcall and cdecl attributes are not compatible");
2495 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2497 error ("fastcall and cdecl attributes are not compatible");
2501 /* Can combine sseregparm with all attributes. */
2506 /* Return 0 if the attributes for two types are incompatible, 1 if they
2507 are compatible, and 2 if they are nearly compatible (which causes a
2508 warning to be generated). */
2511 ix86_comp_type_attributes (tree type1, tree type2)
2513 /* Check for mismatch of non-default calling convention. */
2514 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2516 if (TREE_CODE (type1) != FUNCTION_TYPE)
2519 /* Check for mismatched fastcall/regparm types. */
2520 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2521 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2522 || (ix86_function_regparm (type1, NULL)
2523 != ix86_function_regparm (type2, NULL)))
2526 /* Check for mismatched sseregparm types. */
2527 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2528 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2531 /* Check for mismatched return types (cdecl vs stdcall). */
2532 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2533 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2539 /* Return the regparm value for a function with the indicated TYPE and DECL.
2540 DECL may be NULL when calling function indirectly
2541 or considering a libcall. */
2544 ix86_function_regparm (tree type, tree decl)
2547 int regparm = ix86_regparm;
2548 bool user_convention = false;
2552 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2555 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2556 user_convention = true;
2559 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2562 user_convention = true;
2565 /* Use register calling convention for local functions when possible. */
2566 if (!TARGET_64BIT && !user_convention && decl
2567 && flag_unit_at_a_time && !profile_flag)
2569 struct cgraph_local_info *i = cgraph_local_info (decl);
2572 int local_regparm, globals = 0, regno;
2574 /* Make sure no regparm register is taken by a global register
2576 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2577 if (global_regs[local_regparm])
2579 /* We can't use regparm(3) for nested functions as these use
2580 static chain pointer in third argument. */
2581 if (local_regparm == 3
2582 && decl_function_context (decl)
2583 && !DECL_NO_STATIC_CHAIN (decl))
2585 /* If the function realigns its stackpointer, the
2586 prologue will clobber %ecx. If we've already
2587 generated code for the callee, the callee
2588 DECL_STRUCT_FUNCTION is gone, so we fall back to
2589 scanning the attributes for the self-realigning
2591 if ((DECL_STRUCT_FUNCTION (decl)
2592 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2593 || (!DECL_STRUCT_FUNCTION (decl)
2594 && lookup_attribute (ix86_force_align_arg_pointer_string,
2595 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2597 /* Each global register variable increases register preassure,
2598 so the more global reg vars there are, the smaller regparm
2599 optimization use, unless requested by the user explicitly. */
2600 for (regno = 0; regno < 6; regno++)
2601 if (global_regs[regno])
2604 = globals < local_regparm ? local_regparm - globals : 0;
2606 if (local_regparm > regparm)
2607 regparm = local_regparm;
2614 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2615 in SSE registers for a function with the indicated TYPE and DECL.
2616 DECL may be NULL when calling function indirectly
2617 or considering a libcall. Otherwise return 0. */
2620 ix86_function_sseregparm (tree type, tree decl)
2622 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2623 by the sseregparm attribute. */
2624 if (TARGET_SSEREGPARM
2626 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2631 error ("Calling %qD with attribute sseregparm without "
2632 "SSE/SSE2 enabled", decl);
2634 error ("Calling %qT with attribute sseregparm without "
2635 "SSE/SSE2 enabled", type);
2642 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2643 in SSE registers even for 32-bit mode and not just 3, but up to
2644 8 SSE arguments in registers. */
2645 if (!TARGET_64BIT && decl
2646 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2648 struct cgraph_local_info *i = cgraph_local_info (decl);
2650 return TARGET_SSE2 ? 2 : 1;
2656 /* Return true if EAX is live at the start of the function. Used by
2657 ix86_expand_prologue to determine if we need special help before
2658 calling allocate_stack_worker. */
2661 ix86_eax_live_at_start_p (void)
2663 /* Cheat. Don't bother working forward from ix86_function_regparm
2664 to the function type to whether an actual argument is located in
2665 eax. Instead just look at cfg info, which is still close enough
2666 to correct at this point. This gives false positives for broken
2667 functions that might use uninitialized data that happens to be
2668 allocated in eax, but who cares? */
2669 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2672 /* Value is the number of bytes of arguments automatically
2673 popped when returning from a subroutine call.
2674 FUNDECL is the declaration node of the function (as a tree),
2675 FUNTYPE is the data type of the function (as a tree),
2676 or for a library call it is an identifier node for the subroutine name.
2677 SIZE is the number of bytes of arguments passed on the stack.
2679 On the 80386, the RTD insn may be used to pop them if the number
2680 of args is fixed, but if the number is variable then the caller
2681 must pop them all. RTD can't be used for library calls now
2682 because the library is compiled with the Unix compiler.
2683 Use of RTD is a selectable option, since it is incompatible with
2684 standard Unix calling sequences. If the option is not selected,
2685 the caller must always pop the args.
2687 The attribute stdcall is equivalent to RTD on a per module basis. */
2690 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2692 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2694 /* Cdecl functions override -mrtd, and never pop the stack. */
2695 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2697 /* Stdcall and fastcall functions will pop the stack if not
2699 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2700 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2704 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2705 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2706 == void_type_node)))
2710 /* Lose any fake structure return argument if it is passed on the stack. */
2711 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2713 && !KEEP_AGGREGATE_RETURN_POINTER)
2715 int nregs = ix86_function_regparm (funtype, fundecl);
2718 return GET_MODE_SIZE (Pmode);
2724 /* Argument support functions. */
2726 /* Return true when register may be used to pass function parameters. */
2728 ix86_function_arg_regno_p (int regno)
2732 return (regno < REGPARM_MAX
2733 || (TARGET_MMX && MMX_REGNO_P (regno)
2734 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2735 || (TARGET_SSE && SSE_REGNO_P (regno)
2736 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2738 if (TARGET_SSE && SSE_REGNO_P (regno)
2739 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2741 /* RAX is used as hidden argument to va_arg functions. */
2744 for (i = 0; i < REGPARM_MAX; i++)
2745 if (regno == x86_64_int_parameter_registers[i])
2750 /* Return if we do not know how to pass TYPE solely in registers. */
2753 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2755 if (must_pass_in_stack_var_size_or_pad (mode, type))
2758 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2759 The layout_type routine is crafty and tries to trick us into passing
2760 currently unsupported vector types on the stack by using TImode. */
2761 return (!TARGET_64BIT && mode == TImode
2762 && type && TREE_CODE (type) != VECTOR_TYPE);
2765 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2766 for a call to a function whose data type is FNTYPE.
2767 For a library call, FNTYPE is 0. */
2770 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2771 tree fntype, /* tree ptr for function decl */
2772 rtx libname, /* SYMBOL_REF of library name or 0 */
2775 static CUMULATIVE_ARGS zero_cum;
2776 tree param, next_param;
2778 if (TARGET_DEBUG_ARG)
2780 fprintf (stderr, "\ninit_cumulative_args (");
2782 fprintf (stderr, "fntype code = %s, ret code = %s",
2783 tree_code_name[(int) TREE_CODE (fntype)],
2784 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2786 fprintf (stderr, "no fntype");
2789 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2794 /* Set up the number of registers to use for passing arguments. */
2795 cum->nregs = ix86_regparm;
2797 cum->sse_nregs = SSE_REGPARM_MAX;
2799 cum->mmx_nregs = MMX_REGPARM_MAX;
2800 cum->warn_sse = true;
2801 cum->warn_mmx = true;
2802 cum->maybe_vaarg = false;
2804 /* Use ecx and edx registers if function has fastcall attribute,
2805 else look for regparm information. */
2806 if (fntype && !TARGET_64BIT)
2808 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2814 cum->nregs = ix86_function_regparm (fntype, fndecl);
2817 /* Set up the number of SSE registers used for passing SFmode
2818 and DFmode arguments. Warn for mismatching ABI. */
2819 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2821 /* Determine if this function has variable arguments. This is
2822 indicated by the last argument being 'void_type_mode' if there
2823 are no variable arguments. If there are variable arguments, then
2824 we won't pass anything in registers in 32-bit mode. */
2826 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2828 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2829 param != 0; param = next_param)
2831 next_param = TREE_CHAIN (param);
2832 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2842 cum->float_in_sse = 0;
2844 cum->maybe_vaarg = true;
2848 if ((!fntype && !libname)
2849 || (fntype && !TYPE_ARG_TYPES (fntype)))
2850 cum->maybe_vaarg = true;
2852 if (TARGET_DEBUG_ARG)
2853 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2858 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2859 But in the case of vector types, it is some vector mode.
2861 When we have only some of our vector isa extensions enabled, then there
2862 are some modes for which vector_mode_supported_p is false. For these
2863 modes, the generic vector support in gcc will choose some non-vector mode
2864 in order to implement the type. By computing the natural mode, we'll
2865 select the proper ABI location for the operand and not depend on whatever
2866 the middle-end decides to do with these vector types. */
2868 static enum machine_mode
2869 type_natural_mode (tree type)
2871 enum machine_mode mode = TYPE_MODE (type);
2873 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2875 HOST_WIDE_INT size = int_size_in_bytes (type);
2876 if ((size == 8 || size == 16)
2877 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2878 && TYPE_VECTOR_SUBPARTS (type) > 1)
2880 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2882 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2883 mode = MIN_MODE_VECTOR_FLOAT;
2885 mode = MIN_MODE_VECTOR_INT;
2887 /* Get the mode which has this inner mode and number of units. */
2888 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2889 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2890 && GET_MODE_INNER (mode) == innermode)
2900 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2901 this may not agree with the mode that the type system has chosen for the
2902 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2903 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2906 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2911 if (orig_mode != BLKmode)
2912 tmp = gen_rtx_REG (orig_mode, regno);
2915 tmp = gen_rtx_REG (mode, regno);
2916 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2917 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2923 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2924 of this code is to classify each 8bytes of incoming argument by the register
2925 class and assign registers accordingly. */
2927 /* Return the union class of CLASS1 and CLASS2.
2928 See the x86-64 PS ABI for details. */
2930 static enum x86_64_reg_class
2931 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2933 /* Rule #1: If both classes are equal, this is the resulting class. */
2934 if (class1 == class2)
2937 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2939 if (class1 == X86_64_NO_CLASS)
2941 if (class2 == X86_64_NO_CLASS)
2944 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2945 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2946 return X86_64_MEMORY_CLASS;
2948 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2949 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2950 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2951 return X86_64_INTEGERSI_CLASS;
2952 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2953 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2954 return X86_64_INTEGER_CLASS;
2956 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2958 if (class1 == X86_64_X87_CLASS
2959 || class1 == X86_64_X87UP_CLASS
2960 || class1 == X86_64_COMPLEX_X87_CLASS
2961 || class2 == X86_64_X87_CLASS
2962 || class2 == X86_64_X87UP_CLASS
2963 || class2 == X86_64_COMPLEX_X87_CLASS)
2964 return X86_64_MEMORY_CLASS;
2966 /* Rule #6: Otherwise class SSE is used. */
2967 return X86_64_SSE_CLASS;
2970 /* Classify the argument of type TYPE and mode MODE.
2971 CLASSES will be filled by the register class used to pass each word
2972 of the operand. The number of words is returned. In case the parameter
2973 should be passed in memory, 0 is returned. As a special case for zero
2974 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2976 BIT_OFFSET is used internally for handling records and specifies offset
2977 of the offset in bits modulo 256 to avoid overflow cases.
2979 See the x86-64 PS ABI for details.
2983 classify_argument (enum machine_mode mode, tree type,
2984 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2986 HOST_WIDE_INT bytes =
2987 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2988 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2990 /* Variable sized entities are always passed/returned in memory. */
2994 if (mode != VOIDmode
2995 && targetm.calls.must_pass_in_stack (mode, type))
2998 if (type && AGGREGATE_TYPE_P (type))
3002 enum x86_64_reg_class subclasses[MAX_CLASSES];
3004 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3008 for (i = 0; i < words; i++)
3009 classes[i] = X86_64_NO_CLASS;
3011 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3012 signalize memory class, so handle it as special case. */
3015 classes[0] = X86_64_NO_CLASS;
3019 /* Classify each field of record and merge classes. */
3020 switch (TREE_CODE (type))
3023 /* And now merge the fields of structure. */
3024 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3026 if (TREE_CODE (field) == FIELD_DECL)
3030 if (TREE_TYPE (field) == error_mark_node)
3033 /* Bitfields are always classified as integer. Handle them
3034 early, since later code would consider them to be
3035 misaligned integers. */
3036 if (DECL_BIT_FIELD (field))
3038 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3039 i < ((int_bit_position (field) + (bit_offset % 64))
3040 + tree_low_cst (DECL_SIZE (field), 0)
3043 merge_classes (X86_64_INTEGER_CLASS,
3048 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3049 TREE_TYPE (field), subclasses,
3050 (int_bit_position (field)
3051 + bit_offset) % 256);
3054 for (i = 0; i < num; i++)
3057 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3059 merge_classes (subclasses[i], classes[i + pos]);
3067 /* Arrays are handled as small records. */
3070 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3071 TREE_TYPE (type), subclasses, bit_offset);
3075 /* The partial classes are now full classes. */
3076 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3077 subclasses[0] = X86_64_SSE_CLASS;
3078 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3079 subclasses[0] = X86_64_INTEGER_CLASS;
3081 for (i = 0; i < words; i++)
3082 classes[i] = subclasses[i % num];
3087 case QUAL_UNION_TYPE:
3088 /* Unions are similar to RECORD_TYPE but offset is always 0.
3090 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3092 if (TREE_CODE (field) == FIELD_DECL)
3096 if (TREE_TYPE (field) == error_mark_node)
3099 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3100 TREE_TYPE (field), subclasses,
3104 for (i = 0; i < num; i++)
3105 classes[i] = merge_classes (subclasses[i], classes[i]);
3114 /* Final merger cleanup. */
3115 for (i = 0; i < words; i++)
3117 /* If one class is MEMORY, everything should be passed in
3119 if (classes[i] == X86_64_MEMORY_CLASS)
3122 /* The X86_64_SSEUP_CLASS should be always preceded by
3123 X86_64_SSE_CLASS. */
3124 if (classes[i] == X86_64_SSEUP_CLASS
3125 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3126 classes[i] = X86_64_SSE_CLASS;
3128 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3129 if (classes[i] == X86_64_X87UP_CLASS
3130 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3131 classes[i] = X86_64_SSE_CLASS;
3136 /* Compute alignment needed. We align all types to natural boundaries with
3137 exception of XFmode that is aligned to 64bits. */
3138 if (mode != VOIDmode && mode != BLKmode)
3140 int mode_alignment = GET_MODE_BITSIZE (mode);
3143 mode_alignment = 128;
3144 else if (mode == XCmode)
3145 mode_alignment = 256;
3146 if (COMPLEX_MODE_P (mode))
3147 mode_alignment /= 2;
3148 /* Misaligned fields are always returned in memory. */
3149 if (bit_offset % mode_alignment)
3153 /* for V1xx modes, just use the base mode */
3154 if (VECTOR_MODE_P (mode)
3155 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3156 mode = GET_MODE_INNER (mode);
3158 /* Classification of atomic types. */
3163 classes[0] = X86_64_SSE_CLASS;
3166 classes[0] = X86_64_SSE_CLASS;
3167 classes[1] = X86_64_SSEUP_CLASS;
3176 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3177 classes[0] = X86_64_INTEGERSI_CLASS;
3179 classes[0] = X86_64_INTEGER_CLASS;
3183 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3188 if (!(bit_offset % 64))
3189 classes[0] = X86_64_SSESF_CLASS;
3191 classes[0] = X86_64_SSE_CLASS;
3194 classes[0] = X86_64_SSEDF_CLASS;
3197 classes[0] = X86_64_X87_CLASS;
3198 classes[1] = X86_64_X87UP_CLASS;
3201 classes[0] = X86_64_SSE_CLASS;
3202 classes[1] = X86_64_SSEUP_CLASS;
3205 classes[0] = X86_64_SSE_CLASS;
3208 classes[0] = X86_64_SSEDF_CLASS;
3209 classes[1] = X86_64_SSEDF_CLASS;
3212 classes[0] = X86_64_COMPLEX_X87_CLASS;
3215 /* This modes is larger than 16 bytes. */
3223 classes[0] = X86_64_SSE_CLASS;
3224 classes[1] = X86_64_SSEUP_CLASS;
3230 classes[0] = X86_64_SSE_CLASS;
3236 gcc_assert (VECTOR_MODE_P (mode));
3241 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3243 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3244 classes[0] = X86_64_INTEGERSI_CLASS;
3246 classes[0] = X86_64_INTEGER_CLASS;
3247 classes[1] = X86_64_INTEGER_CLASS;
3248 return 1 + (bytes > 8);
3252 /* Examine the argument and return set number of register required in each
3253 class. Return 0 iff parameter should be passed in memory. */
3255 examine_argument (enum machine_mode mode, tree type, int in_return,
3256 int *int_nregs, int *sse_nregs)
3258 enum x86_64_reg_class class[MAX_CLASSES];
3259 int n = classify_argument (mode, type, class, 0);
3265 for (n--; n >= 0; n--)
3268 case X86_64_INTEGER_CLASS:
3269 case X86_64_INTEGERSI_CLASS:
3272 case X86_64_SSE_CLASS:
3273 case X86_64_SSESF_CLASS:
3274 case X86_64_SSEDF_CLASS:
3277 case X86_64_NO_CLASS:
3278 case X86_64_SSEUP_CLASS:
3280 case X86_64_X87_CLASS:
3281 case X86_64_X87UP_CLASS:
3285 case X86_64_COMPLEX_X87_CLASS:
3286 return in_return ? 2 : 0;
3287 case X86_64_MEMORY_CLASS:
3293 /* Construct container for the argument used by GCC interface. See
3294 FUNCTION_ARG for the detailed description. */
3297 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3298 tree type, int in_return, int nintregs, int nsseregs,
3299 const int *intreg, int sse_regno)
3301 /* The following variables hold the static issued_error state. */
3302 static bool issued_sse_arg_error;
3303 static bool issued_sse_ret_error;
3304 static bool issued_x87_ret_error;
3306 enum machine_mode tmpmode;
3308 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3309 enum x86_64_reg_class class[MAX_CLASSES];
3313 int needed_sseregs, needed_intregs;
3314 rtx exp[MAX_CLASSES];
3317 n = classify_argument (mode, type, class, 0);
3318 if (TARGET_DEBUG_ARG)
3321 fprintf (stderr, "Memory class\n");
3324 fprintf (stderr, "Classes:");
3325 for (i = 0; i < n; i++)
3327 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3329 fprintf (stderr, "\n");
3334 if (!examine_argument (mode, type, in_return, &needed_intregs,
3337 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3340 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3341 some less clueful developer tries to use floating-point anyway. */
3342 if (needed_sseregs && !TARGET_SSE)
3346 if (!issued_sse_ret_error)
3348 error ("SSE register return with SSE disabled");
3349 issued_sse_ret_error = true;
3352 else if (!issued_sse_arg_error)
3354 error ("SSE register argument with SSE disabled");
3355 issued_sse_arg_error = true;
3360 /* Likewise, error if the ABI requires us to return values in the
3361 x87 registers and the user specified -mno-80387. */
3362 if (!TARGET_80387 && in_return)
3363 for (i = 0; i < n; i++)
3364 if (class[i] == X86_64_X87_CLASS
3365 || class[i] == X86_64_X87UP_CLASS
3366 || class[i] == X86_64_COMPLEX_X87_CLASS)
3368 if (!issued_x87_ret_error)
3370 error ("x87 register return with x87 disabled");
3371 issued_x87_ret_error = true;
3376 /* First construct simple cases. Avoid SCmode, since we want to use
3377 single register to pass this type. */
3378 if (n == 1 && mode != SCmode)
3381 case X86_64_INTEGER_CLASS:
3382 case X86_64_INTEGERSI_CLASS:
3383 return gen_rtx_REG (mode, intreg[0]);
3384 case X86_64_SSE_CLASS:
3385 case X86_64_SSESF_CLASS:
3386 case X86_64_SSEDF_CLASS:
3387 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3388 case X86_64_X87_CLASS:
3389 case X86_64_COMPLEX_X87_CLASS:
3390 return gen_rtx_REG (mode, FIRST_STACK_REG);
3391 case X86_64_NO_CLASS:
3392 /* Zero sized array, struct or class. */
3397 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3399 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3401 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3402 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3403 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3404 && class[1] == X86_64_INTEGER_CLASS
3405 && (mode == CDImode || mode == TImode || mode == TFmode)
3406 && intreg[0] + 1 == intreg[1])
3407 return gen_rtx_REG (mode, intreg[0]);
3409 /* Otherwise figure out the entries of the PARALLEL. */
3410 for (i = 0; i < n; i++)
3414 case X86_64_NO_CLASS:
3416 case X86_64_INTEGER_CLASS:
3417 case X86_64_INTEGERSI_CLASS:
3418 /* Merge TImodes on aligned occasions here too. */
3419 if (i * 8 + 8 > bytes)
3420 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3421 else if (class[i] == X86_64_INTEGERSI_CLASS)
3425 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3426 if (tmpmode == BLKmode)
3428 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3429 gen_rtx_REG (tmpmode, *intreg),
3433 case X86_64_SSESF_CLASS:
3434 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3435 gen_rtx_REG (SFmode,
3436 SSE_REGNO (sse_regno)),
3440 case X86_64_SSEDF_CLASS:
3441 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3442 gen_rtx_REG (DFmode,
3443 SSE_REGNO (sse_regno)),
3447 case X86_64_SSE_CLASS:
3448 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3452 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3453 gen_rtx_REG (tmpmode,
3454 SSE_REGNO (sse_regno)),
3456 if (tmpmode == TImode)
3465 /* Empty aligned struct, union or class. */
3469 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3470 for (i = 0; i < nexps; i++)
3471 XVECEXP (ret, 0, i) = exp [i];
3475 /* Update the data in CUM to advance over an argument
3476 of mode MODE and data type TYPE.
3477 (TYPE is null for libcalls where that information may not be available.) */
3480 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3481 tree type, int named)
3484 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3485 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3488 mode = type_natural_mode (type);
3490 if (TARGET_DEBUG_ARG)
3491 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3492 "mode=%s, named=%d)\n\n",
3493 words, cum->words, cum->nregs, cum->sse_nregs,
3494 GET_MODE_NAME (mode), named);
3498 int int_nregs, sse_nregs;
3499 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3500 cum->words += words;
3501 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3503 cum->nregs -= int_nregs;
3504 cum->sse_nregs -= sse_nregs;
3505 cum->regno += int_nregs;
3506 cum->sse_regno += sse_nregs;
3509 cum->words += words;
3527 cum->words += words;
3528 cum->nregs -= words;
3529 cum->regno += words;
3531 if (cum->nregs <= 0)
3539 if (cum->float_in_sse < 2)
3542 if (cum->float_in_sse < 1)
3553 if (!type || !AGGREGATE_TYPE_P (type))
3555 cum->sse_words += words;
3556 cum->sse_nregs -= 1;
3557 cum->sse_regno += 1;
3558 if (cum->sse_nregs <= 0)
3570 if (!type || !AGGREGATE_TYPE_P (type))
3572 cum->mmx_words += words;
3573 cum->mmx_nregs -= 1;
3574 cum->mmx_regno += 1;
3575 if (cum->mmx_nregs <= 0)
3586 /* Define where to put the arguments to a function.
3587 Value is zero to push the argument on the stack,
3588 or a hard register in which to store the argument.
3590 MODE is the argument's machine mode.
3591 TYPE is the data type of the argument (as a tree).
3592 This is null for libcalls where that information may
3594 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3595 the preceding args and about the function being called.
3596 NAMED is nonzero if this argument is a named parameter
3597 (otherwise it is an extra parameter matching an ellipsis). */
3600 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3601 tree type, int named)
3603 enum machine_mode mode = orig_mode;
3606 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3607 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3608 static bool warnedsse, warnedmmx;
3610 /* To simplify the code below, represent vector types with a vector mode
3611 even if MMX/SSE are not active. */
3612 if (type && TREE_CODE (type) == VECTOR_TYPE)
3613 mode = type_natural_mode (type);
3615 /* Handle a hidden AL argument containing number of registers for varargs
3616 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3618 if (mode == VOIDmode)
3621 return GEN_INT (cum->maybe_vaarg
3622 ? (cum->sse_nregs < 0
3630 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3632 &x86_64_int_parameter_registers [cum->regno],
3637 /* For now, pass fp/complex values on the stack. */
3649 if (words <= cum->nregs)
3651 int regno = cum->regno;
3653 /* Fastcall allocates the first two DWORD (SImode) or
3654 smaller arguments to ECX and EDX. */
3657 if (mode == BLKmode || mode == DImode)
3660 /* ECX not EAX is the first allocated register. */
3664 ret = gen_rtx_REG (mode, regno);
3668 if (cum->float_in_sse < 2)
3671 if (cum->float_in_sse < 1)
3681 if (!type || !AGGREGATE_TYPE_P (type))
3683 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3686 warning (0, "SSE vector argument without SSE enabled "
3690 ret = gen_reg_or_parallel (mode, orig_mode,
3691 cum->sse_regno + FIRST_SSE_REG);
3698 if (!type || !AGGREGATE_TYPE_P (type))
3700 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3703 warning (0, "MMX vector argument without MMX enabled "
3707 ret = gen_reg_or_parallel (mode, orig_mode,
3708 cum->mmx_regno + FIRST_MMX_REG);
3713 if (TARGET_DEBUG_ARG)
3716 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3717 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3720 print_simple_rtl (stderr, ret);
3722 fprintf (stderr, ", stack");
3724 fprintf (stderr, " )\n");
3730 /* A C expression that indicates when an argument must be passed by
3731 reference. If nonzero for an argument, a copy of that argument is
3732 made in memory and a pointer to the argument is passed instead of
3733 the argument itself. The pointer is passed in whatever way is
3734 appropriate for passing a pointer to that type. */
3737 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3738 enum machine_mode mode ATTRIBUTE_UNUSED,
3739 tree type, bool named ATTRIBUTE_UNUSED)
3744 if (type && int_size_in_bytes (type) == -1)
3746 if (TARGET_DEBUG_ARG)
3747 fprintf (stderr, "function_arg_pass_by_reference\n");
3754 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3755 ABI. Only called if TARGET_SSE. */
3757 contains_128bit_aligned_vector_p (tree type)
3759 enum machine_mode mode = TYPE_MODE (type);
3760 if (SSE_REG_MODE_P (mode)
3761 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3763 if (TYPE_ALIGN (type) < 128)
3766 if (AGGREGATE_TYPE_P (type))
3768 /* Walk the aggregates recursively. */
3769 switch (TREE_CODE (type))
3773 case QUAL_UNION_TYPE:
3777 /* Walk all the structure fields. */
3778 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3780 if (TREE_CODE (field) == FIELD_DECL
3781 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3788 /* Just for use if some languages passes arrays by value. */
3789 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3800 /* Gives the alignment boundary, in bits, of an argument with the
3801 specified mode and type. */
3804 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3808 align = TYPE_ALIGN (type);
3810 align = GET_MODE_ALIGNMENT (mode);
3811 if (align < PARM_BOUNDARY)
3812 align = PARM_BOUNDARY;
3815 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3816 make an exception for SSE modes since these require 128bit
3819 The handling here differs from field_alignment. ICC aligns MMX
3820 arguments to 4 byte boundaries, while structure fields are aligned
3821 to 8 byte boundaries. */
3823 align = PARM_BOUNDARY;
3826 if (!SSE_REG_MODE_P (mode))
3827 align = PARM_BOUNDARY;
3831 if (!contains_128bit_aligned_vector_p (type))
3832 align = PARM_BOUNDARY;
3840 /* Return true if N is a possible register number of function value. */
3842 ix86_function_value_regno_p (int regno)
3845 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3846 || (regno == FIRST_SSE_REG && TARGET_SSE))
3850 && (regno == FIRST_MMX_REG && TARGET_MMX))
3856 /* Define how to find the value returned by a function.
3857 VALTYPE is the data type of the value (as a tree).
3858 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3859 otherwise, FUNC is 0. */
3861 ix86_function_value (tree valtype, tree fntype_or_decl,
3862 bool outgoing ATTRIBUTE_UNUSED)
3864 enum machine_mode natmode = type_natural_mode (valtype);
3868 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3869 1, REGPARM_MAX, SSE_REGPARM_MAX,
3870 x86_64_int_return_registers, 0);
3871 /* For zero sized structures, construct_container return NULL, but we
3872 need to keep rest of compiler happy by returning meaningful value. */
3874 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3879 tree fn = NULL_TREE, fntype;
3881 && DECL_P (fntype_or_decl))
3882 fn = fntype_or_decl;
3883 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3884 return gen_rtx_REG (TYPE_MODE (valtype),
3885 ix86_value_regno (natmode, fn, fntype));
3889 /* Return true iff type is returned in memory. */
3891 ix86_return_in_memory (tree type)
3893 int needed_intregs, needed_sseregs, size;
3894 enum machine_mode mode = type_natural_mode (type);
3897 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3899 if (mode == BLKmode)
3902 size = int_size_in_bytes (type);
3904 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3907 if (VECTOR_MODE_P (mode) || mode == TImode)
3909 /* User-created vectors small enough to fit in EAX. */
3913 /* MMX/3dNow values are returned in MM0,
3914 except when it doesn't exits. */
3916 return (TARGET_MMX ? 0 : 1);
3918 /* SSE values are returned in XMM0, except when it doesn't exist. */
3920 return (TARGET_SSE ? 0 : 1);
3934 /* When returning SSE vector types, we have a choice of either
3935 (1) being abi incompatible with a -march switch, or
3936 (2) generating an error.
3937 Given no good solution, I think the safest thing is one warning.
3938 The user won't be able to use -Werror, but....
3940 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3941 called in response to actually generating a caller or callee that
3942 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3943 via aggregate_value_p for general type probing from tree-ssa. */
3946 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3948 static bool warnedsse, warnedmmx;
3952 /* Look at the return type of the function, not the function type. */
3953 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3955 if (!TARGET_SSE && !warnedsse)
3958 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3961 warning (0, "SSE vector return without SSE enabled "
3966 if (!TARGET_MMX && !warnedmmx)
3968 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3971 warning (0, "MMX vector return without MMX enabled "
3980 /* Define how to find the value returned by a library function
3981 assuming the value has mode MODE. */
3983 ix86_libcall_value (enum machine_mode mode)
3997 return gen_rtx_REG (mode, FIRST_SSE_REG);
4000 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4004 return gen_rtx_REG (mode, 0);
4008 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4011 /* Given a mode, return the register to use for a return value. */
4014 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4016 gcc_assert (!TARGET_64BIT);
4018 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4019 we normally prevent this case when mmx is not available. However
4020 some ABIs may require the result to be returned like DImode. */
4021 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4022 return TARGET_MMX ? FIRST_MMX_REG : 0;
4024 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4025 we prevent this case when sse is not available. However some ABIs
4026 may require the result to be returned like integer TImode. */
4027 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4028 return TARGET_SSE ? FIRST_SSE_REG : 0;
4030 /* Decimal floating point values can go in %eax, unlike other float modes. */
4031 if (DECIMAL_FLOAT_MODE_P (mode))
4034 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4035 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4038 /* Floating point return values in %st(0), except for local functions when
4039 SSE math is enabled or for functions with sseregparm attribute. */
4040 if ((func || fntype)
4041 && (mode == SFmode || mode == DFmode))
4043 int sse_level = ix86_function_sseregparm (fntype, func);
4044 if ((sse_level >= 1 && mode == SFmode)
4045 || (sse_level == 2 && mode == DFmode))
4046 return FIRST_SSE_REG;
4049 return FIRST_FLOAT_REG;
4052 /* Create the va_list data type. */
4055 ix86_build_builtin_va_list (void)
4057 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4059 /* For i386 we use plain pointer to argument area. */
4061 return build_pointer_type (char_type_node);
4063 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4064 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4066 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4067 unsigned_type_node);
4068 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4069 unsigned_type_node);
4070 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4072 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4075 va_list_gpr_counter_field = f_gpr;
4076 va_list_fpr_counter_field = f_fpr;
4078 DECL_FIELD_CONTEXT (f_gpr) = record;
4079 DECL_FIELD_CONTEXT (f_fpr) = record;
4080 DECL_FIELD_CONTEXT (f_ovf) = record;
4081 DECL_FIELD_CONTEXT (f_sav) = record;
4083 TREE_CHAIN (record) = type_decl;
4084 TYPE_NAME (record) = type_decl;
4085 TYPE_FIELDS (record) = f_gpr;
4086 TREE_CHAIN (f_gpr) = f_fpr;
4087 TREE_CHAIN (f_fpr) = f_ovf;
4088 TREE_CHAIN (f_ovf) = f_sav;
4090 layout_type (record);
4092 /* The correct type is an array type of one element. */
4093 return build_array_type (record, build_index_type (size_zero_node));
4096 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4099 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4100 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4103 CUMULATIVE_ARGS next_cum;
4104 rtx save_area = NULL_RTX, mem;
4117 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4120 /* Indicate to allocate space on the stack for varargs save area. */
4121 ix86_save_varrargs_registers = 1;
4123 cfun->stack_alignment_needed = 128;
4125 fntype = TREE_TYPE (current_function_decl);
4126 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4127 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4128 != void_type_node));
4130 /* For varargs, we do not want to skip the dummy va_dcl argument.
4131 For stdargs, we do want to skip the last named argument. */
4134 function_arg_advance (&next_cum, mode, type, 1);
4137 save_area = frame_pointer_rtx;
4139 set = get_varargs_alias_set ();
4141 for (i = next_cum.regno;
4143 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4146 mem = gen_rtx_MEM (Pmode,
4147 plus_constant (save_area, i * UNITS_PER_WORD));
4148 MEM_NOTRAP_P (mem) = 1;
4149 set_mem_alias_set (mem, set);
4150 emit_move_insn (mem, gen_rtx_REG (Pmode,
4151 x86_64_int_parameter_registers[i]));
4154 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4156 /* Now emit code to save SSE registers. The AX parameter contains number
4157 of SSE parameter registers used to call this function. We use
4158 sse_prologue_save insn template that produces computed jump across
4159 SSE saves. We need some preparation work to get this working. */
4161 label = gen_label_rtx ();
4162 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4164 /* Compute address to jump to :
4165 label - 5*eax + nnamed_sse_arguments*5 */
4166 tmp_reg = gen_reg_rtx (Pmode);
4167 nsse_reg = gen_reg_rtx (Pmode);
4168 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4169 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4170 gen_rtx_MULT (Pmode, nsse_reg,
4172 if (next_cum.sse_regno)
4175 gen_rtx_CONST (DImode,
4176 gen_rtx_PLUS (DImode,
4178 GEN_INT (next_cum.sse_regno * 4))));
4180 emit_move_insn (nsse_reg, label_ref);
4181 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4183 /* Compute address of memory block we save into. We always use pointer
4184 pointing 127 bytes after first byte to store - this is needed to keep
4185 instruction size limited by 4 bytes. */
4186 tmp_reg = gen_reg_rtx (Pmode);
4187 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4188 plus_constant (save_area,
4189 8 * REGPARM_MAX + 127)));
4190 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4191 MEM_NOTRAP_P (mem) = 1;
4192 set_mem_alias_set (mem, set);
4193 set_mem_align (mem, BITS_PER_WORD);
4195 /* And finally do the dirty job! */
4196 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4197 GEN_INT (next_cum.sse_regno), label));
4202 /* Implement va_start. */
4205 ix86_va_start (tree valist, rtx nextarg)
4207 HOST_WIDE_INT words, n_gpr, n_fpr;
4208 tree f_gpr, f_fpr, f_ovf, f_sav;
4209 tree gpr, fpr, ovf, sav, t;
4212 /* Only 64bit target needs something special. */
4215 std_expand_builtin_va_start (valist, nextarg);
4219 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4220 f_fpr = TREE_CHAIN (f_gpr);
4221 f_ovf = TREE_CHAIN (f_fpr);
4222 f_sav = TREE_CHAIN (f_ovf);
4224 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4225 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4226 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4227 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4228 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4230 /* Count number of gp and fp argument registers used. */
4231 words = current_function_args_info.words;
4232 n_gpr = current_function_args_info.regno;
4233 n_fpr = current_function_args_info.sse_regno;
4235 if (TARGET_DEBUG_ARG)
4236 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4237 (int) words, (int) n_gpr, (int) n_fpr);
4239 if (cfun->va_list_gpr_size)
4241 type = TREE_TYPE (gpr);
4242 t = build2 (MODIFY_EXPR, type, gpr,
4243 build_int_cst (type, n_gpr * 8));
4244 TREE_SIDE_EFFECTS (t) = 1;
4245 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4248 if (cfun->va_list_fpr_size)
4250 type = TREE_TYPE (fpr);
4251 t = build2 (MODIFY_EXPR, type, fpr,
4252 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4253 TREE_SIDE_EFFECTS (t) = 1;
4254 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4257 /* Find the overflow area. */
4258 type = TREE_TYPE (ovf);
4259 t = make_tree (type, virtual_incoming_args_rtx);
4261 t = build2 (PLUS_EXPR, type, t,
4262 build_int_cst (type, words * UNITS_PER_WORD));
4263 t = build2 (MODIFY_EXPR, type, ovf, t);
4264 TREE_SIDE_EFFECTS (t) = 1;
4265 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4267 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4269 /* Find the register save area.
4270 Prologue of the function save it right above stack frame. */
4271 type = TREE_TYPE (sav);
4272 t = make_tree (type, frame_pointer_rtx);
4273 t = build2 (MODIFY_EXPR, type, sav, t);
4274 TREE_SIDE_EFFECTS (t) = 1;
4275 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4279 /* Implement va_arg. */
4282 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4284 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4285 tree f_gpr, f_fpr, f_ovf, f_sav;
4286 tree gpr, fpr, ovf, sav, t;
4288 tree lab_false, lab_over = NULL_TREE;
4293 enum machine_mode nat_mode;
4295 /* Only 64bit target needs something special. */
4297 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4299 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4300 f_fpr = TREE_CHAIN (f_gpr);
4301 f_ovf = TREE_CHAIN (f_fpr);
4302 f_sav = TREE_CHAIN (f_ovf);
4304 valist = build_va_arg_indirect_ref (valist);
4305 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4306 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4307 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4308 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4310 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4312 type = build_pointer_type (type);
4313 size = int_size_in_bytes (type);
4314 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4316 nat_mode = type_natural_mode (type);
4317 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4318 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4320 /* Pull the value out of the saved registers. */
4322 addr = create_tmp_var (ptr_type_node, "addr");
4323 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4327 int needed_intregs, needed_sseregs;
4329 tree int_addr, sse_addr;
4331 lab_false = create_artificial_label ();
4332 lab_over = create_artificial_label ();
4334 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4336 need_temp = (!REG_P (container)
4337 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4338 || TYPE_ALIGN (type) > 128));
4340 /* In case we are passing structure, verify that it is consecutive block
4341 on the register save area. If not we need to do moves. */
4342 if (!need_temp && !REG_P (container))
4344 /* Verify that all registers are strictly consecutive */
4345 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4349 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4351 rtx slot = XVECEXP (container, 0, i);
4352 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4353 || INTVAL (XEXP (slot, 1)) != i * 16)
4361 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4363 rtx slot = XVECEXP (container, 0, i);
4364 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4365 || INTVAL (XEXP (slot, 1)) != i * 8)
4377 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4378 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4379 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4380 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4383 /* First ensure that we fit completely in registers. */
4386 t = build_int_cst (TREE_TYPE (gpr),
4387 (REGPARM_MAX - needed_intregs + 1) * 8);
4388 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4389 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4390 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4391 gimplify_and_add (t, pre_p);
4395 t = build_int_cst (TREE_TYPE (fpr),
4396 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4398 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4399 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4400 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4401 gimplify_and_add (t, pre_p);
4404 /* Compute index to start of area used for integer regs. */
4407 /* int_addr = gpr + sav; */
4408 t = fold_convert (ptr_type_node, gpr);
4409 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4410 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4411 gimplify_and_add (t, pre_p);
4415 /* sse_addr = fpr + sav; */
4416 t = fold_convert (ptr_type_node, fpr);
4417 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4418 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4419 gimplify_and_add (t, pre_p);
4424 tree temp = create_tmp_var (type, "va_arg_tmp");
4427 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4428 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4429 gimplify_and_add (t, pre_p);
4431 for (i = 0; i < XVECLEN (container, 0); i++)
4433 rtx slot = XVECEXP (container, 0, i);
4434 rtx reg = XEXP (slot, 0);
4435 enum machine_mode mode = GET_MODE (reg);
4436 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4437 tree addr_type = build_pointer_type (piece_type);
4440 tree dest_addr, dest;
4442 if (SSE_REGNO_P (REGNO (reg)))
4444 src_addr = sse_addr;
4445 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4449 src_addr = int_addr;
4450 src_offset = REGNO (reg) * 8;
4452 src_addr = fold_convert (addr_type, src_addr);
4453 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4454 size_int (src_offset)));
4455 src = build_va_arg_indirect_ref (src_addr);
4457 dest_addr = fold_convert (addr_type, addr);
4458 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4459 size_int (INTVAL (XEXP (slot, 1)))));
4460 dest = build_va_arg_indirect_ref (dest_addr);
4462 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4463 gimplify_and_add (t, pre_p);
4469 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4470 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4471 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4472 gimplify_and_add (t, pre_p);
4476 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4477 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4478 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4479 gimplify_and_add (t, pre_p);
4482 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4483 gimplify_and_add (t, pre_p);
4485 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4486 append_to_statement_list (t, pre_p);
4489 /* ... otherwise out of the overflow area. */
4491 /* Care for on-stack alignment if needed. */
4492 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4493 || integer_zerop (TYPE_SIZE (type)))
4497 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4498 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4499 build_int_cst (TREE_TYPE (ovf), align - 1));
4500 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4501 build_int_cst (TREE_TYPE (t), -align));
4503 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4505 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4506 gimplify_and_add (t2, pre_p);
4508 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4509 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4510 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4511 gimplify_and_add (t, pre_p);
4515 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4516 append_to_statement_list (t, pre_p);
4519 ptrtype = build_pointer_type (type);
4520 addr = fold_convert (ptrtype, addr);
4523 addr = build_va_arg_indirect_ref (addr);
4524 return build_va_arg_indirect_ref (addr);
4527 /* Return nonzero if OPNUM's MEM should be matched
4528 in movabs* patterns. */
4531 ix86_check_movabs (rtx insn, int opnum)
4535 set = PATTERN (insn);
4536 if (GET_CODE (set) == PARALLEL)
4537 set = XVECEXP (set, 0, 0);
4538 gcc_assert (GET_CODE (set) == SET);
4539 mem = XEXP (set, opnum);
4540 while (GET_CODE (mem) == SUBREG)
4541 mem = SUBREG_REG (mem);
4542 gcc_assert (GET_CODE (mem) == MEM);
4543 return (volatile_ok || !MEM_VOLATILE_P (mem));
4546 /* Initialize the table of extra 80387 mathematical constants. */
4549 init_ext_80387_constants (void)
4551 static const char * cst[5] =
4553 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4554 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4555 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4556 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4557 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4561 for (i = 0; i < 5; i++)
4563 real_from_string (&ext_80387_constants_table[i], cst[i]);
4564 /* Ensure each constant is rounded to XFmode precision. */
4565 real_convert (&ext_80387_constants_table[i],
4566 XFmode, &ext_80387_constants_table[i]);
4569 ext_80387_constants_init = 1;
4572 /* Return true if the constant is something that can be loaded with
4573 a special instruction. */
4576 standard_80387_constant_p (rtx x)
4578 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4581 if (x == CONST0_RTX (GET_MODE (x)))
4583 if (x == CONST1_RTX (GET_MODE (x)))
4586 /* For XFmode constants, try to find a special 80387 instruction when
4587 optimizing for size or on those CPUs that benefit from them. */
4588 if (GET_MODE (x) == XFmode
4589 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4594 if (! ext_80387_constants_init)
4595 init_ext_80387_constants ();
4597 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4598 for (i = 0; i < 5; i++)
4599 if (real_identical (&r, &ext_80387_constants_table[i]))
4606 /* Return the opcode of the special instruction to be used to load
4610 standard_80387_constant_opcode (rtx x)
4612 switch (standard_80387_constant_p (x))
4633 /* Return the CONST_DOUBLE representing the 80387 constant that is
4634 loaded by the specified special instruction. The argument IDX
4635 matches the return value from standard_80387_constant_p. */
4638 standard_80387_constant_rtx (int idx)
4642 if (! ext_80387_constants_init)
4643 init_ext_80387_constants ();
4659 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4663 /* Return 1 if mode is a valid mode for sse. */
4665 standard_sse_mode_p (enum machine_mode mode)
4682 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4685 standard_sse_constant_p (rtx x)
4687 enum machine_mode mode = GET_MODE (x);
4689 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4691 if (vector_all_ones_operand (x, mode)
4692 && standard_sse_mode_p (mode))
4693 return TARGET_SSE2 ? 2 : -1;
4698 /* Return the opcode of the special instruction to be used to load
4702 standard_sse_constant_opcode (rtx insn, rtx x)
4704 switch (standard_sse_constant_p (x))
4707 if (get_attr_mode (insn) == MODE_V4SF)
4708 return "xorps\t%0, %0";
4709 else if (get_attr_mode (insn) == MODE_V2DF)
4710 return "xorpd\t%0, %0";
4712 return "pxor\t%0, %0";
4714 return "pcmpeqd\t%0, %0";
4719 /* Returns 1 if OP contains a symbol reference */
4722 symbolic_reference_mentioned_p (rtx op)
4727 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4730 fmt = GET_RTX_FORMAT (GET_CODE (op));
4731 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4737 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4738 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4742 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4749 /* Return 1 if it is appropriate to emit `ret' instructions in the
4750 body of a function. Do this only if the epilogue is simple, needing a
4751 couple of insns. Prior to reloading, we can't tell how many registers
4752 must be saved, so return 0 then. Return 0 if there is no frame
4753 marker to de-allocate. */
4756 ix86_can_use_return_insn_p (void)
4758 struct ix86_frame frame;
4760 if (! reload_completed || frame_pointer_needed)
4763 /* Don't allow more than 32 pop, since that's all we can do
4764 with one instruction. */
4765 if (current_function_pops_args
4766 && current_function_args_size >= 32768)
4769 ix86_compute_frame_layout (&frame);
4770 return frame.to_allocate == 0 && frame.nregs == 0;
4773 /* Value should be nonzero if functions must have frame pointers.
4774 Zero means the frame pointer need not be set up (and parms may
4775 be accessed via the stack pointer) in functions that seem suitable. */
4778 ix86_frame_pointer_required (void)
4780 /* If we accessed previous frames, then the generated code expects
4781 to be able to access the saved ebp value in our frame. */
4782 if (cfun->machine->accesses_prev_frame)
4785 /* Several x86 os'es need a frame pointer for other reasons,
4786 usually pertaining to setjmp. */
4787 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4790 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4791 the frame pointer by default. Turn it back on now if we've not
4792 got a leaf function. */
4793 if (TARGET_OMIT_LEAF_FRAME_POINTER
4794 && (!current_function_is_leaf
4795 || ix86_current_function_calls_tls_descriptor))
4798 if (current_function_profile)
4804 /* Record that the current function accesses previous call frames. */
4807 ix86_setup_frame_addresses (void)
4809 cfun->machine->accesses_prev_frame = 1;
4812 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4813 # define USE_HIDDEN_LINKONCE 1
4815 # define USE_HIDDEN_LINKONCE 0
4818 static int pic_labels_used;
4820 /* Fills in the label name that should be used for a pc thunk for
4821 the given register. */
4824 get_pc_thunk_name (char name[32], unsigned int regno)
4826 gcc_assert (!TARGET_64BIT);
4828 if (USE_HIDDEN_LINKONCE)
4829 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4831 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4835 /* This function generates code for -fpic that loads %ebx with
4836 the return address of the caller and then returns. */
4839 ix86_file_end (void)
4844 for (regno = 0; regno < 8; ++regno)
4848 if (! ((pic_labels_used >> regno) & 1))
4851 get_pc_thunk_name (name, regno);
4856 switch_to_section (darwin_sections[text_coal_section]);
4857 fputs ("\t.weak_definition\t", asm_out_file);
4858 assemble_name (asm_out_file, name);
4859 fputs ("\n\t.private_extern\t", asm_out_file);
4860 assemble_name (asm_out_file, name);
4861 fputs ("\n", asm_out_file);
4862 ASM_OUTPUT_LABEL (asm_out_file, name);
4866 if (USE_HIDDEN_LINKONCE)
4870 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4872 TREE_PUBLIC (decl) = 1;
4873 TREE_STATIC (decl) = 1;
4874 DECL_ONE_ONLY (decl) = 1;
4876 (*targetm.asm_out.unique_section) (decl, 0);
4877 switch_to_section (get_named_section (decl, NULL, 0));
4879 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4880 fputs ("\t.hidden\t", asm_out_file);
4881 assemble_name (asm_out_file, name);
4882 fputc ('\n', asm_out_file);
4883 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4887 switch_to_section (text_section);
4888 ASM_OUTPUT_LABEL (asm_out_file, name);
4891 xops[0] = gen_rtx_REG (SImode, regno);
4892 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4893 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4894 output_asm_insn ("ret", xops);
4897 if (NEED_INDICATE_EXEC_STACK)
4898 file_end_indicate_exec_stack ();
4901 /* Emit code for the SET_GOT patterns. */
4904 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4909 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4911 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4913 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4916 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4918 output_asm_insn ("call\t%a2", xops);
4921 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4922 is what will be referenced by the Mach-O PIC subsystem. */
4924 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4927 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4928 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4931 output_asm_insn ("pop{l}\t%0", xops);
4936 get_pc_thunk_name (name, REGNO (dest));
4937 pic_labels_used |= 1 << REGNO (dest);
4939 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4940 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4941 output_asm_insn ("call\t%X2", xops);
4942 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4943 is what will be referenced by the Mach-O PIC subsystem. */
4946 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4948 targetm.asm_out.internal_label (asm_out_file, "L",
4949 CODE_LABEL_NUMBER (label));
4956 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4957 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4959 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4964 /* Generate an "push" pattern for input ARG. */
4969 return gen_rtx_SET (VOIDmode,
4971 gen_rtx_PRE_DEC (Pmode,
4972 stack_pointer_rtx)),
4976 /* Return >= 0 if there is an unused call-clobbered register available
4977 for the entire function. */
4980 ix86_select_alt_pic_regnum (void)
4982 if (current_function_is_leaf && !current_function_profile
4983 && !ix86_current_function_calls_tls_descriptor)
4986 for (i = 2; i >= 0; --i)
4987 if (!regs_ever_live[i])
4991 return INVALID_REGNUM;
4994 /* Return 1 if we need to save REGNO. */
4996 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4998 if (pic_offset_table_rtx
4999 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5000 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5001 || current_function_profile
5002 || current_function_calls_eh_return
5003 || current_function_uses_const_pool))
5005 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5010 if (current_function_calls_eh_return && maybe_eh_return)
5015 unsigned test = EH_RETURN_DATA_REGNO (i);
5016 if (test == INVALID_REGNUM)
5023 if (cfun->machine->force_align_arg_pointer
5024 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5027 return (regs_ever_live[regno]
5028 && !call_used_regs[regno]
5029 && !fixed_regs[regno]
5030 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5033 /* Return number of registers to be saved on the stack. */
5036 ix86_nsaved_regs (void)
5041 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5042 if (ix86_save_reg (regno, true))
5047 /* Return the offset between two registers, one to be eliminated, and the other
5048 its replacement, at the start of a routine. */
5051 ix86_initial_elimination_offset (int from, int to)
5053 struct ix86_frame frame;
5054 ix86_compute_frame_layout (&frame);
5056 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5057 return frame.hard_frame_pointer_offset;
5058 else if (from == FRAME_POINTER_REGNUM
5059 && to == HARD_FRAME_POINTER_REGNUM)
5060 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5063 gcc_assert (to == STACK_POINTER_REGNUM);
5065 if (from == ARG_POINTER_REGNUM)
5066 return frame.stack_pointer_offset;
5068 gcc_assert (from == FRAME_POINTER_REGNUM);
5069 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5073 /* Fill structure ix86_frame about frame of currently computed function. */
5076 ix86_compute_frame_layout (struct ix86_frame *frame)
5078 HOST_WIDE_INT total_size;
5079 unsigned int stack_alignment_needed;
5080 HOST_WIDE_INT offset;
5081 unsigned int preferred_alignment;
5082 HOST_WIDE_INT size = get_frame_size ();
5084 frame->nregs = ix86_nsaved_regs ();
5087 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5088 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5090 /* During reload iteration the amount of registers saved can change.
5091 Recompute the value as needed. Do not recompute when amount of registers
5092 didn't change as reload does multiple calls to the function and does not
5093 expect the decision to change within single iteration. */
5095 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5097 int count = frame->nregs;
5099 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5100 /* The fast prologue uses move instead of push to save registers. This
5101 is significantly longer, but also executes faster as modern hardware
5102 can execute the moves in parallel, but can't do that for push/pop.
5104 Be careful about choosing what prologue to emit: When function takes
5105 many instructions to execute we may use slow version as well as in
5106 case function is known to be outside hot spot (this is known with
5107 feedback only). Weight the size of function by number of registers
5108 to save as it is cheap to use one or two push instructions but very
5109 slow to use many of them. */
5111 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5112 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5113 || (flag_branch_probabilities
5114 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5115 cfun->machine->use_fast_prologue_epilogue = false;
5117 cfun->machine->use_fast_prologue_epilogue
5118 = !expensive_function_p (count);
5120 if (TARGET_PROLOGUE_USING_MOVE
5121 && cfun->machine->use_fast_prologue_epilogue)
5122 frame->save_regs_using_mov = true;
5124 frame->save_regs_using_mov = false;
5127 /* Skip return address and saved base pointer. */
5128 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5130 frame->hard_frame_pointer_offset = offset;
5132 /* Do some sanity checking of stack_alignment_needed and
5133 preferred_alignment, since i386 port is the only using those features
5134 that may break easily. */
5136 gcc_assert (!size || stack_alignment_needed);
5137 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5138 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5139 gcc_assert (stack_alignment_needed
5140 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5142 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5143 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5145 /* Register save area */
5146 offset += frame->nregs * UNITS_PER_WORD;
5149 if (ix86_save_varrargs_registers)
5151 offset += X86_64_VARARGS_SIZE;
5152 frame->va_arg_size = X86_64_VARARGS_SIZE;
5155 frame->va_arg_size = 0;
5157 /* Align start of frame for local function. */
5158 frame->padding1 = ((offset + stack_alignment_needed - 1)
5159 & -stack_alignment_needed) - offset;
5161 offset += frame->padding1;
5163 /* Frame pointer points here. */
5164 frame->frame_pointer_offset = offset;
5168 /* Add outgoing arguments area. Can be skipped if we eliminated
5169 all the function calls as dead code.
5170 Skipping is however impossible when function calls alloca. Alloca
5171 expander assumes that last current_function_outgoing_args_size
5172 of stack frame are unused. */
5173 if (ACCUMULATE_OUTGOING_ARGS
5174 && (!current_function_is_leaf || current_function_calls_alloca
5175 || ix86_current_function_calls_tls_descriptor))
5177 offset += current_function_outgoing_args_size;
5178 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5181 frame->outgoing_arguments_size = 0;
5183 /* Align stack boundary. Only needed if we're calling another function
5185 if (!current_function_is_leaf || current_function_calls_alloca
5186 || ix86_current_function_calls_tls_descriptor)
5187 frame->padding2 = ((offset + preferred_alignment - 1)
5188 & -preferred_alignment) - offset;
5190 frame->padding2 = 0;
5192 offset += frame->padding2;
5194 /* We've reached end of stack frame. */
5195 frame->stack_pointer_offset = offset;
5197 /* Size prologue needs to allocate. */
5198 frame->to_allocate =
5199 (size + frame->padding1 + frame->padding2
5200 + frame->outgoing_arguments_size + frame->va_arg_size);
5202 if ((!frame->to_allocate && frame->nregs <= 1)
5203 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5204 frame->save_regs_using_mov = false;
5206 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5207 && current_function_is_leaf
5208 && !ix86_current_function_calls_tls_descriptor)
5210 frame->red_zone_size = frame->to_allocate;
5211 if (frame->save_regs_using_mov)
5212 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5213 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5214 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5217 frame->red_zone_size = 0;
5218 frame->to_allocate -= frame->red_zone_size;
5219 frame->stack_pointer_offset -= frame->red_zone_size;
5221 fprintf (stderr, "nregs: %i\n", frame->nregs);
5222 fprintf (stderr, "size: %i\n", size);
5223 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5224 fprintf (stderr, "padding1: %i\n", frame->padding1);
5225 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5226 fprintf (stderr, "padding2: %i\n", frame->padding2);
5227 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5228 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5229 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5230 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5231 frame->hard_frame_pointer_offset);
5232 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5236 /* Emit code to save registers in the prologue. */
5239 ix86_emit_save_regs (void)
5244 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5245 if (ix86_save_reg (regno, true))
5247 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5248 RTX_FRAME_RELATED_P (insn) = 1;
5252 /* Emit code to save registers using MOV insns. First register
5253 is restored from POINTER + OFFSET. */
5255 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5260 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5261 if (ix86_save_reg (regno, true))
5263 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5265 gen_rtx_REG (Pmode, regno));
5266 RTX_FRAME_RELATED_P (insn) = 1;
5267 offset += UNITS_PER_WORD;
5271 /* Expand prologue or epilogue stack adjustment.
5272 The pattern exist to put a dependency on all ebp-based memory accesses.
5273 STYLE should be negative if instructions should be marked as frame related,
5274 zero if %r11 register is live and cannot be freely used and positive
5278 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5283 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5284 else if (x86_64_immediate_operand (offset, DImode))
5285 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5289 /* r11 is used by indirect sibcall return as well, set before the
5290 epilogue and used after the epilogue. ATM indirect sibcall
5291 shouldn't be used together with huge frame sizes in one
5292 function because of the frame_size check in sibcall.c. */
5294 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5295 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5297 RTX_FRAME_RELATED_P (insn) = 1;
5298 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5302 RTX_FRAME_RELATED_P (insn) = 1;
5305 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5308 ix86_internal_arg_pointer (void)
5310 bool has_force_align_arg_pointer =
5311 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5312 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5313 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5314 && DECL_NAME (current_function_decl)
5315 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5316 && DECL_FILE_SCOPE_P (current_function_decl))
5317 || ix86_force_align_arg_pointer
5318 || has_force_align_arg_pointer)
5320 /* Nested functions can't realign the stack due to a register
5322 if (DECL_CONTEXT (current_function_decl)
5323 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5325 if (ix86_force_align_arg_pointer)
5326 warning (0, "-mstackrealign ignored for nested functions");
5327 if (has_force_align_arg_pointer)
5328 error ("%s not supported for nested functions",
5329 ix86_force_align_arg_pointer_string);
5330 return virtual_incoming_args_rtx;
5332 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5333 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5336 return virtual_incoming_args_rtx;
5339 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5340 This is called from dwarf2out.c to emit call frame instructions
5341 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5343 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5345 rtx unspec = SET_SRC (pattern);
5346 gcc_assert (GET_CODE (unspec) == UNSPEC);
5350 case UNSPEC_REG_SAVE:
5351 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5352 SET_DEST (pattern));
5354 case UNSPEC_DEF_CFA:
5355 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5356 INTVAL (XVECEXP (unspec, 0, 0)));
5363 /* Expand the prologue into a bunch of separate insns. */
5366 ix86_expand_prologue (void)
5370 struct ix86_frame frame;
5371 HOST_WIDE_INT allocate;
5373 ix86_compute_frame_layout (&frame);
5375 if (cfun->machine->force_align_arg_pointer)
5379 /* Grab the argument pointer. */
5380 x = plus_constant (stack_pointer_rtx, 4);
5381 y = cfun->machine->force_align_arg_pointer;
5382 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5383 RTX_FRAME_RELATED_P (insn) = 1;
5385 /* The unwind info consists of two parts: install the fafp as the cfa,
5386 and record the fafp as the "save register" of the stack pointer.
5387 The later is there in order that the unwinder can see where it
5388 should restore the stack pointer across the and insn. */
5389 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5390 x = gen_rtx_SET (VOIDmode, y, x);
5391 RTX_FRAME_RELATED_P (x) = 1;
5392 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5394 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5395 RTX_FRAME_RELATED_P (y) = 1;
5396 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5397 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5398 REG_NOTES (insn) = x;
5400 /* Align the stack. */
5401 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5404 /* And here we cheat like madmen with the unwind info. We force the
5405 cfa register back to sp+4, which is exactly what it was at the
5406 start of the function. Re-pushing the return address results in
5407 the return at the same spot relative to the cfa, and thus is
5408 correct wrt the unwind info. */
5409 x = cfun->machine->force_align_arg_pointer;
5410 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5411 insn = emit_insn (gen_push (x));
5412 RTX_FRAME_RELATED_P (insn) = 1;
5415 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5416 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5417 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5418 REG_NOTES (insn) = x;
5421 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5422 slower on all targets. Also sdb doesn't like it. */
5424 if (frame_pointer_needed)
5426 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5427 RTX_FRAME_RELATED_P (insn) = 1;
5429 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5430 RTX_FRAME_RELATED_P (insn) = 1;
5433 allocate = frame.to_allocate;
5435 if (!frame.save_regs_using_mov)
5436 ix86_emit_save_regs ();
5438 allocate += frame.nregs * UNITS_PER_WORD;
5440 /* When using red zone we may start register saving before allocating
5441 the stack frame saving one cycle of the prologue. */
5442 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5443 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5444 : stack_pointer_rtx,
5445 -frame.nregs * UNITS_PER_WORD);
5449 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5450 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5451 GEN_INT (-allocate), -1);
5454 /* Only valid for Win32. */
5455 rtx eax = gen_rtx_REG (SImode, 0);
5456 bool eax_live = ix86_eax_live_at_start_p ();
5459 gcc_assert (!TARGET_64BIT);
5463 emit_insn (gen_push (eax));
5467 emit_move_insn (eax, GEN_INT (allocate));
5469 insn = emit_insn (gen_allocate_stack_worker (eax));
5470 RTX_FRAME_RELATED_P (insn) = 1;
5471 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5472 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5473 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5474 t, REG_NOTES (insn));
5478 if (frame_pointer_needed)
5479 t = plus_constant (hard_frame_pointer_rtx,
5482 - frame.nregs * UNITS_PER_WORD);
5484 t = plus_constant (stack_pointer_rtx, allocate);
5485 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5489 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5491 if (!frame_pointer_needed || !frame.to_allocate)
5492 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5494 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5495 -frame.nregs * UNITS_PER_WORD);
5498 pic_reg_used = false;
5499 if (pic_offset_table_rtx
5500 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5501 || current_function_profile))
5503 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5505 if (alt_pic_reg_used != INVALID_REGNUM)
5506 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5508 pic_reg_used = true;
5514 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5516 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5518 /* Even with accurate pre-reload life analysis, we can wind up
5519 deleting all references to the pic register after reload.
5520 Consider if cross-jumping unifies two sides of a branch
5521 controlled by a comparison vs the only read from a global.
5522 In which case, allow the set_got to be deleted, though we're
5523 too late to do anything about the ebx save in the prologue. */
5524 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5527 /* Prevent function calls from be scheduled before the call to mcount.
5528 In the pic_reg_used case, make sure that the got load isn't deleted. */
5529 if (current_function_profile)
5530 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5533 /* Emit code to restore saved registers using MOV insns. First register
5534 is restored from POINTER + OFFSET. */
5536 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5537 int maybe_eh_return)
5540 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5542 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5543 if (ix86_save_reg (regno, maybe_eh_return))
5545 /* Ensure that adjust_address won't be forced to produce pointer
5546 out of range allowed by x86-64 instruction set. */
5547 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5551 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5552 emit_move_insn (r11, GEN_INT (offset));
5553 emit_insn (gen_adddi3 (r11, r11, pointer));
5554 base_address = gen_rtx_MEM (Pmode, r11);
5557 emit_move_insn (gen_rtx_REG (Pmode, regno),
5558 adjust_address (base_address, Pmode, offset));
5559 offset += UNITS_PER_WORD;
5563 /* Restore function stack, frame, and registers. */
5566 ix86_expand_epilogue (int style)
5569 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5570 struct ix86_frame frame;
5571 HOST_WIDE_INT offset;
5573 ix86_compute_frame_layout (&frame);
5575 /* Calculate start of saved registers relative to ebp. Special care
5576 must be taken for the normal return case of a function using
5577 eh_return: the eax and edx registers are marked as saved, but not
5578 restored along this path. */
5579 offset = frame.nregs;
5580 if (current_function_calls_eh_return && style != 2)
5582 offset *= -UNITS_PER_WORD;
5584 /* If we're only restoring one register and sp is not valid then
5585 using a move instruction to restore the register since it's
5586 less work than reloading sp and popping the register.
5588 The default code result in stack adjustment using add/lea instruction,
5589 while this code results in LEAVE instruction (or discrete equivalent),
5590 so it is profitable in some other cases as well. Especially when there
5591 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5592 and there is exactly one register to pop. This heuristic may need some
5593 tuning in future. */
5594 if ((!sp_valid && frame.nregs <= 1)
5595 || (TARGET_EPILOGUE_USING_MOVE
5596 && cfun->machine->use_fast_prologue_epilogue
5597 && (frame.nregs > 1 || frame.to_allocate))
5598 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5599 || (frame_pointer_needed && TARGET_USE_LEAVE
5600 && cfun->machine->use_fast_prologue_epilogue
5601 && frame.nregs == 1)
5602 || current_function_calls_eh_return)
5604 /* Restore registers. We can use ebp or esp to address the memory
5605 locations. If both are available, default to ebp, since offsets
5606 are known to be small. Only exception is esp pointing directly to the
5607 end of block of saved registers, where we may simplify addressing
5610 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5611 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5612 frame.to_allocate, style == 2);
5614 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5615 offset, style == 2);
5617 /* eh_return epilogues need %ecx added to the stack pointer. */
5620 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5622 if (frame_pointer_needed)
5624 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5625 tmp = plus_constant (tmp, UNITS_PER_WORD);
5626 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5628 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5629 emit_move_insn (hard_frame_pointer_rtx, tmp);
5631 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5636 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5637 tmp = plus_constant (tmp, (frame.to_allocate
5638 + frame.nregs * UNITS_PER_WORD));
5639 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5642 else if (!frame_pointer_needed)
5643 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5644 GEN_INT (frame.to_allocate
5645 + frame.nregs * UNITS_PER_WORD),
5647 /* If not an i386, mov & pop is faster than "leave". */
5648 else if (TARGET_USE_LEAVE || optimize_size
5649 || !cfun->machine->use_fast_prologue_epilogue)
5650 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5653 pro_epilogue_adjust_stack (stack_pointer_rtx,
5654 hard_frame_pointer_rtx,
5657 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5659 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5664 /* First step is to deallocate the stack frame so that we can
5665 pop the registers. */
5668 gcc_assert (frame_pointer_needed);
5669 pro_epilogue_adjust_stack (stack_pointer_rtx,
5670 hard_frame_pointer_rtx,
5671 GEN_INT (offset), style);
5673 else if (frame.to_allocate)
5674 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5675 GEN_INT (frame.to_allocate), style);
5677 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5678 if (ix86_save_reg (regno, false))
5681 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5683 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5685 if (frame_pointer_needed)
5687 /* Leave results in shorter dependency chains on CPUs that are
5688 able to grok it fast. */
5689 if (TARGET_USE_LEAVE)
5690 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5691 else if (TARGET_64BIT)
5692 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5694 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5698 if (cfun->machine->force_align_arg_pointer)
5700 emit_insn (gen_addsi3 (stack_pointer_rtx,
5701 cfun->machine->force_align_arg_pointer,
5705 /* Sibcall epilogues don't want a return instruction. */
5709 if (current_function_pops_args && current_function_args_size)
5711 rtx popc = GEN_INT (current_function_pops_args);
5713 /* i386 can only pop 64K bytes. If asked to pop more, pop
5714 return address, do explicit add, and jump indirectly to the
5717 if (current_function_pops_args >= 65536)
5719 rtx ecx = gen_rtx_REG (SImode, 2);
5721 /* There is no "pascal" calling convention in 64bit ABI. */
5722 gcc_assert (!TARGET_64BIT);
5724 emit_insn (gen_popsi1 (ecx));
5725 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5726 emit_jump_insn (gen_return_indirect_internal (ecx));
5729 emit_jump_insn (gen_return_pop_internal (popc));
5732 emit_jump_insn (gen_return_internal ());
5735 /* Reset from the function's potential modifications. */
5738 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5739 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5741 if (pic_offset_table_rtx)
5742 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5744 /* Mach-O doesn't support labels at the end of objects, so if
5745 it looks like we might want one, insert a NOP. */
5747 rtx insn = get_last_insn ();
5750 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
5751 insn = PREV_INSN (insn);
5755 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
5756 fputs ("\tnop\n", file);
5762 /* Extract the parts of an RTL expression that is a valid memory address
5763 for an instruction. Return 0 if the structure of the address is
5764 grossly off. Return -1 if the address contains ASHIFT, so it is not
5765 strictly valid, but still used for computing length of lea instruction. */
5768 ix86_decompose_address (rtx addr, struct ix86_address *out)
5770 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5771 rtx base_reg, index_reg;
5772 HOST_WIDE_INT scale = 1;
5773 rtx scale_rtx = NULL_RTX;
5775 enum ix86_address_seg seg = SEG_DEFAULT;
5777 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5779 else if (GET_CODE (addr) == PLUS)
5789 addends[n++] = XEXP (op, 1);
5792 while (GET_CODE (op) == PLUS);
5797 for (i = n; i >= 0; --i)
5800 switch (GET_CODE (op))
5805 index = XEXP (op, 0);
5806 scale_rtx = XEXP (op, 1);
5810 if (XINT (op, 1) == UNSPEC_TP
5811 && TARGET_TLS_DIRECT_SEG_REFS
5812 && seg == SEG_DEFAULT)
5813 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5842 else if (GET_CODE (addr) == MULT)
5844 index = XEXP (addr, 0); /* index*scale */
5845 scale_rtx = XEXP (addr, 1);
5847 else if (GET_CODE (addr) == ASHIFT)
5851 /* We're called for lea too, which implements ashift on occasion. */
5852 index = XEXP (addr, 0);
5853 tmp = XEXP (addr, 1);
5854 if (GET_CODE (tmp) != CONST_INT)
5856 scale = INTVAL (tmp);
5857 if ((unsigned HOST_WIDE_INT) scale > 3)
5863 disp = addr; /* displacement */
5865 /* Extract the integral value of scale. */
5868 if (GET_CODE (scale_rtx) != CONST_INT)
5870 scale = INTVAL (scale_rtx);
5873 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5874 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5876 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5877 if (base_reg && index_reg && scale == 1
5878 && (index_reg == arg_pointer_rtx
5879 || index_reg == frame_pointer_rtx
5880 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5883 tmp = base, base = index, index = tmp;
5884 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5887 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5888 if ((base_reg == hard_frame_pointer_rtx
5889 || base_reg == frame_pointer_rtx
5890 || base_reg == arg_pointer_rtx) && !disp)
5893 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5894 Avoid this by transforming to [%esi+0]. */
5895 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5896 && base_reg && !index_reg && !disp
5898 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5901 /* Special case: encode reg+reg instead of reg*2. */
5902 if (!base && index && scale && scale == 2)
5903 base = index, base_reg = index_reg, scale = 1;
5905 /* Special case: scaling cannot be encoded without base or displacement. */
5906 if (!base && !disp && index && scale != 1)
5918 /* Return cost of the memory address x.
5919 For i386, it is better to use a complex address than let gcc copy
5920 the address into a reg and make a new pseudo. But not if the address
5921 requires to two regs - that would mean more pseudos with longer
5924 ix86_address_cost (rtx x)
5926 struct ix86_address parts;
5928 int ok = ix86_decompose_address (x, &parts);
5932 if (parts.base && GET_CODE (parts.base) == SUBREG)
5933 parts.base = SUBREG_REG (parts.base);
5934 if (parts.index && GET_CODE (parts.index) == SUBREG)
5935 parts.index = SUBREG_REG (parts.index);
5937 /* More complex memory references are better. */
5938 if (parts.disp && parts.disp != const0_rtx)
5940 if (parts.seg != SEG_DEFAULT)
5943 /* Attempt to minimize number of registers in the address. */
5945 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5947 && (!REG_P (parts.index)
5948 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5952 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5954 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5955 && parts.base != parts.index)
5958 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5959 since it's predecode logic can't detect the length of instructions
5960 and it degenerates to vector decoded. Increase cost of such
5961 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5962 to split such addresses or even refuse such addresses at all.
5964 Following addressing modes are affected:
5969 The first and last case may be avoidable by explicitly coding the zero in
5970 memory address, but I don't have AMD-K6 machine handy to check this
5974 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5975 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5976 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5982 /* If X is a machine specific address (i.e. a symbol or label being
5983 referenced as a displacement from the GOT implemented using an
5984 UNSPEC), then return the base term. Otherwise return X. */
5987 ix86_find_base_term (rtx x)
5993 if (GET_CODE (x) != CONST)
5996 if (GET_CODE (term) == PLUS
5997 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5998 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5999 term = XEXP (term, 0);
6000 if (GET_CODE (term) != UNSPEC
6001 || XINT (term, 1) != UNSPEC_GOTPCREL)
6004 term = XVECEXP (term, 0, 0);
6006 if (GET_CODE (term) != SYMBOL_REF
6007 && GET_CODE (term) != LABEL_REF)
6013 term = ix86_delegitimize_address (x);
6015 if (GET_CODE (term) != SYMBOL_REF
6016 && GET_CODE (term) != LABEL_REF)
6022 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6023 this is used for to form addresses to local data when -fPIC is in
6027 darwin_local_data_pic (rtx disp)
6029 if (GET_CODE (disp) == MINUS)
6031 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6032 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6033 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6035 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6036 if (! strcmp (sym_name, "<pic base>"))
6044 /* Determine if a given RTX is a valid constant. We already know this
6045 satisfies CONSTANT_P. */
6048 legitimate_constant_p (rtx x)
6050 switch (GET_CODE (x))
6055 if (GET_CODE (x) == PLUS)
6057 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6062 if (TARGET_MACHO && darwin_local_data_pic (x))
6065 /* Only some unspecs are valid as "constants". */
6066 if (GET_CODE (x) == UNSPEC)
6067 switch (XINT (x, 1))
6070 return TARGET_64BIT;
6073 x = XVECEXP (x, 0, 0);
6074 return (GET_CODE (x) == SYMBOL_REF
6075 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6077 x = XVECEXP (x, 0, 0);
6078 return (GET_CODE (x) == SYMBOL_REF
6079 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6084 /* We must have drilled down to a symbol. */
6085 if (GET_CODE (x) == LABEL_REF)
6087 if (GET_CODE (x) != SYMBOL_REF)
6092 /* TLS symbols are never valid. */
6093 if (SYMBOL_REF_TLS_MODEL (x))
6098 if (GET_MODE (x) == TImode
6099 && x != CONST0_RTX (TImode)
6105 if (x == CONST0_RTX (GET_MODE (x)))
6113 /* Otherwise we handle everything else in the move patterns. */
6117 /* Determine if it's legal to put X into the constant pool. This
6118 is not possible for the address of thread-local symbols, which
6119 is checked above. */
6122 ix86_cannot_force_const_mem (rtx x)
6124 /* We can always put integral constants and vectors in memory. */
6125 switch (GET_CODE (x))
6135 return !legitimate_constant_p (x);
6138 /* Determine if a given RTX is a valid constant address. */
6141 constant_address_p (rtx x)
6143 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6146 /* Nonzero if the constant value X is a legitimate general operand
6147 when generating PIC code. It is given that flag_pic is on and
6148 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6151 legitimate_pic_operand_p (rtx x)
6155 switch (GET_CODE (x))
6158 inner = XEXP (x, 0);
6159 if (GET_CODE (inner) == PLUS
6160 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6161 inner = XEXP (inner, 0);
6163 /* Only some unspecs are valid as "constants". */
6164 if (GET_CODE (inner) == UNSPEC)
6165 switch (XINT (inner, 1))
6168 return TARGET_64BIT;
6170 x = XVECEXP (inner, 0, 0);
6171 return (GET_CODE (x) == SYMBOL_REF
6172 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6180 return legitimate_pic_address_disp_p (x);
6187 /* Determine if a given CONST RTX is a valid memory displacement
6191 legitimate_pic_address_disp_p (rtx disp)
6195 /* In 64bit mode we can allow direct addresses of symbols and labels
6196 when they are not dynamic symbols. */
6199 rtx op0 = disp, op1;
6201 switch (GET_CODE (disp))
6207 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6209 op0 = XEXP (XEXP (disp, 0), 0);
6210 op1 = XEXP (XEXP (disp, 0), 1);
6211 if (GET_CODE (op1) != CONST_INT
6212 || INTVAL (op1) >= 16*1024*1024
6213 || INTVAL (op1) < -16*1024*1024)
6215 if (GET_CODE (op0) == LABEL_REF)
6217 if (GET_CODE (op0) != SYMBOL_REF)
6222 /* TLS references should always be enclosed in UNSPEC. */
6223 if (SYMBOL_REF_TLS_MODEL (op0))
6225 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6233 if (GET_CODE (disp) != CONST)
6235 disp = XEXP (disp, 0);
6239 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6240 of GOT tables. We should not need these anyway. */
6241 if (GET_CODE (disp) != UNSPEC
6242 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6243 && XINT (disp, 1) != UNSPEC_GOTOFF))
6246 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6247 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6253 if (GET_CODE (disp) == PLUS)
6255 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6257 disp = XEXP (disp, 0);
6261 if (TARGET_MACHO && darwin_local_data_pic (disp))
6264 if (GET_CODE (disp) != UNSPEC)
6267 switch (XINT (disp, 1))
6272 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6274 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6275 While ABI specify also 32bit relocation but we don't produce it in
6276 small PIC model at all. */
6277 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6278 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6280 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6282 case UNSPEC_GOTTPOFF:
6283 case UNSPEC_GOTNTPOFF:
6284 case UNSPEC_INDNTPOFF:
6287 disp = XVECEXP (disp, 0, 0);
6288 return (GET_CODE (disp) == SYMBOL_REF
6289 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6291 disp = XVECEXP (disp, 0, 0);
6292 return (GET_CODE (disp) == SYMBOL_REF
6293 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6295 disp = XVECEXP (disp, 0, 0);
6296 return (GET_CODE (disp) == SYMBOL_REF
6297 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6303 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6304 memory address for an instruction. The MODE argument is the machine mode
6305 for the MEM expression that wants to use this address.
6307 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6308 convert common non-canonical forms to canonical form so that they will
6312 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6314 struct ix86_address parts;
6315 rtx base, index, disp;
6316 HOST_WIDE_INT scale;
6317 const char *reason = NULL;
6318 rtx reason_rtx = NULL_RTX;
6320 if (TARGET_DEBUG_ADDR)
6323 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6324 GET_MODE_NAME (mode), strict);
6328 if (ix86_decompose_address (addr, &parts) <= 0)
6330 reason = "decomposition failed";
6335 index = parts.index;
6337 scale = parts.scale;
6339 /* Validate base register.
6341 Don't allow SUBREG's that span more than a word here. It can lead to spill
6342 failures when the base is one word out of a two word structure, which is
6343 represented internally as a DImode int. */
6352 else if (GET_CODE (base) == SUBREG
6353 && REG_P (SUBREG_REG (base))
6354 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6356 reg = SUBREG_REG (base);
6359 reason = "base is not a register";
6363 if (GET_MODE (base) != Pmode)
6365 reason = "base is not in Pmode";
6369 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6370 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6372 reason = "base is not valid";
6377 /* Validate index register.
6379 Don't allow SUBREG's that span more than a word here -- same as above. */
6388 else if (GET_CODE (index) == SUBREG
6389 && REG_P (SUBREG_REG (index))
6390 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6392 reg = SUBREG_REG (index);
6395 reason = "index is not a register";
6399 if (GET_MODE (index) != Pmode)
6401 reason = "index is not in Pmode";
6405 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6406 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6408 reason = "index is not valid";
6413 /* Validate scale factor. */
6416 reason_rtx = GEN_INT (scale);
6419 reason = "scale without index";
6423 if (scale != 2 && scale != 4 && scale != 8)
6425 reason = "scale is not a valid multiplier";
6430 /* Validate displacement. */
6435 if (GET_CODE (disp) == CONST
6436 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6437 switch (XINT (XEXP (disp, 0), 1))
6439 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6440 used. While ABI specify also 32bit relocations, we don't produce
6441 them at all and use IP relative instead. */
6444 gcc_assert (flag_pic);
6446 goto is_legitimate_pic;
6447 reason = "64bit address unspec";
6450 case UNSPEC_GOTPCREL:
6451 gcc_assert (flag_pic);
6452 goto is_legitimate_pic;
6454 case UNSPEC_GOTTPOFF:
6455 case UNSPEC_GOTNTPOFF:
6456 case UNSPEC_INDNTPOFF:
6462 reason = "invalid address unspec";
6466 else if (SYMBOLIC_CONST (disp)
6470 && MACHOPIC_INDIRECT
6471 && !machopic_operand_p (disp)
6477 if (TARGET_64BIT && (index || base))
6479 /* foo@dtpoff(%rX) is ok. */
6480 if (GET_CODE (disp) != CONST
6481 || GET_CODE (XEXP (disp, 0)) != PLUS
6482 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6483 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6484 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6485 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6487 reason = "non-constant pic memory reference";
6491 else if (! legitimate_pic_address_disp_p (disp))
6493 reason = "displacement is an invalid pic construct";
6497 /* This code used to verify that a symbolic pic displacement
6498 includes the pic_offset_table_rtx register.
6500 While this is good idea, unfortunately these constructs may
6501 be created by "adds using lea" optimization for incorrect
6510 This code is nonsensical, but results in addressing
6511 GOT table with pic_offset_table_rtx base. We can't
6512 just refuse it easily, since it gets matched by
6513 "addsi3" pattern, that later gets split to lea in the
6514 case output register differs from input. While this
6515 can be handled by separate addsi pattern for this case
6516 that never results in lea, this seems to be easier and
6517 correct fix for crash to disable this test. */
6519 else if (GET_CODE (disp) != LABEL_REF
6520 && GET_CODE (disp) != CONST_INT
6521 && (GET_CODE (disp) != CONST
6522 || !legitimate_constant_p (disp))
6523 && (GET_CODE (disp) != SYMBOL_REF
6524 || !legitimate_constant_p (disp)))
6526 reason = "displacement is not constant";
6529 else if (TARGET_64BIT
6530 && !x86_64_immediate_operand (disp, VOIDmode))
6532 reason = "displacement is out of range";
6537 /* Everything looks valid. */
6538 if (TARGET_DEBUG_ADDR)
6539 fprintf (stderr, "Success.\n");
6543 if (TARGET_DEBUG_ADDR)
6545 fprintf (stderr, "Error: %s\n", reason);
6546 debug_rtx (reason_rtx);
6551 /* Return a unique alias set for the GOT. */
6553 static HOST_WIDE_INT
6554 ix86_GOT_alias_set (void)
6556 static HOST_WIDE_INT set = -1;
6558 set = new_alias_set ();
6562 /* Return a legitimate reference for ORIG (an address) using the
6563 register REG. If REG is 0, a new pseudo is generated.
6565 There are two types of references that must be handled:
6567 1. Global data references must load the address from the GOT, via
6568 the PIC reg. An insn is emitted to do this load, and the reg is
6571 2. Static data references, constant pool addresses, and code labels
6572 compute the address as an offset from the GOT, whose base is in
6573 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6574 differentiate them from global data objects. The returned
6575 address is the PIC reg + an unspec constant.
6577 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6578 reg also appears in the address. */
6581 legitimize_pic_address (rtx orig, rtx reg)
6588 if (TARGET_MACHO && !TARGET_64BIT)
6591 reg = gen_reg_rtx (Pmode);
6592 /* Use the generic Mach-O PIC machinery. */
6593 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6597 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6599 else if (TARGET_64BIT
6600 && ix86_cmodel != CM_SMALL_PIC
6601 && local_symbolic_operand (addr, Pmode))
6604 /* This symbol may be referenced via a displacement from the PIC
6605 base address (@GOTOFF). */
6607 if (reload_in_progress)
6608 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6609 if (GET_CODE (addr) == CONST)
6610 addr = XEXP (addr, 0);
6611 if (GET_CODE (addr) == PLUS)
6613 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6614 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6617 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6618 new = gen_rtx_CONST (Pmode, new);
6620 tmpreg = gen_reg_rtx (Pmode);
6623 emit_move_insn (tmpreg, new);
6627 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6628 tmpreg, 1, OPTAB_DIRECT);
6631 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6633 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6635 /* This symbol may be referenced via a displacement from the PIC
6636 base address (@GOTOFF). */
6638 if (reload_in_progress)
6639 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6640 if (GET_CODE (addr) == CONST)
6641 addr = XEXP (addr, 0);
6642 if (GET_CODE (addr) == PLUS)
6644 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6645 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6648 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6649 new = gen_rtx_CONST (Pmode, new);
6650 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6654 emit_move_insn (reg, new);
6658 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6662 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6663 new = gen_rtx_CONST (Pmode, new);
6664 new = gen_const_mem (Pmode, new);
6665 set_mem_alias_set (new, ix86_GOT_alias_set ());
6668 reg = gen_reg_rtx (Pmode);
6669 /* Use directly gen_movsi, otherwise the address is loaded
6670 into register for CSE. We don't want to CSE this addresses,
6671 instead we CSE addresses from the GOT table, so skip this. */
6672 emit_insn (gen_movsi (reg, new));
6677 /* This symbol must be referenced via a load from the
6678 Global Offset Table (@GOT). */
6680 if (reload_in_progress)
6681 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6682 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6683 new = gen_rtx_CONST (Pmode, new);
6684 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6685 new = gen_const_mem (Pmode, new);
6686 set_mem_alias_set (new, ix86_GOT_alias_set ());
6689 reg = gen_reg_rtx (Pmode);
6690 emit_move_insn (reg, new);
6696 if (GET_CODE (addr) == CONST_INT
6697 && !x86_64_immediate_operand (addr, VOIDmode))
6701 emit_move_insn (reg, addr);
6705 new = force_reg (Pmode, addr);
6707 else if (GET_CODE (addr) == CONST)
6709 addr = XEXP (addr, 0);
6711 /* We must match stuff we generate before. Assume the only
6712 unspecs that can get here are ours. Not that we could do
6713 anything with them anyway.... */
6714 if (GET_CODE (addr) == UNSPEC
6715 || (GET_CODE (addr) == PLUS
6716 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6718 gcc_assert (GET_CODE (addr) == PLUS);
6720 if (GET_CODE (addr) == PLUS)
6722 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6724 /* Check first to see if this is a constant offset from a @GOTOFF
6725 symbol reference. */
6726 if (local_symbolic_operand (op0, Pmode)
6727 && GET_CODE (op1) == CONST_INT)
6731 if (reload_in_progress)
6732 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6733 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6735 new = gen_rtx_PLUS (Pmode, new, op1);
6736 new = gen_rtx_CONST (Pmode, new);
6737 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6741 emit_move_insn (reg, new);
6747 if (INTVAL (op1) < -16*1024*1024
6748 || INTVAL (op1) >= 16*1024*1024)
6750 if (!x86_64_immediate_operand (op1, Pmode))
6751 op1 = force_reg (Pmode, op1);
6752 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6758 base = legitimize_pic_address (XEXP (addr, 0), reg);
6759 new = legitimize_pic_address (XEXP (addr, 1),
6760 base == reg ? NULL_RTX : reg);
6762 if (GET_CODE (new) == CONST_INT)
6763 new = plus_constant (base, INTVAL (new));
6766 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6768 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6769 new = XEXP (new, 1);
6771 new = gen_rtx_PLUS (Pmode, base, new);
6779 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6782 get_thread_pointer (int to_reg)
6786 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6790 reg = gen_reg_rtx (Pmode);
6791 insn = gen_rtx_SET (VOIDmode, reg, tp);
6792 insn = emit_insn (insn);
6797 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6798 false if we expect this to be used for a memory address and true if
6799 we expect to load the address into a register. */
6802 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6804 rtx dest, base, off, pic, tp;
6809 case TLS_MODEL_GLOBAL_DYNAMIC:
6810 dest = gen_reg_rtx (Pmode);
6811 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6813 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6815 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6818 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6819 insns = get_insns ();
6822 emit_libcall_block (insns, dest, rax, x);
6824 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6825 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6827 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6829 if (TARGET_GNU2_TLS)
6831 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6833 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6837 case TLS_MODEL_LOCAL_DYNAMIC:
6838 base = gen_reg_rtx (Pmode);
6839 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6841 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6843 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6846 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6847 insns = get_insns ();
6850 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6851 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6852 emit_libcall_block (insns, base, rax, note);
6854 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6855 emit_insn (gen_tls_local_dynamic_base_64 (base));
6857 emit_insn (gen_tls_local_dynamic_base_32 (base));
6859 if (TARGET_GNU2_TLS)
6861 rtx x = ix86_tls_module_base ();
6863 set_unique_reg_note (get_last_insn (), REG_EQUIV,
6864 gen_rtx_MINUS (Pmode, x, tp));
6867 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6868 off = gen_rtx_CONST (Pmode, off);
6870 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6872 if (TARGET_GNU2_TLS)
6874 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
6876 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6881 case TLS_MODEL_INITIAL_EXEC:
6885 type = UNSPEC_GOTNTPOFF;
6889 if (reload_in_progress)
6890 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6891 pic = pic_offset_table_rtx;
6892 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6894 else if (!TARGET_ANY_GNU_TLS)
6896 pic = gen_reg_rtx (Pmode);
6897 emit_insn (gen_set_got (pic));
6898 type = UNSPEC_GOTTPOFF;
6903 type = UNSPEC_INDNTPOFF;
6906 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6907 off = gen_rtx_CONST (Pmode, off);
6909 off = gen_rtx_PLUS (Pmode, pic, off);
6910 off = gen_const_mem (Pmode, off);
6911 set_mem_alias_set (off, ix86_GOT_alias_set ());
6913 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6915 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6916 off = force_reg (Pmode, off);
6917 return gen_rtx_PLUS (Pmode, base, off);
6921 base = get_thread_pointer (true);
6922 dest = gen_reg_rtx (Pmode);
6923 emit_insn (gen_subsi3 (dest, base, off));
6927 case TLS_MODEL_LOCAL_EXEC:
6928 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6929 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6930 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6931 off = gen_rtx_CONST (Pmode, off);
6933 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6935 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6936 return gen_rtx_PLUS (Pmode, base, off);
6940 base = get_thread_pointer (true);
6941 dest = gen_reg_rtx (Pmode);
6942 emit_insn (gen_subsi3 (dest, base, off));
6953 /* Try machine-dependent ways of modifying an illegitimate address
6954 to be legitimate. If we find one, return the new, valid address.
6955 This macro is used in only one place: `memory_address' in explow.c.
6957 OLDX is the address as it was before break_out_memory_refs was called.
6958 In some cases it is useful to look at this to decide what needs to be done.
6960 MODE and WIN are passed so that this macro can use
6961 GO_IF_LEGITIMATE_ADDRESS.
6963 It is always safe for this macro to do nothing. It exists to recognize
6964 opportunities to optimize the output.
6966 For the 80386, we handle X+REG by loading X into a register R and
6967 using R+REG. R will go in a general reg and indexing will be used.
6968 However, if REG is a broken-out memory address or multiplication,
6969 nothing needs to be done because REG can certainly go in a general reg.
6971 When -fpic is used, special handling is needed for symbolic references.
6972 See comments by legitimize_pic_address in i386.c for details. */
6975 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6980 if (TARGET_DEBUG_ADDR)
6982 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6983 GET_MODE_NAME (mode));
6987 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6989 return legitimize_tls_address (x, log, false);
6990 if (GET_CODE (x) == CONST
6991 && GET_CODE (XEXP (x, 0)) == PLUS
6992 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6993 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6995 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6996 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6999 if (flag_pic && SYMBOLIC_CONST (x))
7000 return legitimize_pic_address (x, 0);
7002 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7003 if (GET_CODE (x) == ASHIFT
7004 && GET_CODE (XEXP (x, 1)) == CONST_INT
7005 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7008 log = INTVAL (XEXP (x, 1));
7009 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7010 GEN_INT (1 << log));
7013 if (GET_CODE (x) == PLUS)
7015 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7017 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7018 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7019 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7022 log = INTVAL (XEXP (XEXP (x, 0), 1));
7023 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7024 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7025 GEN_INT (1 << log));
7028 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7029 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7030 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7033 log = INTVAL (XEXP (XEXP (x, 1), 1));
7034 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7035 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7036 GEN_INT (1 << log));
7039 /* Put multiply first if it isn't already. */
7040 if (GET_CODE (XEXP (x, 1)) == MULT)
7042 rtx tmp = XEXP (x, 0);
7043 XEXP (x, 0) = XEXP (x, 1);
7048 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7049 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7050 created by virtual register instantiation, register elimination, and
7051 similar optimizations. */
7052 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7055 x = gen_rtx_PLUS (Pmode,
7056 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7057 XEXP (XEXP (x, 1), 0)),
7058 XEXP (XEXP (x, 1), 1));
7062 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7063 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7064 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7065 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7066 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7067 && CONSTANT_P (XEXP (x, 1)))
7070 rtx other = NULL_RTX;
7072 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7074 constant = XEXP (x, 1);
7075 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7077 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7079 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7080 other = XEXP (x, 1);
7088 x = gen_rtx_PLUS (Pmode,
7089 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7090 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7091 plus_constant (other, INTVAL (constant)));
7095 if (changed && legitimate_address_p (mode, x, FALSE))
7098 if (GET_CODE (XEXP (x, 0)) == MULT)
7101 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7104 if (GET_CODE (XEXP (x, 1)) == MULT)
7107 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7111 && GET_CODE (XEXP (x, 1)) == REG
7112 && GET_CODE (XEXP (x, 0)) == REG)
7115 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7118 x = legitimize_pic_address (x, 0);
7121 if (changed && legitimate_address_p (mode, x, FALSE))
7124 if (GET_CODE (XEXP (x, 0)) == REG)
7126 rtx temp = gen_reg_rtx (Pmode);
7127 rtx val = force_operand (XEXP (x, 1), temp);
7129 emit_move_insn (temp, val);
7135 else if (GET_CODE (XEXP (x, 1)) == REG)
7137 rtx temp = gen_reg_rtx (Pmode);
7138 rtx val = force_operand (XEXP (x, 0), temp);
7140 emit_move_insn (temp, val);
7150 /* Print an integer constant expression in assembler syntax. Addition
7151 and subtraction are the only arithmetic that may appear in these
7152 expressions. FILE is the stdio stream to write to, X is the rtx, and
7153 CODE is the operand print code from the output string. */
7156 output_pic_addr_const (FILE *file, rtx x, int code)
7160 switch (GET_CODE (x))
7163 gcc_assert (flag_pic);
7168 output_addr_const (file, x);
7169 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7170 fputs ("@PLT", file);
7177 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7178 assemble_name (asm_out_file, buf);
7182 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7186 /* This used to output parentheses around the expression,
7187 but that does not work on the 386 (either ATT or BSD assembler). */
7188 output_pic_addr_const (file, XEXP (x, 0), code);
7192 if (GET_MODE (x) == VOIDmode)
7194 /* We can use %d if the number is <32 bits and positive. */
7195 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7196 fprintf (file, "0x%lx%08lx",
7197 (unsigned long) CONST_DOUBLE_HIGH (x),
7198 (unsigned long) CONST_DOUBLE_LOW (x));
7200 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7203 /* We can't handle floating point constants;
7204 PRINT_OPERAND must handle them. */
7205 output_operand_lossage ("floating constant misused");
7209 /* Some assemblers need integer constants to appear first. */
7210 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7212 output_pic_addr_const (file, XEXP (x, 0), code);
7214 output_pic_addr_const (file, XEXP (x, 1), code);
7218 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7219 output_pic_addr_const (file, XEXP (x, 1), code);
7221 output_pic_addr_const (file, XEXP (x, 0), code);
7227 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7228 output_pic_addr_const (file, XEXP (x, 0), code);
7230 output_pic_addr_const (file, XEXP (x, 1), code);
7232 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7236 gcc_assert (XVECLEN (x, 0) == 1);
7237 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7238 switch (XINT (x, 1))
7241 fputs ("@GOT", file);
7244 fputs ("@GOTOFF", file);
7246 case UNSPEC_GOTPCREL:
7247 fputs ("@GOTPCREL(%rip)", file);
7249 case UNSPEC_GOTTPOFF:
7250 /* FIXME: This might be @TPOFF in Sun ld too. */
7251 fputs ("@GOTTPOFF", file);
7254 fputs ("@TPOFF", file);
7258 fputs ("@TPOFF", file);
7260 fputs ("@NTPOFF", file);
7263 fputs ("@DTPOFF", file);
7265 case UNSPEC_GOTNTPOFF:
7267 fputs ("@GOTTPOFF(%rip)", file);
7269 fputs ("@GOTNTPOFF", file);
7271 case UNSPEC_INDNTPOFF:
7272 fputs ("@INDNTPOFF", file);
7275 output_operand_lossage ("invalid UNSPEC as operand");
7281 output_operand_lossage ("invalid expression as operand");
7285 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7286 We need to emit DTP-relative relocations. */
7289 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7291 fputs (ASM_LONG, file);
7292 output_addr_const (file, x);
7293 fputs ("@DTPOFF", file);
7299 fputs (", 0", file);
7306 /* In the name of slightly smaller debug output, and to cater to
7307 general assembler lossage, recognize PIC+GOTOFF and turn it back
7308 into a direct symbol reference.
7310 On Darwin, this is necessary to avoid a crash, because Darwin
7311 has a different PIC label for each routine but the DWARF debugging
7312 information is not associated with any particular routine, so it's
7313 necessary to remove references to the PIC label from RTL stored by
7314 the DWARF output code. */
7317 ix86_delegitimize_address (rtx orig_x)
7320 /* reg_addend is NULL or a multiple of some register. */
7321 rtx reg_addend = NULL_RTX;
7322 /* const_addend is NULL or a const_int. */
7323 rtx const_addend = NULL_RTX;
7324 /* This is the result, or NULL. */
7325 rtx result = NULL_RTX;
7327 if (GET_CODE (x) == MEM)
7332 if (GET_CODE (x) != CONST
7333 || GET_CODE (XEXP (x, 0)) != UNSPEC
7334 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7335 || GET_CODE (orig_x) != MEM)
7337 return XVECEXP (XEXP (x, 0), 0, 0);
7340 if (GET_CODE (x) != PLUS
7341 || GET_CODE (XEXP (x, 1)) != CONST)
7344 if (GET_CODE (XEXP (x, 0)) == REG
7345 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7346 /* %ebx + GOT/GOTOFF */
7348 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7350 /* %ebx + %reg * scale + GOT/GOTOFF */
7351 reg_addend = XEXP (x, 0);
7352 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7353 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7354 reg_addend = XEXP (reg_addend, 1);
7355 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7356 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7357 reg_addend = XEXP (reg_addend, 0);
7360 if (GET_CODE (reg_addend) != REG
7361 && GET_CODE (reg_addend) != MULT
7362 && GET_CODE (reg_addend) != ASHIFT)
7368 x = XEXP (XEXP (x, 1), 0);
7369 if (GET_CODE (x) == PLUS
7370 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7372 const_addend = XEXP (x, 1);
7376 if (GET_CODE (x) == UNSPEC
7377 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7378 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7379 result = XVECEXP (x, 0, 0);
7381 if (TARGET_MACHO && darwin_local_data_pic (x)
7382 && GET_CODE (orig_x) != MEM)
7383 result = XEXP (x, 0);
7389 result = gen_rtx_PLUS (Pmode, result, const_addend);
7391 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7396 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7401 if (mode == CCFPmode || mode == CCFPUmode)
7403 enum rtx_code second_code, bypass_code;
7404 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7405 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7406 code = ix86_fp_compare_code_to_integer (code);
7410 code = reverse_condition (code);
7421 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7425 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7426 Those same assemblers have the same but opposite lossage on cmov. */
7427 gcc_assert (mode == CCmode);
7428 suffix = fp ? "nbe" : "a";
7448 gcc_assert (mode == CCmode);
7470 gcc_assert (mode == CCmode);
7471 suffix = fp ? "nb" : "ae";
7474 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7478 gcc_assert (mode == CCmode);
7482 suffix = fp ? "u" : "p";
7485 suffix = fp ? "nu" : "np";
7490 fputs (suffix, file);
7493 /* Print the name of register X to FILE based on its machine mode and number.
7494 If CODE is 'w', pretend the mode is HImode.
7495 If CODE is 'b', pretend the mode is QImode.
7496 If CODE is 'k', pretend the mode is SImode.
7497 If CODE is 'q', pretend the mode is DImode.
7498 If CODE is 'h', pretend the reg is the 'high' byte register.
7499 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7502 print_reg (rtx x, int code, FILE *file)
7504 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7505 && REGNO (x) != FRAME_POINTER_REGNUM
7506 && REGNO (x) != FLAGS_REG
7507 && REGNO (x) != FPSR_REG
7508 && REGNO (x) != FPCR_REG);
7510 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7513 if (code == 'w' || MMX_REG_P (x))
7515 else if (code == 'b')
7517 else if (code == 'k')
7519 else if (code == 'q')
7521 else if (code == 'y')
7523 else if (code == 'h')
7526 code = GET_MODE_SIZE (GET_MODE (x));
7528 /* Irritatingly, AMD extended registers use different naming convention
7529 from the normal registers. */
7530 if (REX_INT_REG_P (x))
7532 gcc_assert (TARGET_64BIT);
7536 error ("extended registers have no high halves");
7539 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7542 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7545 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7548 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7551 error ("unsupported operand size for extended register");
7559 if (STACK_TOP_P (x))
7561 fputs ("st(0)", file);
7568 if (! ANY_FP_REG_P (x))
7569 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7574 fputs (hi_reg_name[REGNO (x)], file);
7577 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7579 fputs (qi_reg_name[REGNO (x)], file);
7582 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7584 fputs (qi_high_reg_name[REGNO (x)], file);
7591 /* Locate some local-dynamic symbol still in use by this function
7592 so that we can print its name in some tls_local_dynamic_base
7596 get_some_local_dynamic_name (void)
7600 if (cfun->machine->some_ld_name)
7601 return cfun->machine->some_ld_name;
7603 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7605 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7606 return cfun->machine->some_ld_name;
7612 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7616 if (GET_CODE (x) == SYMBOL_REF
7617 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7619 cfun->machine->some_ld_name = XSTR (x, 0);
7627 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7628 C -- print opcode suffix for set/cmov insn.
7629 c -- like C, but print reversed condition
7630 F,f -- likewise, but for floating-point.
7631 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7633 R -- print the prefix for register names.
7634 z -- print the opcode suffix for the size of the current operand.
7635 * -- print a star (in certain assembler syntax)
7636 A -- print an absolute memory reference.
7637 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7638 s -- print a shift double count, followed by the assemblers argument
7640 b -- print the QImode name of the register for the indicated operand.
7641 %b0 would print %al if operands[0] is reg 0.
7642 w -- likewise, print the HImode name of the register.
7643 k -- likewise, print the SImode name of the register.
7644 q -- likewise, print the DImode name of the register.
7645 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7646 y -- print "st(0)" instead of "st" as a register.
7647 D -- print condition for SSE cmp instruction.
7648 P -- if PIC, print an @PLT suffix.
7649 X -- don't print any sort of PIC '@' suffix for a symbol.
7650 & -- print some in-use local-dynamic symbol name.
7651 H -- print a memory address offset by 8; used for sse high-parts
7655 print_operand (FILE *file, rtx x, int code)
7662 if (ASSEMBLER_DIALECT == ASM_ATT)
7667 assemble_name (file, get_some_local_dynamic_name ());
7671 switch (ASSEMBLER_DIALECT)
7678 /* Intel syntax. For absolute addresses, registers should not
7679 be surrounded by braces. */
7680 if (GET_CODE (x) != REG)
7683 PRINT_OPERAND (file, x, 0);
7693 PRINT_OPERAND (file, x, 0);
7698 if (ASSEMBLER_DIALECT == ASM_ATT)
7703 if (ASSEMBLER_DIALECT == ASM_ATT)
7708 if (ASSEMBLER_DIALECT == ASM_ATT)
7713 if (ASSEMBLER_DIALECT == ASM_ATT)
7718 if (ASSEMBLER_DIALECT == ASM_ATT)
7723 if (ASSEMBLER_DIALECT == ASM_ATT)
7728 /* 387 opcodes don't get size suffixes if the operands are
7730 if (STACK_REG_P (x))
7733 /* Likewise if using Intel opcodes. */
7734 if (ASSEMBLER_DIALECT == ASM_INTEL)
7737 /* This is the size of op from size of operand. */
7738 switch (GET_MODE_SIZE (GET_MODE (x)))
7741 #ifdef HAVE_GAS_FILDS_FISTS
7747 if (GET_MODE (x) == SFmode)
7762 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7764 #ifdef GAS_MNEMONICS
7790 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7792 PRINT_OPERAND (file, x, 0);
7798 /* Little bit of braindamage here. The SSE compare instructions
7799 does use completely different names for the comparisons that the
7800 fp conditional moves. */
7801 switch (GET_CODE (x))
7816 fputs ("unord", file);
7820 fputs ("neq", file);
7824 fputs ("nlt", file);
7828 fputs ("nle", file);
7831 fputs ("ord", file);
7838 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7839 if (ASSEMBLER_DIALECT == ASM_ATT)
7841 switch (GET_MODE (x))
7843 case HImode: putc ('w', file); break;
7845 case SFmode: putc ('l', file); break;
7847 case DFmode: putc ('q', file); break;
7848 default: gcc_unreachable ();
7855 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7858 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7859 if (ASSEMBLER_DIALECT == ASM_ATT)
7862 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7865 /* Like above, but reverse condition */
7867 /* Check to see if argument to %c is really a constant
7868 and not a condition code which needs to be reversed. */
7869 if (!COMPARISON_P (x))
7871 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7874 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7877 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7878 if (ASSEMBLER_DIALECT == ASM_ATT)
7881 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7885 /* It doesn't actually matter what mode we use here, as we're
7886 only going to use this for printing. */
7887 x = adjust_address_nv (x, DImode, 8);
7894 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7897 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7900 int pred_val = INTVAL (XEXP (x, 0));
7902 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7903 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7905 int taken = pred_val > REG_BR_PROB_BASE / 2;
7906 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7908 /* Emit hints only in the case default branch prediction
7909 heuristics would fail. */
7910 if (taken != cputaken)
7912 /* We use 3e (DS) prefix for taken branches and
7913 2e (CS) prefix for not taken branches. */
7915 fputs ("ds ; ", file);
7917 fputs ("cs ; ", file);
7924 output_operand_lossage ("invalid operand code '%c'", code);
7928 if (GET_CODE (x) == REG)
7929 print_reg (x, code, file);
7931 else if (GET_CODE (x) == MEM)
7933 /* No `byte ptr' prefix for call instructions. */
7934 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7937 switch (GET_MODE_SIZE (GET_MODE (x)))
7939 case 1: size = "BYTE"; break;
7940 case 2: size = "WORD"; break;
7941 case 4: size = "DWORD"; break;
7942 case 8: size = "QWORD"; break;
7943 case 12: size = "XWORD"; break;
7944 case 16: size = "XMMWORD"; break;
7949 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7952 else if (code == 'w')
7954 else if (code == 'k')
7958 fputs (" PTR ", file);
7962 /* Avoid (%rip) for call operands. */
7963 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7964 && GET_CODE (x) != CONST_INT)
7965 output_addr_const (file, x);
7966 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7967 output_operand_lossage ("invalid constraints for operand");
7972 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7977 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7978 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7980 if (ASSEMBLER_DIALECT == ASM_ATT)
7982 fprintf (file, "0x%08lx", l);
7985 /* These float cases don't actually occur as immediate operands. */
7986 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7990 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7991 fprintf (file, "%s", dstr);
7994 else if (GET_CODE (x) == CONST_DOUBLE
7995 && GET_MODE (x) == XFmode)
7999 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8000 fprintf (file, "%s", dstr);
8005 /* We have patterns that allow zero sets of memory, for instance.
8006 In 64-bit mode, we should probably support all 8-byte vectors,
8007 since we can in fact encode that into an immediate. */
8008 if (GET_CODE (x) == CONST_VECTOR)
8010 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8016 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8018 if (ASSEMBLER_DIALECT == ASM_ATT)
8021 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8022 || GET_CODE (x) == LABEL_REF)
8024 if (ASSEMBLER_DIALECT == ASM_ATT)
8027 fputs ("OFFSET FLAT:", file);
8030 if (GET_CODE (x) == CONST_INT)
8031 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8033 output_pic_addr_const (file, x, code);
8035 output_addr_const (file, x);
8039 /* Print a memory operand whose address is ADDR. */
8042 print_operand_address (FILE *file, rtx addr)
8044 struct ix86_address parts;
8045 rtx base, index, disp;
8047 int ok = ix86_decompose_address (addr, &parts);
8052 index = parts.index;
8054 scale = parts.scale;
8062 if (USER_LABEL_PREFIX[0] == 0)
8064 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8070 if (!base && !index)
8072 /* Displacement only requires special attention. */
8074 if (GET_CODE (disp) == CONST_INT)
8076 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8078 if (USER_LABEL_PREFIX[0] == 0)
8080 fputs ("ds:", file);
8082 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8085 output_pic_addr_const (file, disp, 0);
8087 output_addr_const (file, disp);
8089 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8092 if (GET_CODE (disp) == CONST
8093 && GET_CODE (XEXP (disp, 0)) == PLUS
8094 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8095 disp = XEXP (XEXP (disp, 0), 0);
8096 if (GET_CODE (disp) == LABEL_REF
8097 || (GET_CODE (disp) == SYMBOL_REF
8098 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8099 fputs ("(%rip)", file);
8104 if (ASSEMBLER_DIALECT == ASM_ATT)
8109 output_pic_addr_const (file, disp, 0);
8110 else if (GET_CODE (disp) == LABEL_REF)
8111 output_asm_label (disp);
8113 output_addr_const (file, disp);
8118 print_reg (base, 0, file);
8122 print_reg (index, 0, file);
8124 fprintf (file, ",%d", scale);
8130 rtx offset = NULL_RTX;
8134 /* Pull out the offset of a symbol; print any symbol itself. */
8135 if (GET_CODE (disp) == CONST
8136 && GET_CODE (XEXP (disp, 0)) == PLUS
8137 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8139 offset = XEXP (XEXP (disp, 0), 1);
8140 disp = gen_rtx_CONST (VOIDmode,
8141 XEXP (XEXP (disp, 0), 0));
8145 output_pic_addr_const (file, disp, 0);
8146 else if (GET_CODE (disp) == LABEL_REF)
8147 output_asm_label (disp);
8148 else if (GET_CODE (disp) == CONST_INT)
8151 output_addr_const (file, disp);
8157 print_reg (base, 0, file);
8160 if (INTVAL (offset) >= 0)
8162 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8166 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8173 print_reg (index, 0, file);
8175 fprintf (file, "*%d", scale);
8183 output_addr_const_extra (FILE *file, rtx x)
8187 if (GET_CODE (x) != UNSPEC)
8190 op = XVECEXP (x, 0, 0);
8191 switch (XINT (x, 1))
8193 case UNSPEC_GOTTPOFF:
8194 output_addr_const (file, op);
8195 /* FIXME: This might be @TPOFF in Sun ld. */
8196 fputs ("@GOTTPOFF", file);
8199 output_addr_const (file, op);
8200 fputs ("@TPOFF", file);
8203 output_addr_const (file, op);
8205 fputs ("@TPOFF", file);
8207 fputs ("@NTPOFF", file);
8210 output_addr_const (file, op);
8211 fputs ("@DTPOFF", file);
8213 case UNSPEC_GOTNTPOFF:
8214 output_addr_const (file, op);
8216 fputs ("@GOTTPOFF(%rip)", file);
8218 fputs ("@GOTNTPOFF", file);
8220 case UNSPEC_INDNTPOFF:
8221 output_addr_const (file, op);
8222 fputs ("@INDNTPOFF", file);
8232 /* Split one or more DImode RTL references into pairs of SImode
8233 references. The RTL can be REG, offsettable MEM, integer constant, or
8234 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8235 split and "num" is its length. lo_half and hi_half are output arrays
8236 that parallel "operands". */
8239 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8243 rtx op = operands[num];
8245 /* simplify_subreg refuse to split volatile memory addresses,
8246 but we still have to handle it. */
8247 if (GET_CODE (op) == MEM)
8249 lo_half[num] = adjust_address (op, SImode, 0);
8250 hi_half[num] = adjust_address (op, SImode, 4);
8254 lo_half[num] = simplify_gen_subreg (SImode, op,
8255 GET_MODE (op) == VOIDmode
8256 ? DImode : GET_MODE (op), 0);
8257 hi_half[num] = simplify_gen_subreg (SImode, op,
8258 GET_MODE (op) == VOIDmode
8259 ? DImode : GET_MODE (op), 4);
8263 /* Split one or more TImode RTL references into pairs of DImode
8264 references. The RTL can be REG, offsettable MEM, integer constant, or
8265 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8266 split and "num" is its length. lo_half and hi_half are output arrays
8267 that parallel "operands". */
8270 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8274 rtx op = operands[num];
8276 /* simplify_subreg refuse to split volatile memory addresses, but we
8277 still have to handle it. */
8278 if (GET_CODE (op) == MEM)
8280 lo_half[num] = adjust_address (op, DImode, 0);
8281 hi_half[num] = adjust_address (op, DImode, 8);
8285 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8286 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8291 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8292 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8293 is the expression of the binary operation. The output may either be
8294 emitted here, or returned to the caller, like all output_* functions.
8296 There is no guarantee that the operands are the same mode, as they
8297 might be within FLOAT or FLOAT_EXTEND expressions. */
8299 #ifndef SYSV386_COMPAT
8300 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8301 wants to fix the assemblers because that causes incompatibility
8302 with gcc. No-one wants to fix gcc because that causes
8303 incompatibility with assemblers... You can use the option of
8304 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8305 #define SYSV386_COMPAT 1
8309 output_387_binary_op (rtx insn, rtx *operands)
8311 static char buf[30];
8314 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8316 #ifdef ENABLE_CHECKING
8317 /* Even if we do not want to check the inputs, this documents input
8318 constraints. Which helps in understanding the following code. */
8319 if (STACK_REG_P (operands[0])
8320 && ((REG_P (operands[1])
8321 && REGNO (operands[0]) == REGNO (operands[1])
8322 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8323 || (REG_P (operands[2])
8324 && REGNO (operands[0]) == REGNO (operands[2])
8325 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8326 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8329 gcc_assert (is_sse);
8332 switch (GET_CODE (operands[3]))
8335 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8336 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8344 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8345 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8353 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8354 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8362 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8363 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8377 if (GET_MODE (operands[0]) == SFmode)
8378 strcat (buf, "ss\t{%2, %0|%0, %2}");
8380 strcat (buf, "sd\t{%2, %0|%0, %2}");
8385 switch (GET_CODE (operands[3]))
8389 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8391 rtx temp = operands[2];
8392 operands[2] = operands[1];
8396 /* know operands[0] == operands[1]. */
8398 if (GET_CODE (operands[2]) == MEM)
8404 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8406 if (STACK_TOP_P (operands[0]))
8407 /* How is it that we are storing to a dead operand[2]?
8408 Well, presumably operands[1] is dead too. We can't
8409 store the result to st(0) as st(0) gets popped on this
8410 instruction. Instead store to operands[2] (which I
8411 think has to be st(1)). st(1) will be popped later.
8412 gcc <= 2.8.1 didn't have this check and generated
8413 assembly code that the Unixware assembler rejected. */
8414 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8416 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8420 if (STACK_TOP_P (operands[0]))
8421 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8423 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8428 if (GET_CODE (operands[1]) == MEM)
8434 if (GET_CODE (operands[2]) == MEM)
8440 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8443 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8444 derived assemblers, confusingly reverse the direction of
8445 the operation for fsub{r} and fdiv{r} when the
8446 destination register is not st(0). The Intel assembler
8447 doesn't have this brain damage. Read !SYSV386_COMPAT to
8448 figure out what the hardware really does. */
8449 if (STACK_TOP_P (operands[0]))
8450 p = "{p\t%0, %2|rp\t%2, %0}";
8452 p = "{rp\t%2, %0|p\t%0, %2}";
8454 if (STACK_TOP_P (operands[0]))
8455 /* As above for fmul/fadd, we can't store to st(0). */
8456 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8458 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8463 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8466 if (STACK_TOP_P (operands[0]))
8467 p = "{rp\t%0, %1|p\t%1, %0}";
8469 p = "{p\t%1, %0|rp\t%0, %1}";
8471 if (STACK_TOP_P (operands[0]))
8472 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8474 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8479 if (STACK_TOP_P (operands[0]))
8481 if (STACK_TOP_P (operands[1]))
8482 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8484 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8487 else if (STACK_TOP_P (operands[1]))
8490 p = "{\t%1, %0|r\t%0, %1}";
8492 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8498 p = "{r\t%2, %0|\t%0, %2}";
8500 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8513 /* Return needed mode for entity in optimize_mode_switching pass. */
8516 ix86_mode_needed (int entity, rtx insn)
8518 enum attr_i387_cw mode;
8520 /* The mode UNINITIALIZED is used to store control word after a
8521 function call or ASM pattern. The mode ANY specify that function
8522 has no requirements on the control word and make no changes in the
8523 bits we are interested in. */
8526 || (NONJUMP_INSN_P (insn)
8527 && (asm_noperands (PATTERN (insn)) >= 0
8528 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8529 return I387_CW_UNINITIALIZED;
8531 if (recog_memoized (insn) < 0)
8534 mode = get_attr_i387_cw (insn);
8539 if (mode == I387_CW_TRUNC)
8544 if (mode == I387_CW_FLOOR)
8549 if (mode == I387_CW_CEIL)
8554 if (mode == I387_CW_MASK_PM)
8565 /* Output code to initialize control word copies used by trunc?f?i and
8566 rounding patterns. CURRENT_MODE is set to current control word,
8567 while NEW_MODE is set to new control word. */
8570 emit_i387_cw_initialization (int mode)
8572 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8577 rtx reg = gen_reg_rtx (HImode);
8579 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8580 emit_move_insn (reg, stored_mode);
8582 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8587 /* round toward zero (truncate) */
8588 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8589 slot = SLOT_CW_TRUNC;
8593 /* round down toward -oo */
8594 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8595 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8596 slot = SLOT_CW_FLOOR;
8600 /* round up toward +oo */
8601 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8602 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8603 slot = SLOT_CW_CEIL;
8606 case I387_CW_MASK_PM:
8607 /* mask precision exception for nearbyint() */
8608 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8609 slot = SLOT_CW_MASK_PM;
8621 /* round toward zero (truncate) */
8622 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8623 slot = SLOT_CW_TRUNC;
8627 /* round down toward -oo */
8628 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8629 slot = SLOT_CW_FLOOR;
8633 /* round up toward +oo */
8634 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8635 slot = SLOT_CW_CEIL;
8638 case I387_CW_MASK_PM:
8639 /* mask precision exception for nearbyint() */
8640 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8641 slot = SLOT_CW_MASK_PM;
8649 gcc_assert (slot < MAX_386_STACK_LOCALS);
8651 new_mode = assign_386_stack_local (HImode, slot);
8652 emit_move_insn (new_mode, reg);
8655 /* Output code for INSN to convert a float to a signed int. OPERANDS
8656 are the insn operands. The output may be [HSD]Imode and the input
8657 operand may be [SDX]Fmode. */
8660 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8662 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8663 int dimode_p = GET_MODE (operands[0]) == DImode;
8664 int round_mode = get_attr_i387_cw (insn);
8666 /* Jump through a hoop or two for DImode, since the hardware has no
8667 non-popping instruction. We used to do this a different way, but
8668 that was somewhat fragile and broke with post-reload splitters. */
8669 if ((dimode_p || fisttp) && !stack_top_dies)
8670 output_asm_insn ("fld\t%y1", operands);
8672 gcc_assert (STACK_TOP_P (operands[1]));
8673 gcc_assert (GET_CODE (operands[0]) == MEM);
8676 output_asm_insn ("fisttp%z0\t%0", operands);
8679 if (round_mode != I387_CW_ANY)
8680 output_asm_insn ("fldcw\t%3", operands);
8681 if (stack_top_dies || dimode_p)
8682 output_asm_insn ("fistp%z0\t%0", operands);
8684 output_asm_insn ("fist%z0\t%0", operands);
8685 if (round_mode != I387_CW_ANY)
8686 output_asm_insn ("fldcw\t%2", operands);
8692 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8693 have the values zero or one, indicates the ffreep insn's operand
8694 from the OPERANDS array. */
8697 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8699 if (TARGET_USE_FFREEP)
8700 #if HAVE_AS_IX86_FFREEP
8701 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8704 static char retval[] = ".word\t0xc_df";
8705 int regno = REGNO (operands[opno]);
8707 gcc_assert (FP_REGNO_P (regno));
8709 retval[9] = '0' + (regno - FIRST_STACK_REG);
8714 return opno ? "fstp\t%y1" : "fstp\t%y0";
8718 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8719 should be used. UNORDERED_P is true when fucom should be used. */
8722 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8725 rtx cmp_op0, cmp_op1;
8726 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8730 cmp_op0 = operands[0];
8731 cmp_op1 = operands[1];
8735 cmp_op0 = operands[1];
8736 cmp_op1 = operands[2];
8741 if (GET_MODE (operands[0]) == SFmode)
8743 return "ucomiss\t{%1, %0|%0, %1}";
8745 return "comiss\t{%1, %0|%0, %1}";
8748 return "ucomisd\t{%1, %0|%0, %1}";
8750 return "comisd\t{%1, %0|%0, %1}";
8753 gcc_assert (STACK_TOP_P (cmp_op0));
8755 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8757 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8761 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8762 return output_387_ffreep (operands, 1);
8765 return "ftst\n\tfnstsw\t%0";
8768 if (STACK_REG_P (cmp_op1)
8770 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8771 && REGNO (cmp_op1) != FIRST_STACK_REG)
8773 /* If both the top of the 387 stack dies, and the other operand
8774 is also a stack register that dies, then this must be a
8775 `fcompp' float compare */
8779 /* There is no double popping fcomi variant. Fortunately,
8780 eflags is immune from the fstp's cc clobbering. */
8782 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8784 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8785 return output_387_ffreep (operands, 0);
8790 return "fucompp\n\tfnstsw\t%0";
8792 return "fcompp\n\tfnstsw\t%0";
8797 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8799 static const char * const alt[16] =
8801 "fcom%z2\t%y2\n\tfnstsw\t%0",
8802 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8803 "fucom%z2\t%y2\n\tfnstsw\t%0",
8804 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8806 "ficom%z2\t%y2\n\tfnstsw\t%0",
8807 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8811 "fcomi\t{%y1, %0|%0, %y1}",
8812 "fcomip\t{%y1, %0|%0, %y1}",
8813 "fucomi\t{%y1, %0|%0, %y1}",
8814 "fucomip\t{%y1, %0|%0, %y1}",
8825 mask = eflags_p << 3;
8826 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8827 mask |= unordered_p << 1;
8828 mask |= stack_top_dies;
8830 gcc_assert (mask < 16);
8839 ix86_output_addr_vec_elt (FILE *file, int value)
8841 const char *directive = ASM_LONG;
8845 directive = ASM_QUAD;
8847 gcc_assert (!TARGET_64BIT);
8850 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8854 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8857 fprintf (file, "%s%s%d-%s%d\n",
8858 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8859 else if (HAVE_AS_GOTOFF_IN_DATA)
8860 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8862 else if (TARGET_MACHO)
8864 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8865 machopic_output_function_base_name (file);
8866 fprintf(file, "\n");
8870 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8871 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8874 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8878 ix86_expand_clear (rtx dest)
8882 /* We play register width games, which are only valid after reload. */
8883 gcc_assert (reload_completed);
8885 /* Avoid HImode and its attendant prefix byte. */
8886 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8887 dest = gen_rtx_REG (SImode, REGNO (dest));
8889 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8891 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8892 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8894 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8895 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8901 /* X is an unchanging MEM. If it is a constant pool reference, return
8902 the constant pool rtx, else NULL. */
8905 maybe_get_pool_constant (rtx x)
8907 x = ix86_delegitimize_address (XEXP (x, 0));
8909 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8910 return get_pool_constant (x);
8916 ix86_expand_move (enum machine_mode mode, rtx operands[])
8918 int strict = (reload_in_progress || reload_completed);
8920 enum tls_model model;
8925 if (GET_CODE (op1) == SYMBOL_REF)
8927 model = SYMBOL_REF_TLS_MODEL (op1);
8930 op1 = legitimize_tls_address (op1, model, true);
8931 op1 = force_operand (op1, op0);
8936 else if (GET_CODE (op1) == CONST
8937 && GET_CODE (XEXP (op1, 0)) == PLUS
8938 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8940 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8943 rtx addend = XEXP (XEXP (op1, 0), 1);
8944 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8945 op1 = force_operand (op1, NULL);
8946 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8947 op0, 1, OPTAB_DIRECT);
8953 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8955 if (TARGET_MACHO && !TARGET_64BIT)
8960 rtx temp = ((reload_in_progress
8961 || ((op0 && GET_CODE (op0) == REG)
8963 ? op0 : gen_reg_rtx (Pmode));
8964 op1 = machopic_indirect_data_reference (op1, temp);
8965 op1 = machopic_legitimize_pic_address (op1, mode,
8966 temp == op1 ? 0 : temp);
8968 else if (MACHOPIC_INDIRECT)
8969 op1 = machopic_indirect_data_reference (op1, 0);
8976 if (GET_CODE (op0) == MEM)
8977 op1 = force_reg (Pmode, op1);
8979 op1 = legitimize_address (op1, op1, Pmode);
8984 if (GET_CODE (op0) == MEM
8985 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8986 || !push_operand (op0, mode))
8987 && GET_CODE (op1) == MEM)
8988 op1 = force_reg (mode, op1);
8990 if (push_operand (op0, mode)
8991 && ! general_no_elim_operand (op1, mode))
8992 op1 = copy_to_mode_reg (mode, op1);
8994 /* Force large constants in 64bit compilation into register
8995 to get them CSEed. */
8996 if (TARGET_64BIT && mode == DImode
8997 && immediate_operand (op1, mode)
8998 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8999 && !register_operand (op0, mode)
9000 && optimize && !reload_completed && !reload_in_progress)
9001 op1 = copy_to_mode_reg (mode, op1);
9003 if (FLOAT_MODE_P (mode))
9005 /* If we are loading a floating point constant to a register,
9006 force the value to memory now, since we'll get better code
9007 out the back end. */
9011 else if (GET_CODE (op1) == CONST_DOUBLE)
9013 op1 = validize_mem (force_const_mem (mode, op1));
9014 if (!register_operand (op0, mode))
9016 rtx temp = gen_reg_rtx (mode);
9017 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9018 emit_move_insn (op0, temp);
9025 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9029 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9031 rtx op0 = operands[0], op1 = operands[1];
9033 /* Force constants other than zero into memory. We do not know how
9034 the instructions used to build constants modify the upper 64 bits
9035 of the register, once we have that information we may be able
9036 to handle some of them more efficiently. */
9037 if ((reload_in_progress | reload_completed) == 0
9038 && register_operand (op0, mode)
9040 && standard_sse_constant_p (op1) <= 0)
9041 op1 = validize_mem (force_const_mem (mode, op1));
9043 /* Make operand1 a register if it isn't already. */
9045 && !register_operand (op0, mode)
9046 && !register_operand (op1, mode))
9048 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9052 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9055 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9056 straight to ix86_expand_vector_move. */
9059 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9068 /* If we're optimizing for size, movups is the smallest. */
9071 op0 = gen_lowpart (V4SFmode, op0);
9072 op1 = gen_lowpart (V4SFmode, op1);
9073 emit_insn (gen_sse_movups (op0, op1));
9077 /* ??? If we have typed data, then it would appear that using
9078 movdqu is the only way to get unaligned data loaded with
9080 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9082 op0 = gen_lowpart (V16QImode, op0);
9083 op1 = gen_lowpart (V16QImode, op1);
9084 emit_insn (gen_sse2_movdqu (op0, op1));
9088 if (TARGET_SSE2 && mode == V2DFmode)
9092 /* When SSE registers are split into halves, we can avoid
9093 writing to the top half twice. */
9094 if (TARGET_SSE_SPLIT_REGS)
9096 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9101 /* ??? Not sure about the best option for the Intel chips.
9102 The following would seem to satisfy; the register is
9103 entirely cleared, breaking the dependency chain. We
9104 then store to the upper half, with a dependency depth
9105 of one. A rumor has it that Intel recommends two movsd
9106 followed by an unpacklpd, but this is unconfirmed. And
9107 given that the dependency depth of the unpacklpd would
9108 still be one, I'm not sure why this would be better. */
9109 zero = CONST0_RTX (V2DFmode);
9112 m = adjust_address (op1, DFmode, 0);
9113 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9114 m = adjust_address (op1, DFmode, 8);
9115 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9119 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9120 emit_move_insn (op0, CONST0_RTX (mode));
9122 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9124 if (mode != V4SFmode)
9125 op0 = gen_lowpart (V4SFmode, op0);
9126 m = adjust_address (op1, V2SFmode, 0);
9127 emit_insn (gen_sse_loadlps (op0, op0, m));
9128 m = adjust_address (op1, V2SFmode, 8);
9129 emit_insn (gen_sse_loadhps (op0, op0, m));
9132 else if (MEM_P (op0))
9134 /* If we're optimizing for size, movups is the smallest. */
9137 op0 = gen_lowpart (V4SFmode, op0);
9138 op1 = gen_lowpart (V4SFmode, op1);
9139 emit_insn (gen_sse_movups (op0, op1));
9143 /* ??? Similar to above, only less clear because of quote
9144 typeless stores unquote. */
9145 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9146 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9148 op0 = gen_lowpart (V16QImode, op0);
9149 op1 = gen_lowpart (V16QImode, op1);
9150 emit_insn (gen_sse2_movdqu (op0, op1));
9154 if (TARGET_SSE2 && mode == V2DFmode)
9156 m = adjust_address (op0, DFmode, 0);
9157 emit_insn (gen_sse2_storelpd (m, op1));
9158 m = adjust_address (op0, DFmode, 8);
9159 emit_insn (gen_sse2_storehpd (m, op1));
9163 if (mode != V4SFmode)
9164 op1 = gen_lowpart (V4SFmode, op1);
9165 m = adjust_address (op0, V2SFmode, 0);
9166 emit_insn (gen_sse_storelps (m, op1));
9167 m = adjust_address (op0, V2SFmode, 8);
9168 emit_insn (gen_sse_storehps (m, op1));
9175 /* Expand a push in MODE. This is some mode for which we do not support
9176 proper push instructions, at least from the registers that we expect
9177 the value to live in. */
9180 ix86_expand_push (enum machine_mode mode, rtx x)
9184 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9185 GEN_INT (-GET_MODE_SIZE (mode)),
9186 stack_pointer_rtx, 1, OPTAB_DIRECT);
9187 if (tmp != stack_pointer_rtx)
9188 emit_move_insn (stack_pointer_rtx, tmp);
9190 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9191 emit_move_insn (tmp, x);
9194 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9195 destination to use for the operation. If different from the true
9196 destination in operands[0], a copy operation will be required. */
9199 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9202 int matching_memory;
9203 rtx src1, src2, dst;
9209 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9210 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9211 && (rtx_equal_p (dst, src2)
9212 || immediate_operand (src1, mode)))
9219 /* If the destination is memory, and we do not have matching source
9220 operands, do things in registers. */
9221 matching_memory = 0;
9222 if (GET_CODE (dst) == MEM)
9224 if (rtx_equal_p (dst, src1))
9225 matching_memory = 1;
9226 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9227 && rtx_equal_p (dst, src2))
9228 matching_memory = 2;
9230 dst = gen_reg_rtx (mode);
9233 /* Both source operands cannot be in memory. */
9234 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9236 if (matching_memory != 2)
9237 src2 = force_reg (mode, src2);
9239 src1 = force_reg (mode, src1);
9242 /* If the operation is not commutable, source 1 cannot be a constant
9243 or non-matching memory. */
9244 if ((CONSTANT_P (src1)
9245 || (!matching_memory && GET_CODE (src1) == MEM))
9246 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9247 src1 = force_reg (mode, src1);
9249 src1 = operands[1] = src1;
9250 src2 = operands[2] = src2;
9254 /* Similarly, but assume that the destination has already been
9258 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9259 enum machine_mode mode, rtx operands[])
9261 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9262 gcc_assert (dst == operands[0]);
9265 /* Attempt to expand a binary operator. Make the expansion closer to the
9266 actual machine, then just general_operand, which will allow 3 separate
9267 memory references (one output, two input) in a single insn. */
9270 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9273 rtx src1, src2, dst, op, clob;
9275 dst = ix86_fixup_binary_operands (code, mode, operands);
9279 /* Emit the instruction. */
9281 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9282 if (reload_in_progress)
9284 /* Reload doesn't know about the flags register, and doesn't know that
9285 it doesn't want to clobber it. We can only do this with PLUS. */
9286 gcc_assert (code == PLUS);
9291 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9292 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9295 /* Fix up the destination if needed. */
9296 if (dst != operands[0])
9297 emit_move_insn (operands[0], dst);
9300 /* Return TRUE or FALSE depending on whether the binary operator meets the
9301 appropriate constraints. */
9304 ix86_binary_operator_ok (enum rtx_code code,
9305 enum machine_mode mode ATTRIBUTE_UNUSED,
9308 /* Both source operands cannot be in memory. */
9309 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9311 /* If the operation is not commutable, source 1 cannot be a constant. */
9312 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9314 /* If the destination is memory, we must have a matching source operand. */
9315 if (GET_CODE (operands[0]) == MEM
9316 && ! (rtx_equal_p (operands[0], operands[1])
9317 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9318 && rtx_equal_p (operands[0], operands[2]))))
9320 /* If the operation is not commutable and the source 1 is memory, we must
9321 have a matching destination. */
9322 if (GET_CODE (operands[1]) == MEM
9323 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9324 && ! rtx_equal_p (operands[0], operands[1]))
9329 /* Attempt to expand a unary operator. Make the expansion closer to the
9330 actual machine, then just general_operand, which will allow 2 separate
9331 memory references (one output, one input) in a single insn. */
9334 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9337 int matching_memory;
9338 rtx src, dst, op, clob;
9343 /* If the destination is memory, and we do not have matching source
9344 operands, do things in registers. */
9345 matching_memory = 0;
9348 if (rtx_equal_p (dst, src))
9349 matching_memory = 1;
9351 dst = gen_reg_rtx (mode);
9354 /* When source operand is memory, destination must match. */
9355 if (MEM_P (src) && !matching_memory)
9356 src = force_reg (mode, src);
9358 /* Emit the instruction. */
9360 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9361 if (reload_in_progress || code == NOT)
9363 /* Reload doesn't know about the flags register, and doesn't know that
9364 it doesn't want to clobber it. */
9365 gcc_assert (code == NOT);
9370 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9371 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9374 /* Fix up the destination if needed. */
9375 if (dst != operands[0])
9376 emit_move_insn (operands[0], dst);
9379 /* Return TRUE or FALSE depending on whether the unary operator meets the
9380 appropriate constraints. */
9383 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9384 enum machine_mode mode ATTRIBUTE_UNUSED,
9385 rtx operands[2] ATTRIBUTE_UNUSED)
9387 /* If one of operands is memory, source and destination must match. */
9388 if ((GET_CODE (operands[0]) == MEM
9389 || GET_CODE (operands[1]) == MEM)
9390 && ! rtx_equal_p (operands[0], operands[1]))
9395 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9396 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9397 true, then replicate the mask for all elements of the vector register.
9398 If INVERT is true, then create a mask excluding the sign bit. */
9401 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9403 enum machine_mode vec_mode;
9404 HOST_WIDE_INT hi, lo;
9409 /* Find the sign bit, sign extended to 2*HWI. */
9411 lo = 0x80000000, hi = lo < 0;
9412 else if (HOST_BITS_PER_WIDE_INT >= 64)
9413 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9415 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9420 /* Force this value into the low part of a fp vector constant. */
9421 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9422 mask = gen_lowpart (mode, mask);
9427 v = gen_rtvec (4, mask, mask, mask, mask);
9429 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9430 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9431 vec_mode = V4SFmode;
9436 v = gen_rtvec (2, mask, mask);
9438 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9439 vec_mode = V2DFmode;
9442 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9445 /* Generate code for floating point ABS or NEG. */
9448 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9451 rtx mask, set, use, clob, dst, src;
9452 bool matching_memory;
9453 bool use_sse = false;
9454 bool vector_mode = VECTOR_MODE_P (mode);
9455 enum machine_mode elt_mode = mode;
9459 elt_mode = GET_MODE_INNER (mode);
9462 else if (TARGET_SSE_MATH)
9463 use_sse = SSE_FLOAT_MODE_P (mode);
9465 /* NEG and ABS performed with SSE use bitwise mask operations.
9466 Create the appropriate mask now. */
9468 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9475 /* If the destination is memory, and we don't have matching source
9476 operands or we're using the x87, do things in registers. */
9477 matching_memory = false;
9480 if (use_sse && rtx_equal_p (dst, src))
9481 matching_memory = true;
9483 dst = gen_reg_rtx (mode);
9485 if (MEM_P (src) && !matching_memory)
9486 src = force_reg (mode, src);
9490 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9491 set = gen_rtx_SET (VOIDmode, dst, set);
9496 set = gen_rtx_fmt_e (code, mode, src);
9497 set = gen_rtx_SET (VOIDmode, dst, set);
9500 use = gen_rtx_USE (VOIDmode, mask);
9501 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9502 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9503 gen_rtvec (3, set, use, clob)));
9509 if (dst != operands[0])
9510 emit_move_insn (operands[0], dst);
9513 /* Expand a copysign operation. Special case operand 0 being a constant. */
9516 ix86_expand_copysign (rtx operands[])
9518 enum machine_mode mode, vmode;
9519 rtx dest, op0, op1, mask, nmask;
9525 mode = GET_MODE (dest);
9526 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9528 if (GET_CODE (op0) == CONST_DOUBLE)
9532 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9533 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9535 if (op0 == CONST0_RTX (mode))
9536 op0 = CONST0_RTX (vmode);
9540 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9541 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9543 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9544 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9547 mask = ix86_build_signbit_mask (mode, 0, 0);
9550 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9552 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9556 nmask = ix86_build_signbit_mask (mode, 0, 1);
9557 mask = ix86_build_signbit_mask (mode, 0, 0);
9560 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9562 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9566 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9567 be a constant, and so has already been expanded into a vector constant. */
9570 ix86_split_copysign_const (rtx operands[])
9572 enum machine_mode mode, vmode;
9573 rtx dest, op0, op1, mask, x;
9580 mode = GET_MODE (dest);
9581 vmode = GET_MODE (mask);
9583 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9584 x = gen_rtx_AND (vmode, dest, mask);
9585 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9587 if (op0 != CONST0_RTX (vmode))
9589 x = gen_rtx_IOR (vmode, dest, op0);
9590 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9594 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9595 so we have to do two masks. */
9598 ix86_split_copysign_var (rtx operands[])
9600 enum machine_mode mode, vmode;
9601 rtx dest, scratch, op0, op1, mask, nmask, x;
9604 scratch = operands[1];
9607 nmask = operands[4];
9610 mode = GET_MODE (dest);
9611 vmode = GET_MODE (mask);
9613 if (rtx_equal_p (op0, op1))
9615 /* Shouldn't happen often (it's useless, obviously), but when it does
9616 we'd generate incorrect code if we continue below. */
9617 emit_move_insn (dest, op0);
9621 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9623 gcc_assert (REGNO (op1) == REGNO (scratch));
9625 x = gen_rtx_AND (vmode, scratch, mask);
9626 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9629 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9630 x = gen_rtx_NOT (vmode, dest);
9631 x = gen_rtx_AND (vmode, x, op0);
9632 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9636 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9638 x = gen_rtx_AND (vmode, scratch, mask);
9640 else /* alternative 2,4 */
9642 gcc_assert (REGNO (mask) == REGNO (scratch));
9643 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9644 x = gen_rtx_AND (vmode, scratch, op1);
9646 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9648 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9650 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9651 x = gen_rtx_AND (vmode, dest, nmask);
9653 else /* alternative 3,4 */
9655 gcc_assert (REGNO (nmask) == REGNO (dest));
9657 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9658 x = gen_rtx_AND (vmode, dest, op0);
9660 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9663 x = gen_rtx_IOR (vmode, dest, scratch);
9664 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9667 /* Return TRUE or FALSE depending on whether the first SET in INSN
9668 has source and destination with matching CC modes, and that the
9669 CC mode is at least as constrained as REQ_MODE. */
9672 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9675 enum machine_mode set_mode;
9677 set = PATTERN (insn);
9678 if (GET_CODE (set) == PARALLEL)
9679 set = XVECEXP (set, 0, 0);
9680 gcc_assert (GET_CODE (set) == SET);
9681 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9683 set_mode = GET_MODE (SET_DEST (set));
9687 if (req_mode != CCNOmode
9688 && (req_mode != CCmode
9689 || XEXP (SET_SRC (set), 1) != const0_rtx))
9693 if (req_mode == CCGCmode)
9697 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9701 if (req_mode == CCZmode)
9711 return (GET_MODE (SET_SRC (set)) == set_mode);
9714 /* Generate insn patterns to do an integer compare of OPERANDS. */
9717 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9719 enum machine_mode cmpmode;
9722 cmpmode = SELECT_CC_MODE (code, op0, op1);
9723 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9725 /* This is very simple, but making the interface the same as in the
9726 FP case makes the rest of the code easier. */
9727 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9728 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9730 /* Return the test that should be put into the flags user, i.e.
9731 the bcc, scc, or cmov instruction. */
9732 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9735 /* Figure out whether to use ordered or unordered fp comparisons.
9736 Return the appropriate mode to use. */
9739 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9741 /* ??? In order to make all comparisons reversible, we do all comparisons
9742 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9743 all forms trapping and nontrapping comparisons, we can make inequality
9744 comparisons trapping again, since it results in better code when using
9745 FCOM based compares. */
9746 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9750 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9752 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9753 return ix86_fp_compare_mode (code);
9756 /* Only zero flag is needed. */
9758 case NE: /* ZF!=0 */
9760 /* Codes needing carry flag. */
9761 case GEU: /* CF=0 */
9762 case GTU: /* CF=0 & ZF=0 */
9763 case LTU: /* CF=1 */
9764 case LEU: /* CF=1 | ZF=1 */
9766 /* Codes possibly doable only with sign flag when
9767 comparing against zero. */
9768 case GE: /* SF=OF or SF=0 */
9769 case LT: /* SF<>OF or SF=1 */
9770 if (op1 == const0_rtx)
9773 /* For other cases Carry flag is not required. */
9775 /* Codes doable only with sign flag when comparing
9776 against zero, but we miss jump instruction for it
9777 so we need to use relational tests against overflow
9778 that thus needs to be zero. */
9779 case GT: /* ZF=0 & SF=OF */
9780 case LE: /* ZF=1 | SF<>OF */
9781 if (op1 == const0_rtx)
9785 /* strcmp pattern do (use flags) and combine may ask us for proper
9794 /* Return the fixed registers used for condition codes. */
9797 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9804 /* If two condition code modes are compatible, return a condition code
9805 mode which is compatible with both. Otherwise, return
9808 static enum machine_mode
9809 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9814 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9817 if ((m1 == CCGCmode && m2 == CCGOCmode)
9818 || (m1 == CCGOCmode && m2 == CCGCmode))
9846 /* These are only compatible with themselves, which we already
9852 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9855 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9857 enum rtx_code swapped_code = swap_condition (code);
9858 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9859 || (ix86_fp_comparison_cost (swapped_code)
9860 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9863 /* Swap, force into registers, or otherwise massage the two operands
9864 to a fp comparison. The operands are updated in place; the new
9865 comparison code is returned. */
9867 static enum rtx_code
9868 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9870 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9871 rtx op0 = *pop0, op1 = *pop1;
9872 enum machine_mode op_mode = GET_MODE (op0);
9873 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9875 /* All of the unordered compare instructions only work on registers.
9876 The same is true of the fcomi compare instructions. The XFmode
9877 compare instructions require registers except when comparing
9878 against zero or when converting operand 1 from fixed point to
9882 && (fpcmp_mode == CCFPUmode
9883 || (op_mode == XFmode
9884 && ! (standard_80387_constant_p (op0) == 1
9885 || standard_80387_constant_p (op1) == 1)
9886 && GET_CODE (op1) != FLOAT)
9887 || ix86_use_fcomi_compare (code)))
9889 op0 = force_reg (op_mode, op0);
9890 op1 = force_reg (op_mode, op1);
9894 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9895 things around if they appear profitable, otherwise force op0
9898 if (standard_80387_constant_p (op0) == 0
9899 || (GET_CODE (op0) == MEM
9900 && ! (standard_80387_constant_p (op1) == 0
9901 || GET_CODE (op1) == MEM)))
9904 tmp = op0, op0 = op1, op1 = tmp;
9905 code = swap_condition (code);
9908 if (GET_CODE (op0) != REG)
9909 op0 = force_reg (op_mode, op0);
9911 if (CONSTANT_P (op1))
9913 int tmp = standard_80387_constant_p (op1);
9915 op1 = validize_mem (force_const_mem (op_mode, op1));
9919 op1 = force_reg (op_mode, op1);
9922 op1 = force_reg (op_mode, op1);
9926 /* Try to rearrange the comparison to make it cheaper. */
9927 if (ix86_fp_comparison_cost (code)
9928 > ix86_fp_comparison_cost (swap_condition (code))
9929 && (GET_CODE (op1) == REG || !no_new_pseudos))
9932 tmp = op0, op0 = op1, op1 = tmp;
9933 code = swap_condition (code);
9934 if (GET_CODE (op0) != REG)
9935 op0 = force_reg (op_mode, op0);
9943 /* Convert comparison codes we use to represent FP comparison to integer
9944 code that will result in proper branch. Return UNKNOWN if no such code
9948 ix86_fp_compare_code_to_integer (enum rtx_code code)
9977 /* Split comparison code CODE into comparisons we can do using branch
9978 instructions. BYPASS_CODE is comparison code for branch that will
9979 branch around FIRST_CODE and SECOND_CODE. If some of branches
9980 is not required, set value to UNKNOWN.
9981 We never require more than two branches. */
9984 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9985 enum rtx_code *first_code,
9986 enum rtx_code *second_code)
9989 *bypass_code = UNKNOWN;
9990 *second_code = UNKNOWN;
9992 /* The fcomi comparison sets flags as follows:
10002 case GT: /* GTU - CF=0 & ZF=0 */
10003 case GE: /* GEU - CF=0 */
10004 case ORDERED: /* PF=0 */
10005 case UNORDERED: /* PF=1 */
10006 case UNEQ: /* EQ - ZF=1 */
10007 case UNLT: /* LTU - CF=1 */
10008 case UNLE: /* LEU - CF=1 | ZF=1 */
10009 case LTGT: /* EQ - ZF=0 */
10011 case LT: /* LTU - CF=1 - fails on unordered */
10012 *first_code = UNLT;
10013 *bypass_code = UNORDERED;
10015 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10016 *first_code = UNLE;
10017 *bypass_code = UNORDERED;
10019 case EQ: /* EQ - ZF=1 - fails on unordered */
10020 *first_code = UNEQ;
10021 *bypass_code = UNORDERED;
10023 case NE: /* NE - ZF=0 - fails on unordered */
10024 *first_code = LTGT;
10025 *second_code = UNORDERED;
10027 case UNGE: /* GEU - CF=0 - fails on unordered */
10029 *second_code = UNORDERED;
10031 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10033 *second_code = UNORDERED;
10036 gcc_unreachable ();
10038 if (!TARGET_IEEE_FP)
10040 *second_code = UNKNOWN;
10041 *bypass_code = UNKNOWN;
10045 /* Return cost of comparison done fcom + arithmetics operations on AX.
10046 All following functions do use number of instructions as a cost metrics.
10047 In future this should be tweaked to compute bytes for optimize_size and
10048 take into account performance of various instructions on various CPUs. */
10050 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10052 if (!TARGET_IEEE_FP)
10054 /* The cost of code output by ix86_expand_fp_compare. */
10078 gcc_unreachable ();
10082 /* Return cost of comparison done using fcomi operation.
10083 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10085 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10087 enum rtx_code bypass_code, first_code, second_code;
10088 /* Return arbitrarily high cost when instruction is not supported - this
10089 prevents gcc from using it. */
10092 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10093 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10096 /* Return cost of comparison done using sahf operation.
10097 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10099 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10101 enum rtx_code bypass_code, first_code, second_code;
10102 /* Return arbitrarily high cost when instruction is not preferred - this
10103 avoids gcc from using it. */
10104 if (!TARGET_USE_SAHF && !optimize_size)
10106 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10107 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10110 /* Compute cost of the comparison done using any method.
10111 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10113 ix86_fp_comparison_cost (enum rtx_code code)
10115 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10118 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10119 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10121 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10122 if (min > sahf_cost)
10124 if (min > fcomi_cost)
10129 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10132 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10133 rtx *second_test, rtx *bypass_test)
10135 enum machine_mode fpcmp_mode, intcmp_mode;
10137 int cost = ix86_fp_comparison_cost (code);
10138 enum rtx_code bypass_code, first_code, second_code;
10140 fpcmp_mode = ix86_fp_compare_mode (code);
10141 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10144 *second_test = NULL_RTX;
10146 *bypass_test = NULL_RTX;
10148 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10150 /* Do fcomi/sahf based test when profitable. */
10151 if ((bypass_code == UNKNOWN || bypass_test)
10152 && (second_code == UNKNOWN || second_test)
10153 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10157 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10158 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10164 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10165 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10167 scratch = gen_reg_rtx (HImode);
10168 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10169 emit_insn (gen_x86_sahf_1 (scratch));
10172 /* The FP codes work out to act like unsigned. */
10173 intcmp_mode = fpcmp_mode;
10175 if (bypass_code != UNKNOWN)
10176 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10177 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10179 if (second_code != UNKNOWN)
10180 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10181 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10186 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10187 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10188 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10190 scratch = gen_reg_rtx (HImode);
10191 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10193 /* In the unordered case, we have to check C2 for NaN's, which
10194 doesn't happen to work out to anything nice combination-wise.
10195 So do some bit twiddling on the value we've got in AH to come
10196 up with an appropriate set of condition codes. */
10198 intcmp_mode = CCNOmode;
10203 if (code == GT || !TARGET_IEEE_FP)
10205 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10210 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10211 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10212 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10213 intcmp_mode = CCmode;
10219 if (code == LT && TARGET_IEEE_FP)
10221 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10222 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10223 intcmp_mode = CCmode;
10228 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10234 if (code == GE || !TARGET_IEEE_FP)
10236 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10241 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10242 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10249 if (code == LE && TARGET_IEEE_FP)
10251 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10252 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10253 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10254 intcmp_mode = CCmode;
10259 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10265 if (code == EQ && TARGET_IEEE_FP)
10267 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10268 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10269 intcmp_mode = CCmode;
10274 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10281 if (code == NE && TARGET_IEEE_FP)
10283 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10284 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10290 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10296 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10300 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10305 gcc_unreachable ();
10309 /* Return the test that should be put into the flags user, i.e.
10310 the bcc, scc, or cmov instruction. */
10311 return gen_rtx_fmt_ee (code, VOIDmode,
10312 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10317 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10320 op0 = ix86_compare_op0;
10321 op1 = ix86_compare_op1;
10324 *second_test = NULL_RTX;
10326 *bypass_test = NULL_RTX;
10328 if (ix86_compare_emitted)
10330 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10331 ix86_compare_emitted = NULL_RTX;
10333 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10334 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10335 second_test, bypass_test);
10337 ret = ix86_expand_int_compare (code, op0, op1);
10342 /* Return true if the CODE will result in nontrivial jump sequence. */
10344 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10346 enum rtx_code bypass_code, first_code, second_code;
10349 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10350 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10354 ix86_expand_branch (enum rtx_code code, rtx label)
10358 /* If we have emitted a compare insn, go straight to simple.
10359 ix86_expand_compare won't emit anything if ix86_compare_emitted
10361 if (ix86_compare_emitted)
10364 switch (GET_MODE (ix86_compare_op0))
10370 tmp = ix86_expand_compare (code, NULL, NULL);
10371 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10372 gen_rtx_LABEL_REF (VOIDmode, label),
10374 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10383 enum rtx_code bypass_code, first_code, second_code;
10385 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10386 &ix86_compare_op1);
10388 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10390 /* Check whether we will use the natural sequence with one jump. If
10391 so, we can expand jump early. Otherwise delay expansion by
10392 creating compound insn to not confuse optimizers. */
10393 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10396 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10397 gen_rtx_LABEL_REF (VOIDmode, label),
10398 pc_rtx, NULL_RTX, NULL_RTX);
10402 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10403 ix86_compare_op0, ix86_compare_op1);
10404 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10405 gen_rtx_LABEL_REF (VOIDmode, label),
10407 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10409 use_fcomi = ix86_use_fcomi_compare (code);
10410 vec = rtvec_alloc (3 + !use_fcomi);
10411 RTVEC_ELT (vec, 0) = tmp;
10413 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10415 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10418 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10420 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10429 /* Expand DImode branch into multiple compare+branch. */
10431 rtx lo[2], hi[2], label2;
10432 enum rtx_code code1, code2, code3;
10433 enum machine_mode submode;
10435 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10437 tmp = ix86_compare_op0;
10438 ix86_compare_op0 = ix86_compare_op1;
10439 ix86_compare_op1 = tmp;
10440 code = swap_condition (code);
10442 if (GET_MODE (ix86_compare_op0) == DImode)
10444 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10445 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10450 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10451 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10455 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10456 avoid two branches. This costs one extra insn, so disable when
10457 optimizing for size. */
10459 if ((code == EQ || code == NE)
10461 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10466 if (hi[1] != const0_rtx)
10467 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10468 NULL_RTX, 0, OPTAB_WIDEN);
10471 if (lo[1] != const0_rtx)
10472 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10473 NULL_RTX, 0, OPTAB_WIDEN);
10475 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10476 NULL_RTX, 0, OPTAB_WIDEN);
10478 ix86_compare_op0 = tmp;
10479 ix86_compare_op1 = const0_rtx;
10480 ix86_expand_branch (code, label);
10484 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10485 op1 is a constant and the low word is zero, then we can just
10486 examine the high word. */
10488 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10491 case LT: case LTU: case GE: case GEU:
10492 ix86_compare_op0 = hi[0];
10493 ix86_compare_op1 = hi[1];
10494 ix86_expand_branch (code, label);
10500 /* Otherwise, we need two or three jumps. */
10502 label2 = gen_label_rtx ();
10505 code2 = swap_condition (code);
10506 code3 = unsigned_condition (code);
10510 case LT: case GT: case LTU: case GTU:
10513 case LE: code1 = LT; code2 = GT; break;
10514 case GE: code1 = GT; code2 = LT; break;
10515 case LEU: code1 = LTU; code2 = GTU; break;
10516 case GEU: code1 = GTU; code2 = LTU; break;
10518 case EQ: code1 = UNKNOWN; code2 = NE; break;
10519 case NE: code2 = UNKNOWN; break;
10522 gcc_unreachable ();
10527 * if (hi(a) < hi(b)) goto true;
10528 * if (hi(a) > hi(b)) goto false;
10529 * if (lo(a) < lo(b)) goto true;
10533 ix86_compare_op0 = hi[0];
10534 ix86_compare_op1 = hi[1];
10536 if (code1 != UNKNOWN)
10537 ix86_expand_branch (code1, label);
10538 if (code2 != UNKNOWN)
10539 ix86_expand_branch (code2, label2);
10541 ix86_compare_op0 = lo[0];
10542 ix86_compare_op1 = lo[1];
10543 ix86_expand_branch (code3, label);
10545 if (code2 != UNKNOWN)
10546 emit_label (label2);
10551 gcc_unreachable ();
10555 /* Split branch based on floating point condition. */
10557 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10558 rtx target1, rtx target2, rtx tmp, rtx pushed)
10560 rtx second, bypass;
10561 rtx label = NULL_RTX;
10563 int bypass_probability = -1, second_probability = -1, probability = -1;
10566 if (target2 != pc_rtx)
10569 code = reverse_condition_maybe_unordered (code);
10574 condition = ix86_expand_fp_compare (code, op1, op2,
10575 tmp, &second, &bypass);
10577 /* Remove pushed operand from stack. */
10579 ix86_free_from_memory (GET_MODE (pushed));
10581 if (split_branch_probability >= 0)
10583 /* Distribute the probabilities across the jumps.
10584 Assume the BYPASS and SECOND to be always test
10586 probability = split_branch_probability;
10588 /* Value of 1 is low enough to make no need for probability
10589 to be updated. Later we may run some experiments and see
10590 if unordered values are more frequent in practice. */
10592 bypass_probability = 1;
10594 second_probability = 1;
10596 if (bypass != NULL_RTX)
10598 label = gen_label_rtx ();
10599 i = emit_jump_insn (gen_rtx_SET
10601 gen_rtx_IF_THEN_ELSE (VOIDmode,
10603 gen_rtx_LABEL_REF (VOIDmode,
10606 if (bypass_probability >= 0)
10608 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10609 GEN_INT (bypass_probability),
10612 i = emit_jump_insn (gen_rtx_SET
10614 gen_rtx_IF_THEN_ELSE (VOIDmode,
10615 condition, target1, target2)));
10616 if (probability >= 0)
10618 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10619 GEN_INT (probability),
10621 if (second != NULL_RTX)
10623 i = emit_jump_insn (gen_rtx_SET
10625 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10627 if (second_probability >= 0)
10629 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10630 GEN_INT (second_probability),
10633 if (label != NULL_RTX)
10634 emit_label (label);
10638 ix86_expand_setcc (enum rtx_code code, rtx dest)
10640 rtx ret, tmp, tmpreg, equiv;
10641 rtx second_test, bypass_test;
10643 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10644 return 0; /* FAIL */
10646 gcc_assert (GET_MODE (dest) == QImode);
10648 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10649 PUT_MODE (ret, QImode);
10654 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10655 if (bypass_test || second_test)
10657 rtx test = second_test;
10659 rtx tmp2 = gen_reg_rtx (QImode);
10662 gcc_assert (!second_test);
10663 test = bypass_test;
10665 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10667 PUT_MODE (test, QImode);
10668 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10671 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10673 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10676 /* Attach a REG_EQUAL note describing the comparison result. */
10677 if (ix86_compare_op0 && ix86_compare_op1)
10679 equiv = simplify_gen_relational (code, QImode,
10680 GET_MODE (ix86_compare_op0),
10681 ix86_compare_op0, ix86_compare_op1);
10682 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10685 return 1; /* DONE */
10688 /* Expand comparison setting or clearing carry flag. Return true when
10689 successful and set pop for the operation. */
10691 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10693 enum machine_mode mode =
10694 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10696 /* Do not handle DImode compares that go through special path. Also we can't
10697 deal with FP compares yet. This is possible to add. */
10698 if (mode == (TARGET_64BIT ? TImode : DImode))
10700 if (FLOAT_MODE_P (mode))
10702 rtx second_test = NULL, bypass_test = NULL;
10703 rtx compare_op, compare_seq;
10705 /* Shortcut: following common codes never translate into carry flag compares. */
10706 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10707 || code == ORDERED || code == UNORDERED)
10710 /* These comparisons require zero flag; swap operands so they won't. */
10711 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10712 && !TARGET_IEEE_FP)
10717 code = swap_condition (code);
10720 /* Try to expand the comparison and verify that we end up with carry flag
10721 based comparison. This is fails to be true only when we decide to expand
10722 comparison using arithmetic that is not too common scenario. */
10724 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10725 &second_test, &bypass_test);
10726 compare_seq = get_insns ();
10729 if (second_test || bypass_test)
10731 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10732 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10733 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10735 code = GET_CODE (compare_op);
10736 if (code != LTU && code != GEU)
10738 emit_insn (compare_seq);
10742 if (!INTEGRAL_MODE_P (mode))
10750 /* Convert a==0 into (unsigned)a<1. */
10753 if (op1 != const0_rtx)
10756 code = (code == EQ ? LTU : GEU);
10759 /* Convert a>b into b<a or a>=b-1. */
10762 if (GET_CODE (op1) == CONST_INT)
10764 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10765 /* Bail out on overflow. We still can swap operands but that
10766 would force loading of the constant into register. */
10767 if (op1 == const0_rtx
10768 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10770 code = (code == GTU ? GEU : LTU);
10777 code = (code == GTU ? LTU : GEU);
10781 /* Convert a>=0 into (unsigned)a<0x80000000. */
10784 if (mode == DImode || op1 != const0_rtx)
10786 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10787 code = (code == LT ? GEU : LTU);
10791 if (mode == DImode || op1 != constm1_rtx)
10793 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10794 code = (code == LE ? GEU : LTU);
10800 /* Swapping operands may cause constant to appear as first operand. */
10801 if (!nonimmediate_operand (op0, VOIDmode))
10803 if (no_new_pseudos)
10805 op0 = force_reg (mode, op0);
10807 ix86_compare_op0 = op0;
10808 ix86_compare_op1 = op1;
10809 *pop = ix86_expand_compare (code, NULL, NULL);
10810 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10815 ix86_expand_int_movcc (rtx operands[])
10817 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10818 rtx compare_seq, compare_op;
10819 rtx second_test, bypass_test;
10820 enum machine_mode mode = GET_MODE (operands[0]);
10821 bool sign_bit_compare_p = false;;
10824 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10825 compare_seq = get_insns ();
10828 compare_code = GET_CODE (compare_op);
10830 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10831 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10832 sign_bit_compare_p = true;
10834 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10835 HImode insns, we'd be swallowed in word prefix ops. */
10837 if ((mode != HImode || TARGET_FAST_PREFIX)
10838 && (mode != (TARGET_64BIT ? TImode : DImode))
10839 && GET_CODE (operands[2]) == CONST_INT
10840 && GET_CODE (operands[3]) == CONST_INT)
10842 rtx out = operands[0];
10843 HOST_WIDE_INT ct = INTVAL (operands[2]);
10844 HOST_WIDE_INT cf = INTVAL (operands[3]);
10845 HOST_WIDE_INT diff;
10848 /* Sign bit compares are better done using shifts than we do by using
10850 if (sign_bit_compare_p
10851 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10852 ix86_compare_op1, &compare_op))
10854 /* Detect overlap between destination and compare sources. */
10857 if (!sign_bit_compare_p)
10859 bool fpcmp = false;
10861 compare_code = GET_CODE (compare_op);
10863 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10864 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10867 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10870 /* To simplify rest of code, restrict to the GEU case. */
10871 if (compare_code == LTU)
10873 HOST_WIDE_INT tmp = ct;
10876 compare_code = reverse_condition (compare_code);
10877 code = reverse_condition (code);
10882 PUT_CODE (compare_op,
10883 reverse_condition_maybe_unordered
10884 (GET_CODE (compare_op)));
10886 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10890 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10891 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10892 tmp = gen_reg_rtx (mode);
10894 if (mode == DImode)
10895 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10897 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10901 if (code == GT || code == GE)
10902 code = reverse_condition (code);
10905 HOST_WIDE_INT tmp = ct;
10910 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10911 ix86_compare_op1, VOIDmode, 0, -1);
10924 tmp = expand_simple_binop (mode, PLUS,
10926 copy_rtx (tmp), 1, OPTAB_DIRECT);
10937 tmp = expand_simple_binop (mode, IOR,
10939 copy_rtx (tmp), 1, OPTAB_DIRECT);
10941 else if (diff == -1 && ct)
10951 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10953 tmp = expand_simple_binop (mode, PLUS,
10954 copy_rtx (tmp), GEN_INT (cf),
10955 copy_rtx (tmp), 1, OPTAB_DIRECT);
10963 * andl cf - ct, dest
10973 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10976 tmp = expand_simple_binop (mode, AND,
10978 gen_int_mode (cf - ct, mode),
10979 copy_rtx (tmp), 1, OPTAB_DIRECT);
10981 tmp = expand_simple_binop (mode, PLUS,
10982 copy_rtx (tmp), GEN_INT (ct),
10983 copy_rtx (tmp), 1, OPTAB_DIRECT);
10986 if (!rtx_equal_p (tmp, out))
10987 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10989 return 1; /* DONE */
10995 tmp = ct, ct = cf, cf = tmp;
10997 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10999 /* We may be reversing unordered compare to normal compare, that
11000 is not valid in general (we may convert non-trapping condition
11001 to trapping one), however on i386 we currently emit all
11002 comparisons unordered. */
11003 compare_code = reverse_condition_maybe_unordered (compare_code);
11004 code = reverse_condition_maybe_unordered (code);
11008 compare_code = reverse_condition (compare_code);
11009 code = reverse_condition (code);
11013 compare_code = UNKNOWN;
11014 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11015 && GET_CODE (ix86_compare_op1) == CONST_INT)
11017 if (ix86_compare_op1 == const0_rtx
11018 && (code == LT || code == GE))
11019 compare_code = code;
11020 else if (ix86_compare_op1 == constm1_rtx)
11024 else if (code == GT)
11029 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11030 if (compare_code != UNKNOWN
11031 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11032 && (cf == -1 || ct == -1))
11034 /* If lea code below could be used, only optimize
11035 if it results in a 2 insn sequence. */
11037 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11038 || diff == 3 || diff == 5 || diff == 9)
11039 || (compare_code == LT && ct == -1)
11040 || (compare_code == GE && cf == -1))
11043 * notl op1 (if necessary)
11051 code = reverse_condition (code);
11054 out = emit_store_flag (out, code, ix86_compare_op0,
11055 ix86_compare_op1, VOIDmode, 0, -1);
11057 out = expand_simple_binop (mode, IOR,
11059 out, 1, OPTAB_DIRECT);
11060 if (out != operands[0])
11061 emit_move_insn (operands[0], out);
11063 return 1; /* DONE */
11068 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11069 || diff == 3 || diff == 5 || diff == 9)
11070 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11072 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11078 * lea cf(dest*(ct-cf)),dest
11082 * This also catches the degenerate setcc-only case.
11088 out = emit_store_flag (out, code, ix86_compare_op0,
11089 ix86_compare_op1, VOIDmode, 0, 1);
11092 /* On x86_64 the lea instruction operates on Pmode, so we need
11093 to get arithmetics done in proper mode to match. */
11095 tmp = copy_rtx (out);
11099 out1 = copy_rtx (out);
11100 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11104 tmp = gen_rtx_PLUS (mode, tmp, out1);
11110 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11113 if (!rtx_equal_p (tmp, out))
11116 out = force_operand (tmp, copy_rtx (out));
11118 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11120 if (!rtx_equal_p (out, operands[0]))
11121 emit_move_insn (operands[0], copy_rtx (out));
11123 return 1; /* DONE */
11127 * General case: Jumpful:
11128 * xorl dest,dest cmpl op1, op2
11129 * cmpl op1, op2 movl ct, dest
11130 * setcc dest jcc 1f
11131 * decl dest movl cf, dest
11132 * andl (cf-ct),dest 1:
11135 * Size 20. Size 14.
11137 * This is reasonably steep, but branch mispredict costs are
11138 * high on modern cpus, so consider failing only if optimizing
11142 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11143 && BRANCH_COST >= 2)
11149 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11150 /* We may be reversing unordered compare to normal compare,
11151 that is not valid in general (we may convert non-trapping
11152 condition to trapping one), however on i386 we currently
11153 emit all comparisons unordered. */
11154 code = reverse_condition_maybe_unordered (code);
11157 code = reverse_condition (code);
11158 if (compare_code != UNKNOWN)
11159 compare_code = reverse_condition (compare_code);
11163 if (compare_code != UNKNOWN)
11165 /* notl op1 (if needed)
11170 For x < 0 (resp. x <= -1) there will be no notl,
11171 so if possible swap the constants to get rid of the
11173 True/false will be -1/0 while code below (store flag
11174 followed by decrement) is 0/-1, so the constants need
11175 to be exchanged once more. */
11177 if (compare_code == GE || !cf)
11179 code = reverse_condition (code);
11184 HOST_WIDE_INT tmp = cf;
11189 out = emit_store_flag (out, code, ix86_compare_op0,
11190 ix86_compare_op1, VOIDmode, 0, -1);
11194 out = emit_store_flag (out, code, ix86_compare_op0,
11195 ix86_compare_op1, VOIDmode, 0, 1);
11197 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11198 copy_rtx (out), 1, OPTAB_DIRECT);
11201 out = expand_simple_binop (mode, AND, copy_rtx (out),
11202 gen_int_mode (cf - ct, mode),
11203 copy_rtx (out), 1, OPTAB_DIRECT);
11205 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11206 copy_rtx (out), 1, OPTAB_DIRECT);
11207 if (!rtx_equal_p (out, operands[0]))
11208 emit_move_insn (operands[0], copy_rtx (out));
11210 return 1; /* DONE */
11214 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11216 /* Try a few things more with specific constants and a variable. */
11219 rtx var, orig_out, out, tmp;
11221 if (BRANCH_COST <= 2)
11222 return 0; /* FAIL */
11224 /* If one of the two operands is an interesting constant, load a
11225 constant with the above and mask it in with a logical operation. */
11227 if (GET_CODE (operands[2]) == CONST_INT)
11230 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11231 operands[3] = constm1_rtx, op = and_optab;
11232 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11233 operands[3] = const0_rtx, op = ior_optab;
11235 return 0; /* FAIL */
11237 else if (GET_CODE (operands[3]) == CONST_INT)
11240 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11241 operands[2] = constm1_rtx, op = and_optab;
11242 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11243 operands[2] = const0_rtx, op = ior_optab;
11245 return 0; /* FAIL */
11248 return 0; /* FAIL */
11250 orig_out = operands[0];
11251 tmp = gen_reg_rtx (mode);
11254 /* Recurse to get the constant loaded. */
11255 if (ix86_expand_int_movcc (operands) == 0)
11256 return 0; /* FAIL */
11258 /* Mask in the interesting variable. */
11259 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11261 if (!rtx_equal_p (out, orig_out))
11262 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11264 return 1; /* DONE */
11268 * For comparison with above,
11278 if (! nonimmediate_operand (operands[2], mode))
11279 operands[2] = force_reg (mode, operands[2]);
11280 if (! nonimmediate_operand (operands[3], mode))
11281 operands[3] = force_reg (mode, operands[3]);
11283 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11285 rtx tmp = gen_reg_rtx (mode);
11286 emit_move_insn (tmp, operands[3]);
11289 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11291 rtx tmp = gen_reg_rtx (mode);
11292 emit_move_insn (tmp, operands[2]);
11296 if (! register_operand (operands[2], VOIDmode)
11298 || ! register_operand (operands[3], VOIDmode)))
11299 operands[2] = force_reg (mode, operands[2]);
11302 && ! register_operand (operands[3], VOIDmode))
11303 operands[3] = force_reg (mode, operands[3]);
11305 emit_insn (compare_seq);
11306 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11307 gen_rtx_IF_THEN_ELSE (mode,
11308 compare_op, operands[2],
11311 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11312 gen_rtx_IF_THEN_ELSE (mode,
11314 copy_rtx (operands[3]),
11315 copy_rtx (operands[0]))));
11317 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11318 gen_rtx_IF_THEN_ELSE (mode,
11320 copy_rtx (operands[2]),
11321 copy_rtx (operands[0]))));
11323 return 1; /* DONE */
11326 /* Swap, force into registers, or otherwise massage the two operands
11327 to an sse comparison with a mask result. Thus we differ a bit from
11328 ix86_prepare_fp_compare_args which expects to produce a flags result.
11330 The DEST operand exists to help determine whether to commute commutative
11331 operators. The POP0/POP1 operands are updated in place. The new
11332 comparison code is returned, or UNKNOWN if not implementable. */
11334 static enum rtx_code
11335 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11336 rtx *pop0, rtx *pop1)
11344 /* We have no LTGT as an operator. We could implement it with
11345 NE & ORDERED, but this requires an extra temporary. It's
11346 not clear that it's worth it. */
11353 /* These are supported directly. */
11360 /* For commutative operators, try to canonicalize the destination
11361 operand to be first in the comparison - this helps reload to
11362 avoid extra moves. */
11363 if (!dest || !rtx_equal_p (dest, *pop1))
11371 /* These are not supported directly. Swap the comparison operands
11372 to transform into something that is supported. */
11376 code = swap_condition (code);
11380 gcc_unreachable ();
11386 /* Detect conditional moves that exactly match min/max operational
11387 semantics. Note that this is IEEE safe, as long as we don't
11388 interchange the operands.
11390 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11391 and TRUE if the operation is successful and instructions are emitted. */
11394 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11395 rtx cmp_op1, rtx if_true, rtx if_false)
11397 enum machine_mode mode;
11403 else if (code == UNGE)
11406 if_true = if_false;
11412 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11414 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11419 mode = GET_MODE (dest);
11421 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11422 but MODE may be a vector mode and thus not appropriate. */
11423 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11425 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11428 if_true = force_reg (mode, if_true);
11429 v = gen_rtvec (2, if_true, if_false);
11430 tmp = gen_rtx_UNSPEC (mode, v, u);
11434 code = is_min ? SMIN : SMAX;
11435 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11438 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11442 /* Expand an sse vector comparison. Return the register with the result. */
11445 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11446 rtx op_true, rtx op_false)
11448 enum machine_mode mode = GET_MODE (dest);
11451 cmp_op0 = force_reg (mode, cmp_op0);
11452 if (!nonimmediate_operand (cmp_op1, mode))
11453 cmp_op1 = force_reg (mode, cmp_op1);
11456 || reg_overlap_mentioned_p (dest, op_true)
11457 || reg_overlap_mentioned_p (dest, op_false))
11458 dest = gen_reg_rtx (mode);
11460 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11461 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11466 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11467 operations. This is used for both scalar and vector conditional moves. */
11470 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11472 enum machine_mode mode = GET_MODE (dest);
11475 if (op_false == CONST0_RTX (mode))
11477 op_true = force_reg (mode, op_true);
11478 x = gen_rtx_AND (mode, cmp, op_true);
11479 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11481 else if (op_true == CONST0_RTX (mode))
11483 op_false = force_reg (mode, op_false);
11484 x = gen_rtx_NOT (mode, cmp);
11485 x = gen_rtx_AND (mode, x, op_false);
11486 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11490 op_true = force_reg (mode, op_true);
11491 op_false = force_reg (mode, op_false);
11493 t2 = gen_reg_rtx (mode);
11495 t3 = gen_reg_rtx (mode);
11499 x = gen_rtx_AND (mode, op_true, cmp);
11500 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11502 x = gen_rtx_NOT (mode, cmp);
11503 x = gen_rtx_AND (mode, x, op_false);
11504 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11506 x = gen_rtx_IOR (mode, t3, t2);
11507 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11511 /* Expand a floating-point conditional move. Return true if successful. */
11514 ix86_expand_fp_movcc (rtx operands[])
11516 enum machine_mode mode = GET_MODE (operands[0]);
11517 enum rtx_code code = GET_CODE (operands[1]);
11518 rtx tmp, compare_op, second_test, bypass_test;
11520 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11522 enum machine_mode cmode;
11524 /* Since we've no cmove for sse registers, don't force bad register
11525 allocation just to gain access to it. Deny movcc when the
11526 comparison mode doesn't match the move mode. */
11527 cmode = GET_MODE (ix86_compare_op0);
11528 if (cmode == VOIDmode)
11529 cmode = GET_MODE (ix86_compare_op1);
11533 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11535 &ix86_compare_op1);
11536 if (code == UNKNOWN)
11539 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11540 ix86_compare_op1, operands[2],
11544 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11545 ix86_compare_op1, operands[2], operands[3]);
11546 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11550 /* The floating point conditional move instructions don't directly
11551 support conditions resulting from a signed integer comparison. */
11553 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11555 /* The floating point conditional move instructions don't directly
11556 support signed integer comparisons. */
11558 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11560 gcc_assert (!second_test && !bypass_test);
11561 tmp = gen_reg_rtx (QImode);
11562 ix86_expand_setcc (code, tmp);
11564 ix86_compare_op0 = tmp;
11565 ix86_compare_op1 = const0_rtx;
11566 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11568 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11570 tmp = gen_reg_rtx (mode);
11571 emit_move_insn (tmp, operands[3]);
11574 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11576 tmp = gen_reg_rtx (mode);
11577 emit_move_insn (tmp, operands[2]);
11581 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11582 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11583 operands[2], operands[3])));
11585 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11586 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11587 operands[3], operands[0])));
11589 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11590 gen_rtx_IF_THEN_ELSE (mode, second_test,
11591 operands[2], operands[0])));
11596 /* Expand a floating-point vector conditional move; a vcond operation
11597 rather than a movcc operation. */
11600 ix86_expand_fp_vcond (rtx operands[])
11602 enum rtx_code code = GET_CODE (operands[3]);
11605 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11606 &operands[4], &operands[5]);
11607 if (code == UNKNOWN)
11610 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11611 operands[5], operands[1], operands[2]))
11614 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11615 operands[1], operands[2]);
11616 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11620 /* Expand a signed integral vector conditional move. */
11623 ix86_expand_int_vcond (rtx operands[])
11625 enum machine_mode mode = GET_MODE (operands[0]);
11626 enum rtx_code code = GET_CODE (operands[3]);
11627 bool negate = false;
11630 cop0 = operands[4];
11631 cop1 = operands[5];
11633 /* Canonicalize the comparison to EQ, GT, GTU. */
11644 code = reverse_condition (code);
11650 code = reverse_condition (code);
11656 code = swap_condition (code);
11657 x = cop0, cop0 = cop1, cop1 = x;
11661 gcc_unreachable ();
11664 /* Unsigned parallel compare is not supported by the hardware. Play some
11665 tricks to turn this into a signed comparison against 0. */
11668 cop0 = force_reg (mode, cop0);
11676 /* Perform a parallel modulo subtraction. */
11677 t1 = gen_reg_rtx (mode);
11678 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11680 /* Extract the original sign bit of op0. */
11681 mask = GEN_INT (-0x80000000);
11682 mask = gen_rtx_CONST_VECTOR (mode,
11683 gen_rtvec (4, mask, mask, mask, mask));
11684 mask = force_reg (mode, mask);
11685 t2 = gen_reg_rtx (mode);
11686 emit_insn (gen_andv4si3 (t2, cop0, mask));
11688 /* XOR it back into the result of the subtraction. This results
11689 in the sign bit set iff we saw unsigned underflow. */
11690 x = gen_reg_rtx (mode);
11691 emit_insn (gen_xorv4si3 (x, t1, t2));
11699 /* Perform a parallel unsigned saturating subtraction. */
11700 x = gen_reg_rtx (mode);
11701 emit_insn (gen_rtx_SET (VOIDmode, x,
11702 gen_rtx_US_MINUS (mode, cop0, cop1)));
11709 gcc_unreachable ();
11713 cop1 = CONST0_RTX (mode);
11716 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11717 operands[1+negate], operands[2-negate]);
11719 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11720 operands[2-negate]);
11724 /* Expand conditional increment or decrement using adb/sbb instructions.
11725 The default case using setcc followed by the conditional move can be
11726 done by generic code. */
11728 ix86_expand_int_addcc (rtx operands[])
11730 enum rtx_code code = GET_CODE (operands[1]);
11732 rtx val = const0_rtx;
11733 bool fpcmp = false;
11734 enum machine_mode mode = GET_MODE (operands[0]);
11736 if (operands[3] != const1_rtx
11737 && operands[3] != constm1_rtx)
11739 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11740 ix86_compare_op1, &compare_op))
11742 code = GET_CODE (compare_op);
11744 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11745 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11748 code = ix86_fp_compare_code_to_integer (code);
11755 PUT_CODE (compare_op,
11756 reverse_condition_maybe_unordered
11757 (GET_CODE (compare_op)));
11759 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11761 PUT_MODE (compare_op, mode);
11763 /* Construct either adc or sbb insn. */
11764 if ((code == LTU) == (operands[3] == constm1_rtx))
11766 switch (GET_MODE (operands[0]))
11769 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11772 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11775 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11778 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11781 gcc_unreachable ();
11786 switch (GET_MODE (operands[0]))
11789 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11792 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11795 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11798 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11801 gcc_unreachable ();
11804 return 1; /* DONE */
11808 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11809 works for floating pointer parameters and nonoffsetable memories.
11810 For pushes, it returns just stack offsets; the values will be saved
11811 in the right order. Maximally three parts are generated. */
11814 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11819 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11821 size = (GET_MODE_SIZE (mode) + 4) / 8;
11823 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11824 gcc_assert (size >= 2 && size <= 3);
11826 /* Optimize constant pool reference to immediates. This is used by fp
11827 moves, that force all constants to memory to allow combining. */
11828 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11830 rtx tmp = maybe_get_pool_constant (operand);
11835 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11837 /* The only non-offsetable memories we handle are pushes. */
11838 int ok = push_operand (operand, VOIDmode);
11842 operand = copy_rtx (operand);
11843 PUT_MODE (operand, Pmode);
11844 parts[0] = parts[1] = parts[2] = operand;
11848 if (GET_CODE (operand) == CONST_VECTOR)
11850 enum machine_mode imode = int_mode_for_mode (mode);
11851 /* Caution: if we looked through a constant pool memory above,
11852 the operand may actually have a different mode now. That's
11853 ok, since we want to pun this all the way back to an integer. */
11854 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11855 gcc_assert (operand != NULL);
11861 if (mode == DImode)
11862 split_di (&operand, 1, &parts[0], &parts[1]);
11865 if (REG_P (operand))
11867 gcc_assert (reload_completed);
11868 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11869 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11871 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11873 else if (offsettable_memref_p (operand))
11875 operand = adjust_address (operand, SImode, 0);
11876 parts[0] = operand;
11877 parts[1] = adjust_address (operand, SImode, 4);
11879 parts[2] = adjust_address (operand, SImode, 8);
11881 else if (GET_CODE (operand) == CONST_DOUBLE)
11886 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11890 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11891 parts[2] = gen_int_mode (l[2], SImode);
11894 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11897 gcc_unreachable ();
11899 parts[1] = gen_int_mode (l[1], SImode);
11900 parts[0] = gen_int_mode (l[0], SImode);
11903 gcc_unreachable ();
11908 if (mode == TImode)
11909 split_ti (&operand, 1, &parts[0], &parts[1]);
11910 if (mode == XFmode || mode == TFmode)
11912 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11913 if (REG_P (operand))
11915 gcc_assert (reload_completed);
11916 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11917 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11919 else if (offsettable_memref_p (operand))
11921 operand = adjust_address (operand, DImode, 0);
11922 parts[0] = operand;
11923 parts[1] = adjust_address (operand, upper_mode, 8);
11925 else if (GET_CODE (operand) == CONST_DOUBLE)
11930 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11931 real_to_target (l, &r, mode);
11933 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11934 if (HOST_BITS_PER_WIDE_INT >= 64)
11937 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11938 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11941 parts[0] = immed_double_const (l[0], l[1], DImode);
11943 if (upper_mode == SImode)
11944 parts[1] = gen_int_mode (l[2], SImode);
11945 else if (HOST_BITS_PER_WIDE_INT >= 64)
11948 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11949 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11952 parts[1] = immed_double_const (l[2], l[3], DImode);
11955 gcc_unreachable ();
11962 /* Emit insns to perform a move or push of DI, DF, and XF values.
11963 Return false when normal moves are needed; true when all required
11964 insns have been emitted. Operands 2-4 contain the input values
11965 int the correct order; operands 5-7 contain the output values. */
11968 ix86_split_long_move (rtx operands[])
11973 int collisions = 0;
11974 enum machine_mode mode = GET_MODE (operands[0]);
11976 /* The DFmode expanders may ask us to move double.
11977 For 64bit target this is single move. By hiding the fact
11978 here we simplify i386.md splitters. */
11979 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11981 /* Optimize constant pool reference to immediates. This is used by
11982 fp moves, that force all constants to memory to allow combining. */
11984 if (GET_CODE (operands[1]) == MEM
11985 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11986 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11987 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11988 if (push_operand (operands[0], VOIDmode))
11990 operands[0] = copy_rtx (operands[0]);
11991 PUT_MODE (operands[0], Pmode);
11994 operands[0] = gen_lowpart (DImode, operands[0]);
11995 operands[1] = gen_lowpart (DImode, operands[1]);
11996 emit_move_insn (operands[0], operands[1]);
12000 /* The only non-offsettable memory we handle is push. */
12001 if (push_operand (operands[0], VOIDmode))
12004 gcc_assert (GET_CODE (operands[0]) != MEM
12005 || offsettable_memref_p (operands[0]));
12007 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12008 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12010 /* When emitting push, take care for source operands on the stack. */
12011 if (push && GET_CODE (operands[1]) == MEM
12012 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12015 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12016 XEXP (part[1][2], 0));
12017 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12018 XEXP (part[1][1], 0));
12021 /* We need to do copy in the right order in case an address register
12022 of the source overlaps the destination. */
12023 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12025 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12027 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12030 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12033 /* Collision in the middle part can be handled by reordering. */
12034 if (collisions == 1 && nparts == 3
12035 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12038 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12039 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12042 /* If there are more collisions, we can't handle it by reordering.
12043 Do an lea to the last part and use only one colliding move. */
12044 else if (collisions > 1)
12050 base = part[0][nparts - 1];
12052 /* Handle the case when the last part isn't valid for lea.
12053 Happens in 64-bit mode storing the 12-byte XFmode. */
12054 if (GET_MODE (base) != Pmode)
12055 base = gen_rtx_REG (Pmode, REGNO (base));
12057 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12058 part[1][0] = replace_equiv_address (part[1][0], base);
12059 part[1][1] = replace_equiv_address (part[1][1],
12060 plus_constant (base, UNITS_PER_WORD));
12062 part[1][2] = replace_equiv_address (part[1][2],
12063 plus_constant (base, 8));
12073 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12074 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12075 emit_move_insn (part[0][2], part[1][2]);
12080 /* In 64bit mode we don't have 32bit push available. In case this is
12081 register, it is OK - we will just use larger counterpart. We also
12082 retype memory - these comes from attempt to avoid REX prefix on
12083 moving of second half of TFmode value. */
12084 if (GET_MODE (part[1][1]) == SImode)
12086 switch (GET_CODE (part[1][1]))
12089 part[1][1] = adjust_address (part[1][1], DImode, 0);
12093 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12097 gcc_unreachable ();
12100 if (GET_MODE (part[1][0]) == SImode)
12101 part[1][0] = part[1][1];
12104 emit_move_insn (part[0][1], part[1][1]);
12105 emit_move_insn (part[0][0], part[1][0]);
12109 /* Choose correct order to not overwrite the source before it is copied. */
12110 if ((REG_P (part[0][0])
12111 && REG_P (part[1][1])
12112 && (REGNO (part[0][0]) == REGNO (part[1][1])
12114 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12116 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12120 operands[2] = part[0][2];
12121 operands[3] = part[0][1];
12122 operands[4] = part[0][0];
12123 operands[5] = part[1][2];
12124 operands[6] = part[1][1];
12125 operands[7] = part[1][0];
12129 operands[2] = part[0][1];
12130 operands[3] = part[0][0];
12131 operands[5] = part[1][1];
12132 operands[6] = part[1][0];
12139 operands[2] = part[0][0];
12140 operands[3] = part[0][1];
12141 operands[4] = part[0][2];
12142 operands[5] = part[1][0];
12143 operands[6] = part[1][1];
12144 operands[7] = part[1][2];
12148 operands[2] = part[0][0];
12149 operands[3] = part[0][1];
12150 operands[5] = part[1][0];
12151 operands[6] = part[1][1];
12155 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12158 if (GET_CODE (operands[5]) == CONST_INT
12159 && operands[5] != const0_rtx
12160 && REG_P (operands[2]))
12162 if (GET_CODE (operands[6]) == CONST_INT
12163 && INTVAL (operands[6]) == INTVAL (operands[5]))
12164 operands[6] = operands[2];
12167 && GET_CODE (operands[7]) == CONST_INT
12168 && INTVAL (operands[7]) == INTVAL (operands[5]))
12169 operands[7] = operands[2];
12173 && GET_CODE (operands[6]) == CONST_INT
12174 && operands[6] != const0_rtx
12175 && REG_P (operands[3])
12176 && GET_CODE (operands[7]) == CONST_INT
12177 && INTVAL (operands[7]) == INTVAL (operands[6]))
12178 operands[7] = operands[3];
12181 emit_move_insn (operands[2], operands[5]);
12182 emit_move_insn (operands[3], operands[6]);
12184 emit_move_insn (operands[4], operands[7]);
12189 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12190 left shift by a constant, either using a single shift or
12191 a sequence of add instructions. */
12194 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12198 emit_insn ((mode == DImode
12200 : gen_adddi3) (operand, operand, operand));
12202 else if (!optimize_size
12203 && count * ix86_cost->add <= ix86_cost->shift_const)
12206 for (i=0; i<count; i++)
12208 emit_insn ((mode == DImode
12210 : gen_adddi3) (operand, operand, operand));
12214 emit_insn ((mode == DImode
12216 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12220 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12222 rtx low[2], high[2];
12224 const int single_width = mode == DImode ? 32 : 64;
12226 if (GET_CODE (operands[2]) == CONST_INT)
12228 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12229 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12231 if (count >= single_width)
12233 emit_move_insn (high[0], low[1]);
12234 emit_move_insn (low[0], const0_rtx);
12236 if (count > single_width)
12237 ix86_expand_ashl_const (high[0], count - single_width, mode);
12241 if (!rtx_equal_p (operands[0], operands[1]))
12242 emit_move_insn (operands[0], operands[1]);
12243 emit_insn ((mode == DImode
12245 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12246 ix86_expand_ashl_const (low[0], count, mode);
12251 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12253 if (operands[1] == const1_rtx)
12255 /* Assuming we've chosen a QImode capable registers, then 1 << N
12256 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12257 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12259 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12261 ix86_expand_clear (low[0]);
12262 ix86_expand_clear (high[0]);
12263 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12265 d = gen_lowpart (QImode, low[0]);
12266 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12267 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12268 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12270 d = gen_lowpart (QImode, high[0]);
12271 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12272 s = gen_rtx_NE (QImode, flags, const0_rtx);
12273 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12276 /* Otherwise, we can get the same results by manually performing
12277 a bit extract operation on bit 5/6, and then performing the two
12278 shifts. The two methods of getting 0/1 into low/high are exactly
12279 the same size. Avoiding the shift in the bit extract case helps
12280 pentium4 a bit; no one else seems to care much either way. */
12285 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12286 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12288 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12289 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12291 emit_insn ((mode == DImode
12293 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12294 emit_insn ((mode == DImode
12296 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12297 emit_move_insn (low[0], high[0]);
12298 emit_insn ((mode == DImode
12300 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12303 emit_insn ((mode == DImode
12305 : gen_ashldi3) (low[0], low[0], operands[2]));
12306 emit_insn ((mode == DImode
12308 : gen_ashldi3) (high[0], high[0], operands[2]));
12312 if (operands[1] == constm1_rtx)
12314 /* For -1 << N, we can avoid the shld instruction, because we
12315 know that we're shifting 0...31/63 ones into a -1. */
12316 emit_move_insn (low[0], constm1_rtx);
12318 emit_move_insn (high[0], low[0]);
12320 emit_move_insn (high[0], constm1_rtx);
12324 if (!rtx_equal_p (operands[0], operands[1]))
12325 emit_move_insn (operands[0], operands[1]);
12327 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12328 emit_insn ((mode == DImode
12330 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12333 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12335 if (TARGET_CMOVE && scratch)
12337 ix86_expand_clear (scratch);
12338 emit_insn ((mode == DImode
12339 ? gen_x86_shift_adj_1
12340 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12343 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12347 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12349 rtx low[2], high[2];
12351 const int single_width = mode == DImode ? 32 : 64;
12353 if (GET_CODE (operands[2]) == CONST_INT)
12355 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12356 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12358 if (count == single_width * 2 - 1)
12360 emit_move_insn (high[0], high[1]);
12361 emit_insn ((mode == DImode
12363 : gen_ashrdi3) (high[0], high[0],
12364 GEN_INT (single_width - 1)));
12365 emit_move_insn (low[0], high[0]);
12368 else if (count >= single_width)
12370 emit_move_insn (low[0], high[1]);
12371 emit_move_insn (high[0], low[0]);
12372 emit_insn ((mode == DImode
12374 : gen_ashrdi3) (high[0], high[0],
12375 GEN_INT (single_width - 1)));
12376 if (count > single_width)
12377 emit_insn ((mode == DImode
12379 : gen_ashrdi3) (low[0], low[0],
12380 GEN_INT (count - single_width)));
12384 if (!rtx_equal_p (operands[0], operands[1]))
12385 emit_move_insn (operands[0], operands[1]);
12386 emit_insn ((mode == DImode
12388 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12389 emit_insn ((mode == DImode
12391 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12396 if (!rtx_equal_p (operands[0], operands[1]))
12397 emit_move_insn (operands[0], operands[1]);
12399 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12401 emit_insn ((mode == DImode
12403 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12404 emit_insn ((mode == DImode
12406 : gen_ashrdi3) (high[0], high[0], operands[2]));
12408 if (TARGET_CMOVE && scratch)
12410 emit_move_insn (scratch, high[0]);
12411 emit_insn ((mode == DImode
12413 : gen_ashrdi3) (scratch, scratch,
12414 GEN_INT (single_width - 1)));
12415 emit_insn ((mode == DImode
12416 ? gen_x86_shift_adj_1
12417 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12421 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12426 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12428 rtx low[2], high[2];
12430 const int single_width = mode == DImode ? 32 : 64;
12432 if (GET_CODE (operands[2]) == CONST_INT)
12434 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12435 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12437 if (count >= single_width)
12439 emit_move_insn (low[0], high[1]);
12440 ix86_expand_clear (high[0]);
12442 if (count > single_width)
12443 emit_insn ((mode == DImode
12445 : gen_lshrdi3) (low[0], low[0],
12446 GEN_INT (count - single_width)));
12450 if (!rtx_equal_p (operands[0], operands[1]))
12451 emit_move_insn (operands[0], operands[1]);
12452 emit_insn ((mode == DImode
12454 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12455 emit_insn ((mode == DImode
12457 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12462 if (!rtx_equal_p (operands[0], operands[1]))
12463 emit_move_insn (operands[0], operands[1]);
12465 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12467 emit_insn ((mode == DImode
12469 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12470 emit_insn ((mode == DImode
12472 : gen_lshrdi3) (high[0], high[0], operands[2]));
12474 /* Heh. By reversing the arguments, we can reuse this pattern. */
12475 if (TARGET_CMOVE && scratch)
12477 ix86_expand_clear (scratch);
12478 emit_insn ((mode == DImode
12479 ? gen_x86_shift_adj_1
12480 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12484 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12488 /* Helper function for the string operations below. Dest VARIABLE whether
12489 it is aligned to VALUE bytes. If true, jump to the label. */
12491 ix86_expand_aligntest (rtx variable, int value)
12493 rtx label = gen_label_rtx ();
12494 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12495 if (GET_MODE (variable) == DImode)
12496 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12498 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12499 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12504 /* Adjust COUNTER by the VALUE. */
12506 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12508 if (GET_MODE (countreg) == DImode)
12509 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12511 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12514 /* Zero extend possibly SImode EXP to Pmode register. */
12516 ix86_zero_extend_to_Pmode (rtx exp)
12519 if (GET_MODE (exp) == VOIDmode)
12520 return force_reg (Pmode, exp);
12521 if (GET_MODE (exp) == Pmode)
12522 return copy_to_mode_reg (Pmode, exp);
12523 r = gen_reg_rtx (Pmode);
12524 emit_insn (gen_zero_extendsidi2 (r, exp));
12528 /* Expand string move (memcpy) operation. Use i386 string operations when
12529 profitable. expand_clrmem contains similar code. */
12531 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12533 rtx srcreg, destreg, countreg, srcexp, destexp;
12534 enum machine_mode counter_mode;
12535 HOST_WIDE_INT align = 0;
12536 unsigned HOST_WIDE_INT count = 0;
12538 if (GET_CODE (align_exp) == CONST_INT)
12539 align = INTVAL (align_exp);
12541 /* Can't use any of this if the user has appropriated esi or edi. */
12542 if (global_regs[4] || global_regs[5])
12545 /* This simple hack avoids all inlining code and simplifies code below. */
12546 if (!TARGET_ALIGN_STRINGOPS)
12549 if (GET_CODE (count_exp) == CONST_INT)
12551 count = INTVAL (count_exp);
12552 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12556 /* Figure out proper mode for counter. For 32bits it is always SImode,
12557 for 64bits use SImode when possible, otherwise DImode.
12558 Set count to number of bytes copied when known at compile time. */
12560 || GET_MODE (count_exp) == SImode
12561 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12562 counter_mode = SImode;
12564 counter_mode = DImode;
12566 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12568 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12569 if (destreg != XEXP (dst, 0))
12570 dst = replace_equiv_address_nv (dst, destreg);
12571 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12572 if (srcreg != XEXP (src, 0))
12573 src = replace_equiv_address_nv (src, srcreg);
12575 /* When optimizing for size emit simple rep ; movsb instruction for
12576 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
12577 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12578 Sice of (movsl;)*(movsw;)?(movsb;)? sequence is
12579 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12580 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
12581 known to be zero or not. The rep; movsb sequence causes higher
12582 register pressure though, so take that into account. */
12584 if ((!optimize || optimize_size)
12589 || (count & 3) + count / 4 > 6))))
12591 emit_insn (gen_cld ());
12592 countreg = ix86_zero_extend_to_Pmode (count_exp);
12593 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12594 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12595 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12599 /* For constant aligned (or small unaligned) copies use rep movsl
12600 followed by code copying the rest. For PentiumPro ensure 8 byte
12601 alignment to allow rep movsl acceleration. */
12603 else if (count != 0
12605 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12606 || optimize_size || count < (unsigned int) 64))
12608 unsigned HOST_WIDE_INT offset = 0;
12609 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12610 rtx srcmem, dstmem;
12612 emit_insn (gen_cld ());
12613 if (count & ~(size - 1))
12615 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12617 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12619 while (offset < (count & ~(size - 1)))
12621 srcmem = adjust_automodify_address_nv (src, movs_mode,
12623 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12625 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12631 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12632 & (TARGET_64BIT ? -1 : 0x3fffffff));
12633 countreg = copy_to_mode_reg (counter_mode, countreg);
12634 countreg = ix86_zero_extend_to_Pmode (countreg);
12636 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12637 GEN_INT (size == 4 ? 2 : 3));
12638 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12639 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12641 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12642 countreg, destexp, srcexp));
12643 offset = count & ~(size - 1);
12646 if (size == 8 && (count & 0x04))
12648 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12650 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12652 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12657 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12659 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12661 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12666 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12668 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12670 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12673 /* The generic code based on the glibc implementation:
12674 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12675 allowing accelerated copying there)
12676 - copy the data using rep movsl
12677 - copy the rest. */
12682 rtx srcmem, dstmem;
12683 int desired_alignment = (TARGET_PENTIUMPRO
12684 && (count == 0 || count >= (unsigned int) 260)
12685 ? 8 : UNITS_PER_WORD);
12686 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12687 dst = change_address (dst, BLKmode, destreg);
12688 src = change_address (src, BLKmode, srcreg);
12690 /* In case we don't know anything about the alignment, default to
12691 library version, since it is usually equally fast and result in
12694 Also emit call when we know that the count is large and call overhead
12695 will not be important. */
12696 if (!TARGET_INLINE_ALL_STRINGOPS
12697 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12700 if (TARGET_SINGLE_STRINGOP)
12701 emit_insn (gen_cld ());
12703 countreg2 = gen_reg_rtx (Pmode);
12704 countreg = copy_to_mode_reg (counter_mode, count_exp);
12706 /* We don't use loops to align destination and to copy parts smaller
12707 than 4 bytes, because gcc is able to optimize such code better (in
12708 the case the destination or the count really is aligned, gcc is often
12709 able to predict the branches) and also it is friendlier to the
12710 hardware branch prediction.
12712 Using loops is beneficial for generic case, because we can
12713 handle small counts using the loops. Many CPUs (such as Athlon)
12714 have large REP prefix setup costs.
12716 This is quite costly. Maybe we can revisit this decision later or
12717 add some customizability to this code. */
12719 if (count == 0 && align < desired_alignment)
12721 label = gen_label_rtx ();
12722 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12723 LEU, 0, counter_mode, 1, label);
12727 rtx label = ix86_expand_aligntest (destreg, 1);
12728 srcmem = change_address (src, QImode, srcreg);
12729 dstmem = change_address (dst, QImode, destreg);
12730 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12731 ix86_adjust_counter (countreg, 1);
12732 emit_label (label);
12733 LABEL_NUSES (label) = 1;
12737 rtx label = ix86_expand_aligntest (destreg, 2);
12738 srcmem = change_address (src, HImode, srcreg);
12739 dstmem = change_address (dst, HImode, destreg);
12740 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12741 ix86_adjust_counter (countreg, 2);
12742 emit_label (label);
12743 LABEL_NUSES (label) = 1;
12745 if (align <= 4 && desired_alignment > 4)
12747 rtx label = ix86_expand_aligntest (destreg, 4);
12748 srcmem = change_address (src, SImode, srcreg);
12749 dstmem = change_address (dst, SImode, destreg);
12750 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12751 ix86_adjust_counter (countreg, 4);
12752 emit_label (label);
12753 LABEL_NUSES (label) = 1;
12756 if (label && desired_alignment > 4 && !TARGET_64BIT)
12758 emit_label (label);
12759 LABEL_NUSES (label) = 1;
12762 if (!TARGET_SINGLE_STRINGOP)
12763 emit_insn (gen_cld ());
12766 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12768 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12772 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12773 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12775 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12776 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12777 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12778 countreg2, destexp, srcexp));
12782 emit_label (label);
12783 LABEL_NUSES (label) = 1;
12785 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12787 srcmem = change_address (src, SImode, srcreg);
12788 dstmem = change_address (dst, SImode, destreg);
12789 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12791 if ((align <= 4 || count == 0) && TARGET_64BIT)
12793 rtx label = ix86_expand_aligntest (countreg, 4);
12794 srcmem = change_address (src, SImode, srcreg);
12795 dstmem = change_address (dst, SImode, destreg);
12796 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12797 emit_label (label);
12798 LABEL_NUSES (label) = 1;
12800 if (align > 2 && count != 0 && (count & 2))
12802 srcmem = change_address (src, HImode, srcreg);
12803 dstmem = change_address (dst, HImode, destreg);
12804 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12806 if (align <= 2 || count == 0)
12808 rtx label = ix86_expand_aligntest (countreg, 2);
12809 srcmem = change_address (src, HImode, srcreg);
12810 dstmem = change_address (dst, HImode, destreg);
12811 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12812 emit_label (label);
12813 LABEL_NUSES (label) = 1;
12815 if (align > 1 && count != 0 && (count & 1))
12817 srcmem = change_address (src, QImode, srcreg);
12818 dstmem = change_address (dst, QImode, destreg);
12819 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12821 if (align <= 1 || count == 0)
12823 rtx label = ix86_expand_aligntest (countreg, 1);
12824 srcmem = change_address (src, QImode, srcreg);
12825 dstmem = change_address (dst, QImode, destreg);
12826 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12827 emit_label (label);
12828 LABEL_NUSES (label) = 1;
12835 /* Expand string clear operation (bzero). Use i386 string operations when
12836 profitable. expand_movmem contains similar code. */
12838 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12840 rtx destreg, zeroreg, countreg, destexp;
12841 enum machine_mode counter_mode;
12842 HOST_WIDE_INT align = 0;
12843 unsigned HOST_WIDE_INT count = 0;
12845 if (GET_CODE (align_exp) == CONST_INT)
12846 align = INTVAL (align_exp);
12848 /* Can't use any of this if the user has appropriated esi. */
12849 if (global_regs[4])
12852 /* This simple hack avoids all inlining code and simplifies code below. */
12853 if (!TARGET_ALIGN_STRINGOPS)
12856 if (GET_CODE (count_exp) == CONST_INT)
12858 count = INTVAL (count_exp);
12859 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12862 /* Figure out proper mode for counter. For 32bits it is always SImode,
12863 for 64bits use SImode when possible, otherwise DImode.
12864 Set count to number of bytes copied when known at compile time. */
12866 || GET_MODE (count_exp) == SImode
12867 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12868 counter_mode = SImode;
12870 counter_mode = DImode;
12872 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12873 if (destreg != XEXP (dst, 0))
12874 dst = replace_equiv_address_nv (dst, destreg);
12877 /* When optimizing for size emit simple rep ; movsb instruction for
12878 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12879 sequence is 7 bytes long, so if optimizing for size and count is
12880 small enough that some stosl, stosw and stosb instructions without
12881 rep are shorter, fall back into the next if. */
12883 if ((!optimize || optimize_size)
12886 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12888 emit_insn (gen_cld ());
12890 countreg = ix86_zero_extend_to_Pmode (count_exp);
12891 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12892 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12893 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12895 else if (count != 0
12897 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12898 || optimize_size || count < (unsigned int) 64))
12900 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12901 unsigned HOST_WIDE_INT offset = 0;
12903 emit_insn (gen_cld ());
12905 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12906 if (count & ~(size - 1))
12908 unsigned HOST_WIDE_INT repcount;
12909 unsigned int max_nonrep;
12911 repcount = count >> (size == 4 ? 2 : 3);
12913 repcount &= 0x3fffffff;
12915 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12916 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12917 bytes. In both cases the latter seems to be faster for small
12919 max_nonrep = size == 4 ? 7 : 4;
12920 if (!optimize_size)
12923 case PROCESSOR_PENTIUM4:
12924 case PROCESSOR_NOCONA:
12931 if (repcount <= max_nonrep)
12932 while (repcount-- > 0)
12934 rtx mem = adjust_automodify_address_nv (dst,
12935 GET_MODE (zeroreg),
12937 emit_insn (gen_strset (destreg, mem, zeroreg));
12942 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12943 countreg = ix86_zero_extend_to_Pmode (countreg);
12944 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12945 GEN_INT (size == 4 ? 2 : 3));
12946 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12947 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12949 offset = count & ~(size - 1);
12952 if (size == 8 && (count & 0x04))
12954 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12956 emit_insn (gen_strset (destreg, mem,
12957 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12962 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12964 emit_insn (gen_strset (destreg, mem,
12965 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12970 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12972 emit_insn (gen_strset (destreg, mem,
12973 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12980 /* Compute desired alignment of the string operation. */
12981 int desired_alignment = (TARGET_PENTIUMPRO
12982 && (count == 0 || count >= (unsigned int) 260)
12983 ? 8 : UNITS_PER_WORD);
12985 /* In case we don't know anything about the alignment, default to
12986 library version, since it is usually equally fast and result in
12989 Also emit call when we know that the count is large and call overhead
12990 will not be important. */
12991 if (!TARGET_INLINE_ALL_STRINGOPS
12992 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12995 if (TARGET_SINGLE_STRINGOP)
12996 emit_insn (gen_cld ());
12998 countreg2 = gen_reg_rtx (Pmode);
12999 countreg = copy_to_mode_reg (counter_mode, count_exp);
13000 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
13001 /* Get rid of MEM_OFFSET, it won't be accurate. */
13002 dst = change_address (dst, BLKmode, destreg);
13004 if (count == 0 && align < desired_alignment)
13006 label = gen_label_rtx ();
13007 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
13008 LEU, 0, counter_mode, 1, label);
13012 rtx label = ix86_expand_aligntest (destreg, 1);
13013 emit_insn (gen_strset (destreg, dst,
13014 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13015 ix86_adjust_counter (countreg, 1);
13016 emit_label (label);
13017 LABEL_NUSES (label) = 1;
13021 rtx label = ix86_expand_aligntest (destreg, 2);
13022 emit_insn (gen_strset (destreg, dst,
13023 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13024 ix86_adjust_counter (countreg, 2);
13025 emit_label (label);
13026 LABEL_NUSES (label) = 1;
13028 if (align <= 4 && desired_alignment > 4)
13030 rtx label = ix86_expand_aligntest (destreg, 4);
13031 emit_insn (gen_strset (destreg, dst,
13033 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
13035 ix86_adjust_counter (countreg, 4);
13036 emit_label (label);
13037 LABEL_NUSES (label) = 1;
13040 if (label && desired_alignment > 4 && !TARGET_64BIT)
13042 emit_label (label);
13043 LABEL_NUSES (label) = 1;
13047 if (!TARGET_SINGLE_STRINGOP)
13048 emit_insn (gen_cld ());
13051 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
13053 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
13057 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
13058 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
13060 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
13061 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
13065 emit_label (label);
13066 LABEL_NUSES (label) = 1;
13069 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
13070 emit_insn (gen_strset (destreg, dst,
13071 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13072 if (TARGET_64BIT && (align <= 4 || count == 0))
13074 rtx label = ix86_expand_aligntest (countreg, 4);
13075 emit_insn (gen_strset (destreg, dst,
13076 gen_rtx_SUBREG (SImode, zeroreg, 0)));
13077 emit_label (label);
13078 LABEL_NUSES (label) = 1;
13080 if (align > 2 && count != 0 && (count & 2))
13081 emit_insn (gen_strset (destreg, dst,
13082 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13083 if (align <= 2 || count == 0)
13085 rtx label = ix86_expand_aligntest (countreg, 2);
13086 emit_insn (gen_strset (destreg, dst,
13087 gen_rtx_SUBREG (HImode, zeroreg, 0)));
13088 emit_label (label);
13089 LABEL_NUSES (label) = 1;
13091 if (align > 1 && count != 0 && (count & 1))
13092 emit_insn (gen_strset (destreg, dst,
13093 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13094 if (align <= 1 || count == 0)
13096 rtx label = ix86_expand_aligntest (countreg, 1);
13097 emit_insn (gen_strset (destreg, dst,
13098 gen_rtx_SUBREG (QImode, zeroreg, 0)));
13099 emit_label (label);
13100 LABEL_NUSES (label) = 1;
13106 /* Expand strlen. */
13108 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
13110 rtx addr, scratch1, scratch2, scratch3, scratch4;
13112 /* The generic case of strlen expander is long. Avoid it's
13113 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
13115 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13116 && !TARGET_INLINE_ALL_STRINGOPS
13118 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
13121 addr = force_reg (Pmode, XEXP (src, 0));
13122 scratch1 = gen_reg_rtx (Pmode);
13124 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
13127 /* Well it seems that some optimizer does not combine a call like
13128 foo(strlen(bar), strlen(bar));
13129 when the move and the subtraction is done here. It does calculate
13130 the length just once when these instructions are done inside of
13131 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
13132 often used and I use one fewer register for the lifetime of
13133 output_strlen_unroll() this is better. */
13135 emit_move_insn (out, addr);
13137 ix86_expand_strlensi_unroll_1 (out, src, align);
13139 /* strlensi_unroll_1 returns the address of the zero at the end of
13140 the string, like memchr(), so compute the length by subtracting
13141 the start address. */
13143 emit_insn (gen_subdi3 (out, out, addr));
13145 emit_insn (gen_subsi3 (out, out, addr));
13150 scratch2 = gen_reg_rtx (Pmode);
13151 scratch3 = gen_reg_rtx (Pmode);
13152 scratch4 = force_reg (Pmode, constm1_rtx);
13154 emit_move_insn (scratch3, addr);
13155 eoschar = force_reg (QImode, eoschar);
13157 emit_insn (gen_cld ());
13158 src = replace_equiv_address_nv (src, scratch3);
13160 /* If .md starts supporting :P, this can be done in .md. */
13161 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
13162 scratch4), UNSPEC_SCAS);
13163 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
13166 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
13167 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
13171 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
13172 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
13178 /* Expand the appropriate insns for doing strlen if not just doing
13181 out = result, initialized with the start address
13182 align_rtx = alignment of the address.
13183 scratch = scratch register, initialized with the startaddress when
13184 not aligned, otherwise undefined
13186 This is just the body. It needs the initializations mentioned above and
13187 some address computing at the end. These things are done in i386.md. */
13190 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
13194 rtx align_2_label = NULL_RTX;
13195 rtx align_3_label = NULL_RTX;
13196 rtx align_4_label = gen_label_rtx ();
13197 rtx end_0_label = gen_label_rtx ();
13199 rtx tmpreg = gen_reg_rtx (SImode);
13200 rtx scratch = gen_reg_rtx (SImode);
13204 if (GET_CODE (align_rtx) == CONST_INT)
13205 align = INTVAL (align_rtx);
13207 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
13209 /* Is there a known alignment and is it less than 4? */
13212 rtx scratch1 = gen_reg_rtx (Pmode);
13213 emit_move_insn (scratch1, out);
13214 /* Is there a known alignment and is it not 2? */
13217 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
13218 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
13220 /* Leave just the 3 lower bits. */
13221 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
13222 NULL_RTX, 0, OPTAB_WIDEN);
13224 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13225 Pmode, 1, align_4_label);
13226 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
13227 Pmode, 1, align_2_label);
13228 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
13229 Pmode, 1, align_3_label);
13233 /* Since the alignment is 2, we have to check 2 or 0 bytes;
13234 check if is aligned to 4 - byte. */
13236 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
13237 NULL_RTX, 0, OPTAB_WIDEN);
13239 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
13240 Pmode, 1, align_4_label);
13243 mem = change_address (src, QImode, out);
13245 /* Now compare the bytes. */
13247 /* Compare the first n unaligned byte on a byte per byte basis. */
13248 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
13249 QImode, 1, end_0_label);
13251 /* Increment the address. */
13253 emit_insn (gen_adddi3 (out, out, const1_rtx));
13255 emit_insn (gen_addsi3 (out, out, const1_rtx));
13257 /* Not needed with an alignment of 2 */
13260 emit_label (align_2_label);
13262 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13266 emit_insn (gen_adddi3 (out, out, const1_rtx));
13268 emit_insn (gen_addsi3 (out, out, const1_rtx));
13270 emit_label (align_3_label);
13273 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13277 emit_insn (gen_adddi3 (out, out, const1_rtx));
13279 emit_insn (gen_addsi3 (out, out, const1_rtx));
13282 /* Generate loop to check 4 bytes at a time. It is not a good idea to
13283 align this loop. It gives only huge programs, but does not help to
13285 emit_label (align_4_label);
13287 mem = change_address (src, SImode, out);
13288 emit_move_insn (scratch, mem);
13290 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13292 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13294 /* This formula yields a nonzero result iff one of the bytes is zero.
13295 This saves three branches inside loop and many cycles. */
13297 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13298 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13299 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13300 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13301 gen_int_mode (0x80808080, SImode)));
13302 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13307 rtx reg = gen_reg_rtx (SImode);
13308 rtx reg2 = gen_reg_rtx (Pmode);
13309 emit_move_insn (reg, tmpreg);
13310 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13312 /* If zero is not in the first two bytes, move two bytes forward. */
13313 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13314 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13315 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13316 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13317 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13320 /* Emit lea manually to avoid clobbering of flags. */
13321 emit_insn (gen_rtx_SET (SImode, reg2,
13322 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13324 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13325 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13326 emit_insn (gen_rtx_SET (VOIDmode, out,
13327 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13334 rtx end_2_label = gen_label_rtx ();
13335 /* Is zero in the first two bytes? */
13337 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13338 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13339 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13340 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13341 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13343 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13344 JUMP_LABEL (tmp) = end_2_label;
13346 /* Not in the first two. Move two bytes forward. */
13347 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13349 emit_insn (gen_adddi3 (out, out, const2_rtx));
13351 emit_insn (gen_addsi3 (out, out, const2_rtx));
13353 emit_label (end_2_label);
13357 /* Avoid branch in fixing the byte. */
13358 tmpreg = gen_lowpart (QImode, tmpreg);
13359 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13360 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13362 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13364 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13366 emit_label (end_0_label);
13370 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13371 rtx callarg2 ATTRIBUTE_UNUSED,
13372 rtx pop, int sibcall)
13374 rtx use = NULL, call;
13376 if (pop == const0_rtx)
13378 gcc_assert (!TARGET_64BIT || !pop);
13380 if (TARGET_MACHO && !TARGET_64BIT)
13383 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13384 fnaddr = machopic_indirect_call_target (fnaddr);
13389 /* Static functions and indirect calls don't need the pic register. */
13390 if (! TARGET_64BIT && flag_pic
13391 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13392 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13393 use_reg (&use, pic_offset_table_rtx);
13396 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13398 rtx al = gen_rtx_REG (QImode, 0);
13399 emit_move_insn (al, callarg2);
13400 use_reg (&use, al);
13403 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13405 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13406 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13408 if (sibcall && TARGET_64BIT
13409 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13412 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13413 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13414 emit_move_insn (fnaddr, addr);
13415 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13418 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13420 call = gen_rtx_SET (VOIDmode, retval, call);
13423 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13424 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13425 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13428 call = emit_call_insn (call);
13430 CALL_INSN_FUNCTION_USAGE (call) = use;
13434 /* Clear stack slot assignments remembered from previous functions.
13435 This is called from INIT_EXPANDERS once before RTL is emitted for each
13438 static struct machine_function *
13439 ix86_init_machine_status (void)
13441 struct machine_function *f;
13443 f = ggc_alloc_cleared (sizeof (struct machine_function));
13444 f->use_fast_prologue_epilogue_nregs = -1;
13445 f->tls_descriptor_call_expanded_p = 0;
13450 /* Return a MEM corresponding to a stack slot with mode MODE.
13451 Allocate a new slot if necessary.
13453 The RTL for a function can have several slots available: N is
13454 which slot to use. */
13457 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13459 struct stack_local_entry *s;
13461 gcc_assert (n < MAX_386_STACK_LOCALS);
13463 for (s = ix86_stack_locals; s; s = s->next)
13464 if (s->mode == mode && s->n == n)
13467 s = (struct stack_local_entry *)
13468 ggc_alloc (sizeof (struct stack_local_entry));
13471 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13473 s->next = ix86_stack_locals;
13474 ix86_stack_locals = s;
13478 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13480 static GTY(()) rtx ix86_tls_symbol;
13482 ix86_tls_get_addr (void)
13485 if (!ix86_tls_symbol)
13487 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13488 (TARGET_ANY_GNU_TLS
13490 ? "___tls_get_addr"
13491 : "__tls_get_addr");
13494 return ix86_tls_symbol;
13497 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13499 static GTY(()) rtx ix86_tls_module_base_symbol;
13501 ix86_tls_module_base (void)
13504 if (!ix86_tls_module_base_symbol)
13506 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13507 "_TLS_MODULE_BASE_");
13508 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13509 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13512 return ix86_tls_module_base_symbol;
13515 /* Calculate the length of the memory address in the instruction
13516 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13519 memory_address_length (rtx addr)
13521 struct ix86_address parts;
13522 rtx base, index, disp;
13526 if (GET_CODE (addr) == PRE_DEC
13527 || GET_CODE (addr) == POST_INC
13528 || GET_CODE (addr) == PRE_MODIFY
13529 || GET_CODE (addr) == POST_MODIFY)
13532 ok = ix86_decompose_address (addr, &parts);
13535 if (parts.base && GET_CODE (parts.base) == SUBREG)
13536 parts.base = SUBREG_REG (parts.base);
13537 if (parts.index && GET_CODE (parts.index) == SUBREG)
13538 parts.index = SUBREG_REG (parts.index);
13541 index = parts.index;
13546 - esp as the base always wants an index,
13547 - ebp as the base always wants a displacement. */
13549 /* Register Indirect. */
13550 if (base && !index && !disp)
13552 /* esp (for its index) and ebp (for its displacement) need
13553 the two-byte modrm form. */
13554 if (addr == stack_pointer_rtx
13555 || addr == arg_pointer_rtx
13556 || addr == frame_pointer_rtx
13557 || addr == hard_frame_pointer_rtx)
13561 /* Direct Addressing. */
13562 else if (disp && !base && !index)
13567 /* Find the length of the displacement constant. */
13570 if (base && satisfies_constraint_K (disp))
13575 /* ebp always wants a displacement. */
13576 else if (base == hard_frame_pointer_rtx)
13579 /* An index requires the two-byte modrm form.... */
13581 /* ...like esp, which always wants an index. */
13582 || base == stack_pointer_rtx
13583 || base == arg_pointer_rtx
13584 || base == frame_pointer_rtx)
13591 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13592 is set, expect that insn have 8bit immediate alternative. */
13594 ix86_attr_length_immediate_default (rtx insn, int shortform)
13598 extract_insn_cached (insn);
13599 for (i = recog_data.n_operands - 1; i >= 0; --i)
13600 if (CONSTANT_P (recog_data.operand[i]))
13603 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
13607 switch (get_attr_mode (insn))
13618 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13623 fatal_insn ("unknown insn mode", insn);
13629 /* Compute default value for "length_address" attribute. */
13631 ix86_attr_length_address_default (rtx insn)
13635 if (get_attr_type (insn) == TYPE_LEA)
13637 rtx set = PATTERN (insn);
13639 if (GET_CODE (set) == PARALLEL)
13640 set = XVECEXP (set, 0, 0);
13642 gcc_assert (GET_CODE (set) == SET);
13644 return memory_address_length (SET_SRC (set));
13647 extract_insn_cached (insn);
13648 for (i = recog_data.n_operands - 1; i >= 0; --i)
13649 if (GET_CODE (recog_data.operand[i]) == MEM)
13651 return memory_address_length (XEXP (recog_data.operand[i], 0));
13657 /* Return the maximum number of instructions a cpu can issue. */
13660 ix86_issue_rate (void)
13664 case PROCESSOR_PENTIUM:
13668 case PROCESSOR_PENTIUMPRO:
13669 case PROCESSOR_PENTIUM4:
13670 case PROCESSOR_ATHLON:
13672 case PROCESSOR_NOCONA:
13673 case PROCESSOR_GENERIC32:
13674 case PROCESSOR_GENERIC64:
13682 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13683 by DEP_INSN and nothing set by DEP_INSN. */
13686 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13690 /* Simplify the test for uninteresting insns. */
13691 if (insn_type != TYPE_SETCC
13692 && insn_type != TYPE_ICMOV
13693 && insn_type != TYPE_FCMOV
13694 && insn_type != TYPE_IBR)
13697 if ((set = single_set (dep_insn)) != 0)
13699 set = SET_DEST (set);
13702 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13703 && XVECLEN (PATTERN (dep_insn), 0) == 2
13704 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13705 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13707 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13708 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13713 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13716 /* This test is true if the dependent insn reads the flags but
13717 not any other potentially set register. */
13718 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13721 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13727 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13728 address with operands set by DEP_INSN. */
13731 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
13735 if (insn_type == TYPE_LEA
13738 addr = PATTERN (insn);
13740 if (GET_CODE (addr) == PARALLEL)
13741 addr = XVECEXP (addr, 0, 0);
13743 gcc_assert (GET_CODE (addr) == SET);
13745 addr = SET_SRC (addr);
13750 extract_insn_cached (insn);
13751 for (i = recog_data.n_operands - 1; i >= 0; --i)
13752 if (GET_CODE (recog_data.operand[i]) == MEM)
13754 addr = XEXP (recog_data.operand[i], 0);
13761 return modified_in_p (addr, dep_insn);
13765 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13767 enum attr_type insn_type, dep_insn_type;
13768 enum attr_memory memory;
13770 int dep_insn_code_number;
13772 /* Anti and output dependencies have zero cost on all CPUs. */
13773 if (REG_NOTE_KIND (link) != 0)
13776 dep_insn_code_number = recog_memoized (dep_insn);
13778 /* If we can't recognize the insns, we can't really do anything. */
13779 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13782 insn_type = get_attr_type (insn);
13783 dep_insn_type = get_attr_type (dep_insn);
13787 case PROCESSOR_PENTIUM:
13788 /* Address Generation Interlock adds a cycle of latency. */
13789 if (ix86_agi_dependent (insn, dep_insn, insn_type))
13792 /* ??? Compares pair with jump/setcc. */
13793 if (ix86_flags_dependent (insn, dep_insn, insn_type))
13796 /* Floating point stores require value to be ready one cycle earlier. */
13797 if (insn_type == TYPE_FMOV
13798 && get_attr_memory (insn) == MEMORY_STORE
13799 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13803 case PROCESSOR_PENTIUMPRO:
13804 memory = get_attr_memory (insn);
13806 /* INT->FP conversion is expensive. */
13807 if (get_attr_fp_int_src (dep_insn))
13810 /* There is one cycle extra latency between an FP op and a store. */
13811 if (insn_type == TYPE_FMOV
13812 && (set = single_set (dep_insn)) != NULL_RTX
13813 && (set2 = single_set (insn)) != NULL_RTX
13814 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13815 && GET_CODE (SET_DEST (set2)) == MEM)
13818 /* Show ability of reorder buffer to hide latency of load by executing
13819 in parallel with previous instruction in case
13820 previous instruction is not needed to compute the address. */
13821 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13822 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13824 /* Claim moves to take one cycle, as core can issue one load
13825 at time and the next load can start cycle later. */
13826 if (dep_insn_type == TYPE_IMOV
13827 || dep_insn_type == TYPE_FMOV)
13835 memory = get_attr_memory (insn);
13837 /* The esp dependency is resolved before the instruction is really
13839 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13840 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13843 /* INT->FP conversion is expensive. */
13844 if (get_attr_fp_int_src (dep_insn))
13847 /* Show ability of reorder buffer to hide latency of load by executing
13848 in parallel with previous instruction in case
13849 previous instruction is not needed to compute the address. */
13850 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13851 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13853 /* Claim moves to take one cycle, as core can issue one load
13854 at time and the next load can start cycle later. */
13855 if (dep_insn_type == TYPE_IMOV
13856 || dep_insn_type == TYPE_FMOV)
13865 case PROCESSOR_ATHLON:
13867 case PROCESSOR_GENERIC32:
13868 case PROCESSOR_GENERIC64:
13869 memory = get_attr_memory (insn);
13871 /* Show ability of reorder buffer to hide latency of load by executing
13872 in parallel with previous instruction in case
13873 previous instruction is not needed to compute the address. */
13874 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13875 && !ix86_agi_dependent (insn, dep_insn, insn_type))
13877 enum attr_unit unit = get_attr_unit (insn);
13880 /* Because of the difference between the length of integer and
13881 floating unit pipeline preparation stages, the memory operands
13882 for floating point are cheaper.
13884 ??? For Athlon it the difference is most probably 2. */
13885 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13888 loadcost = TARGET_ATHLON ? 2 : 0;
13890 if (cost >= loadcost)
13903 /* How many alternative schedules to try. This should be as wide as the
13904 scheduling freedom in the DFA, but no wider. Making this value too
13905 large results extra work for the scheduler. */
13908 ia32_multipass_dfa_lookahead (void)
13910 if (ix86_tune == PROCESSOR_PENTIUM)
13913 if (ix86_tune == PROCESSOR_PENTIUMPRO
13914 || ix86_tune == PROCESSOR_K6)
13922 /* Compute the alignment given to a constant that is being placed in memory.
13923 EXP is the constant and ALIGN is the alignment that the object would
13925 The value of this function is used instead of that alignment to align
13929 ix86_constant_alignment (tree exp, int align)
13931 if (TREE_CODE (exp) == REAL_CST)
13933 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13935 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13938 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13939 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13940 return BITS_PER_WORD;
13945 /* Compute the alignment for a static variable.
13946 TYPE is the data type, and ALIGN is the alignment that
13947 the object would ordinarily have. The value of this function is used
13948 instead of that alignment to align the object. */
13951 ix86_data_alignment (tree type, int align)
13953 int max_align = optimize_size ? BITS_PER_WORD : 256;
13955 if (AGGREGATE_TYPE_P (type)
13956 && TYPE_SIZE (type)
13957 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13958 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13959 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13960 && align < max_align)
13963 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13964 to 16byte boundary. */
13967 if (AGGREGATE_TYPE_P (type)
13968 && TYPE_SIZE (type)
13969 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13970 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13971 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13975 if (TREE_CODE (type) == ARRAY_TYPE)
13977 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13979 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13982 else if (TREE_CODE (type) == COMPLEX_TYPE)
13985 if (TYPE_MODE (type) == DCmode && align < 64)
13987 if (TYPE_MODE (type) == XCmode && align < 128)
13990 else if ((TREE_CODE (type) == RECORD_TYPE
13991 || TREE_CODE (type) == UNION_TYPE
13992 || TREE_CODE (type) == QUAL_UNION_TYPE)
13993 && TYPE_FIELDS (type))
13995 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13997 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14000 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14001 || TREE_CODE (type) == INTEGER_TYPE)
14003 if (TYPE_MODE (type) == DFmode && align < 64)
14005 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14012 /* Compute the alignment for a local variable.
14013 TYPE is the data type, and ALIGN is the alignment that
14014 the object would ordinarily have. The value of this macro is used
14015 instead of that alignment to align the object. */
14018 ix86_local_alignment (tree type, int align)
14020 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14021 to 16byte boundary. */
14024 if (AGGREGATE_TYPE_P (type)
14025 && TYPE_SIZE (type)
14026 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14027 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
14028 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14031 if (TREE_CODE (type) == ARRAY_TYPE)
14033 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14035 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14038 else if (TREE_CODE (type) == COMPLEX_TYPE)
14040 if (TYPE_MODE (type) == DCmode && align < 64)
14042 if (TYPE_MODE (type) == XCmode && align < 128)
14045 else if ((TREE_CODE (type) == RECORD_TYPE
14046 || TREE_CODE (type) == UNION_TYPE
14047 || TREE_CODE (type) == QUAL_UNION_TYPE)
14048 && TYPE_FIELDS (type))
14050 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14052 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14055 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14056 || TREE_CODE (type) == INTEGER_TYPE)
14059 if (TYPE_MODE (type) == DFmode && align < 64)
14061 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14067 /* Emit RTL insns to initialize the variable parts of a trampoline.
14068 FNADDR is an RTX for the address of the function's pure code.
14069 CXT is an RTX for the static chain value for the function. */
14071 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
14075 /* Compute offset from the end of the jmp to the target function. */
14076 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
14077 plus_constant (tramp, 10),
14078 NULL_RTX, 1, OPTAB_DIRECT);
14079 emit_move_insn (gen_rtx_MEM (QImode, tramp),
14080 gen_int_mode (0xb9, QImode));
14081 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
14082 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
14083 gen_int_mode (0xe9, QImode));
14084 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
14089 /* Try to load address using shorter movl instead of movabs.
14090 We may want to support movq for kernel mode, but kernel does not use
14091 trampolines at the moment. */
14092 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
14094 fnaddr = copy_to_mode_reg (DImode, fnaddr);
14095 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14096 gen_int_mode (0xbb41, HImode));
14097 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
14098 gen_lowpart (SImode, fnaddr));
14103 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14104 gen_int_mode (0xbb49, HImode));
14105 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14109 /* Load static chain using movabs to r10. */
14110 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14111 gen_int_mode (0xba49, HImode));
14112 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
14115 /* Jump to the r11 */
14116 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
14117 gen_int_mode (0xff49, HImode));
14118 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
14119 gen_int_mode (0xe3, QImode));
14121 gcc_assert (offset <= TRAMPOLINE_SIZE);
14124 #ifdef ENABLE_EXECUTE_STACK
14125 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
14126 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
14130 /* Codes for all the SSE/MMX builtins. */
14133 IX86_BUILTIN_ADDPS,
14134 IX86_BUILTIN_ADDSS,
14135 IX86_BUILTIN_DIVPS,
14136 IX86_BUILTIN_DIVSS,
14137 IX86_BUILTIN_MULPS,
14138 IX86_BUILTIN_MULSS,
14139 IX86_BUILTIN_SUBPS,
14140 IX86_BUILTIN_SUBSS,
14142 IX86_BUILTIN_CMPEQPS,
14143 IX86_BUILTIN_CMPLTPS,
14144 IX86_BUILTIN_CMPLEPS,
14145 IX86_BUILTIN_CMPGTPS,
14146 IX86_BUILTIN_CMPGEPS,
14147 IX86_BUILTIN_CMPNEQPS,
14148 IX86_BUILTIN_CMPNLTPS,
14149 IX86_BUILTIN_CMPNLEPS,
14150 IX86_BUILTIN_CMPNGTPS,
14151 IX86_BUILTIN_CMPNGEPS,
14152 IX86_BUILTIN_CMPORDPS,
14153 IX86_BUILTIN_CMPUNORDPS,
14154 IX86_BUILTIN_CMPEQSS,
14155 IX86_BUILTIN_CMPLTSS,
14156 IX86_BUILTIN_CMPLESS,
14157 IX86_BUILTIN_CMPNEQSS,
14158 IX86_BUILTIN_CMPNLTSS,
14159 IX86_BUILTIN_CMPNLESS,
14160 IX86_BUILTIN_CMPNGTSS,
14161 IX86_BUILTIN_CMPNGESS,
14162 IX86_BUILTIN_CMPORDSS,
14163 IX86_BUILTIN_CMPUNORDSS,
14165 IX86_BUILTIN_COMIEQSS,
14166 IX86_BUILTIN_COMILTSS,
14167 IX86_BUILTIN_COMILESS,
14168 IX86_BUILTIN_COMIGTSS,
14169 IX86_BUILTIN_COMIGESS,
14170 IX86_BUILTIN_COMINEQSS,
14171 IX86_BUILTIN_UCOMIEQSS,
14172 IX86_BUILTIN_UCOMILTSS,
14173 IX86_BUILTIN_UCOMILESS,
14174 IX86_BUILTIN_UCOMIGTSS,
14175 IX86_BUILTIN_UCOMIGESS,
14176 IX86_BUILTIN_UCOMINEQSS,
14178 IX86_BUILTIN_CVTPI2PS,
14179 IX86_BUILTIN_CVTPS2PI,
14180 IX86_BUILTIN_CVTSI2SS,
14181 IX86_BUILTIN_CVTSI642SS,
14182 IX86_BUILTIN_CVTSS2SI,
14183 IX86_BUILTIN_CVTSS2SI64,
14184 IX86_BUILTIN_CVTTPS2PI,
14185 IX86_BUILTIN_CVTTSS2SI,
14186 IX86_BUILTIN_CVTTSS2SI64,
14188 IX86_BUILTIN_MAXPS,
14189 IX86_BUILTIN_MAXSS,
14190 IX86_BUILTIN_MINPS,
14191 IX86_BUILTIN_MINSS,
14193 IX86_BUILTIN_LOADUPS,
14194 IX86_BUILTIN_STOREUPS,
14195 IX86_BUILTIN_MOVSS,
14197 IX86_BUILTIN_MOVHLPS,
14198 IX86_BUILTIN_MOVLHPS,
14199 IX86_BUILTIN_LOADHPS,
14200 IX86_BUILTIN_LOADLPS,
14201 IX86_BUILTIN_STOREHPS,
14202 IX86_BUILTIN_STORELPS,
14204 IX86_BUILTIN_MASKMOVQ,
14205 IX86_BUILTIN_MOVMSKPS,
14206 IX86_BUILTIN_PMOVMSKB,
14208 IX86_BUILTIN_MOVNTPS,
14209 IX86_BUILTIN_MOVNTQ,
14211 IX86_BUILTIN_LOADDQU,
14212 IX86_BUILTIN_STOREDQU,
14214 IX86_BUILTIN_PACKSSWB,
14215 IX86_BUILTIN_PACKSSDW,
14216 IX86_BUILTIN_PACKUSWB,
14218 IX86_BUILTIN_PADDB,
14219 IX86_BUILTIN_PADDW,
14220 IX86_BUILTIN_PADDD,
14221 IX86_BUILTIN_PADDQ,
14222 IX86_BUILTIN_PADDSB,
14223 IX86_BUILTIN_PADDSW,
14224 IX86_BUILTIN_PADDUSB,
14225 IX86_BUILTIN_PADDUSW,
14226 IX86_BUILTIN_PSUBB,
14227 IX86_BUILTIN_PSUBW,
14228 IX86_BUILTIN_PSUBD,
14229 IX86_BUILTIN_PSUBQ,
14230 IX86_BUILTIN_PSUBSB,
14231 IX86_BUILTIN_PSUBSW,
14232 IX86_BUILTIN_PSUBUSB,
14233 IX86_BUILTIN_PSUBUSW,
14236 IX86_BUILTIN_PANDN,
14240 IX86_BUILTIN_PAVGB,
14241 IX86_BUILTIN_PAVGW,
14243 IX86_BUILTIN_PCMPEQB,
14244 IX86_BUILTIN_PCMPEQW,
14245 IX86_BUILTIN_PCMPEQD,
14246 IX86_BUILTIN_PCMPGTB,
14247 IX86_BUILTIN_PCMPGTW,
14248 IX86_BUILTIN_PCMPGTD,
14250 IX86_BUILTIN_PMADDWD,
14252 IX86_BUILTIN_PMAXSW,
14253 IX86_BUILTIN_PMAXUB,
14254 IX86_BUILTIN_PMINSW,
14255 IX86_BUILTIN_PMINUB,
14257 IX86_BUILTIN_PMULHUW,
14258 IX86_BUILTIN_PMULHW,
14259 IX86_BUILTIN_PMULLW,
14261 IX86_BUILTIN_PSADBW,
14262 IX86_BUILTIN_PSHUFW,
14264 IX86_BUILTIN_PSLLW,
14265 IX86_BUILTIN_PSLLD,
14266 IX86_BUILTIN_PSLLQ,
14267 IX86_BUILTIN_PSRAW,
14268 IX86_BUILTIN_PSRAD,
14269 IX86_BUILTIN_PSRLW,
14270 IX86_BUILTIN_PSRLD,
14271 IX86_BUILTIN_PSRLQ,
14272 IX86_BUILTIN_PSLLWI,
14273 IX86_BUILTIN_PSLLDI,
14274 IX86_BUILTIN_PSLLQI,
14275 IX86_BUILTIN_PSRAWI,
14276 IX86_BUILTIN_PSRADI,
14277 IX86_BUILTIN_PSRLWI,
14278 IX86_BUILTIN_PSRLDI,
14279 IX86_BUILTIN_PSRLQI,
14281 IX86_BUILTIN_PUNPCKHBW,
14282 IX86_BUILTIN_PUNPCKHWD,
14283 IX86_BUILTIN_PUNPCKHDQ,
14284 IX86_BUILTIN_PUNPCKLBW,
14285 IX86_BUILTIN_PUNPCKLWD,
14286 IX86_BUILTIN_PUNPCKLDQ,
14288 IX86_BUILTIN_SHUFPS,
14290 IX86_BUILTIN_RCPPS,
14291 IX86_BUILTIN_RCPSS,
14292 IX86_BUILTIN_RSQRTPS,
14293 IX86_BUILTIN_RSQRTSS,
14294 IX86_BUILTIN_SQRTPS,
14295 IX86_BUILTIN_SQRTSS,
14297 IX86_BUILTIN_UNPCKHPS,
14298 IX86_BUILTIN_UNPCKLPS,
14300 IX86_BUILTIN_ANDPS,
14301 IX86_BUILTIN_ANDNPS,
14303 IX86_BUILTIN_XORPS,
14306 IX86_BUILTIN_LDMXCSR,
14307 IX86_BUILTIN_STMXCSR,
14308 IX86_BUILTIN_SFENCE,
14310 /* 3DNow! Original */
14311 IX86_BUILTIN_FEMMS,
14312 IX86_BUILTIN_PAVGUSB,
14313 IX86_BUILTIN_PF2ID,
14314 IX86_BUILTIN_PFACC,
14315 IX86_BUILTIN_PFADD,
14316 IX86_BUILTIN_PFCMPEQ,
14317 IX86_BUILTIN_PFCMPGE,
14318 IX86_BUILTIN_PFCMPGT,
14319 IX86_BUILTIN_PFMAX,
14320 IX86_BUILTIN_PFMIN,
14321 IX86_BUILTIN_PFMUL,
14322 IX86_BUILTIN_PFRCP,
14323 IX86_BUILTIN_PFRCPIT1,
14324 IX86_BUILTIN_PFRCPIT2,
14325 IX86_BUILTIN_PFRSQIT1,
14326 IX86_BUILTIN_PFRSQRT,
14327 IX86_BUILTIN_PFSUB,
14328 IX86_BUILTIN_PFSUBR,
14329 IX86_BUILTIN_PI2FD,
14330 IX86_BUILTIN_PMULHRW,
14332 /* 3DNow! Athlon Extensions */
14333 IX86_BUILTIN_PF2IW,
14334 IX86_BUILTIN_PFNACC,
14335 IX86_BUILTIN_PFPNACC,
14336 IX86_BUILTIN_PI2FW,
14337 IX86_BUILTIN_PSWAPDSI,
14338 IX86_BUILTIN_PSWAPDSF,
14341 IX86_BUILTIN_ADDPD,
14342 IX86_BUILTIN_ADDSD,
14343 IX86_BUILTIN_DIVPD,
14344 IX86_BUILTIN_DIVSD,
14345 IX86_BUILTIN_MULPD,
14346 IX86_BUILTIN_MULSD,
14347 IX86_BUILTIN_SUBPD,
14348 IX86_BUILTIN_SUBSD,
14350 IX86_BUILTIN_CMPEQPD,
14351 IX86_BUILTIN_CMPLTPD,
14352 IX86_BUILTIN_CMPLEPD,
14353 IX86_BUILTIN_CMPGTPD,
14354 IX86_BUILTIN_CMPGEPD,
14355 IX86_BUILTIN_CMPNEQPD,
14356 IX86_BUILTIN_CMPNLTPD,
14357 IX86_BUILTIN_CMPNLEPD,
14358 IX86_BUILTIN_CMPNGTPD,
14359 IX86_BUILTIN_CMPNGEPD,
14360 IX86_BUILTIN_CMPORDPD,
14361 IX86_BUILTIN_CMPUNORDPD,
14362 IX86_BUILTIN_CMPNEPD,
14363 IX86_BUILTIN_CMPEQSD,
14364 IX86_BUILTIN_CMPLTSD,
14365 IX86_BUILTIN_CMPLESD,
14366 IX86_BUILTIN_CMPNEQSD,
14367 IX86_BUILTIN_CMPNLTSD,
14368 IX86_BUILTIN_CMPNLESD,
14369 IX86_BUILTIN_CMPORDSD,
14370 IX86_BUILTIN_CMPUNORDSD,
14371 IX86_BUILTIN_CMPNESD,
14373 IX86_BUILTIN_COMIEQSD,
14374 IX86_BUILTIN_COMILTSD,
14375 IX86_BUILTIN_COMILESD,
14376 IX86_BUILTIN_COMIGTSD,
14377 IX86_BUILTIN_COMIGESD,
14378 IX86_BUILTIN_COMINEQSD,
14379 IX86_BUILTIN_UCOMIEQSD,
14380 IX86_BUILTIN_UCOMILTSD,
14381 IX86_BUILTIN_UCOMILESD,
14382 IX86_BUILTIN_UCOMIGTSD,
14383 IX86_BUILTIN_UCOMIGESD,
14384 IX86_BUILTIN_UCOMINEQSD,
14386 IX86_BUILTIN_MAXPD,
14387 IX86_BUILTIN_MAXSD,
14388 IX86_BUILTIN_MINPD,
14389 IX86_BUILTIN_MINSD,
14391 IX86_BUILTIN_ANDPD,
14392 IX86_BUILTIN_ANDNPD,
14394 IX86_BUILTIN_XORPD,
14396 IX86_BUILTIN_SQRTPD,
14397 IX86_BUILTIN_SQRTSD,
14399 IX86_BUILTIN_UNPCKHPD,
14400 IX86_BUILTIN_UNPCKLPD,
14402 IX86_BUILTIN_SHUFPD,
14404 IX86_BUILTIN_LOADUPD,
14405 IX86_BUILTIN_STOREUPD,
14406 IX86_BUILTIN_MOVSD,
14408 IX86_BUILTIN_LOADHPD,
14409 IX86_BUILTIN_LOADLPD,
14411 IX86_BUILTIN_CVTDQ2PD,
14412 IX86_BUILTIN_CVTDQ2PS,
14414 IX86_BUILTIN_CVTPD2DQ,
14415 IX86_BUILTIN_CVTPD2PI,
14416 IX86_BUILTIN_CVTPD2PS,
14417 IX86_BUILTIN_CVTTPD2DQ,
14418 IX86_BUILTIN_CVTTPD2PI,
14420 IX86_BUILTIN_CVTPI2PD,
14421 IX86_BUILTIN_CVTSI2SD,
14422 IX86_BUILTIN_CVTSI642SD,
14424 IX86_BUILTIN_CVTSD2SI,
14425 IX86_BUILTIN_CVTSD2SI64,
14426 IX86_BUILTIN_CVTSD2SS,
14427 IX86_BUILTIN_CVTSS2SD,
14428 IX86_BUILTIN_CVTTSD2SI,
14429 IX86_BUILTIN_CVTTSD2SI64,
14431 IX86_BUILTIN_CVTPS2DQ,
14432 IX86_BUILTIN_CVTPS2PD,
14433 IX86_BUILTIN_CVTTPS2DQ,
14435 IX86_BUILTIN_MOVNTI,
14436 IX86_BUILTIN_MOVNTPD,
14437 IX86_BUILTIN_MOVNTDQ,
14440 IX86_BUILTIN_MASKMOVDQU,
14441 IX86_BUILTIN_MOVMSKPD,
14442 IX86_BUILTIN_PMOVMSKB128,
14444 IX86_BUILTIN_PACKSSWB128,
14445 IX86_BUILTIN_PACKSSDW128,
14446 IX86_BUILTIN_PACKUSWB128,
14448 IX86_BUILTIN_PADDB128,
14449 IX86_BUILTIN_PADDW128,
14450 IX86_BUILTIN_PADDD128,
14451 IX86_BUILTIN_PADDQ128,
14452 IX86_BUILTIN_PADDSB128,
14453 IX86_BUILTIN_PADDSW128,
14454 IX86_BUILTIN_PADDUSB128,
14455 IX86_BUILTIN_PADDUSW128,
14456 IX86_BUILTIN_PSUBB128,
14457 IX86_BUILTIN_PSUBW128,
14458 IX86_BUILTIN_PSUBD128,
14459 IX86_BUILTIN_PSUBQ128,
14460 IX86_BUILTIN_PSUBSB128,
14461 IX86_BUILTIN_PSUBSW128,
14462 IX86_BUILTIN_PSUBUSB128,
14463 IX86_BUILTIN_PSUBUSW128,
14465 IX86_BUILTIN_PAND128,
14466 IX86_BUILTIN_PANDN128,
14467 IX86_BUILTIN_POR128,
14468 IX86_BUILTIN_PXOR128,
14470 IX86_BUILTIN_PAVGB128,
14471 IX86_BUILTIN_PAVGW128,
14473 IX86_BUILTIN_PCMPEQB128,
14474 IX86_BUILTIN_PCMPEQW128,
14475 IX86_BUILTIN_PCMPEQD128,
14476 IX86_BUILTIN_PCMPGTB128,
14477 IX86_BUILTIN_PCMPGTW128,
14478 IX86_BUILTIN_PCMPGTD128,
14480 IX86_BUILTIN_PMADDWD128,
14482 IX86_BUILTIN_PMAXSW128,
14483 IX86_BUILTIN_PMAXUB128,
14484 IX86_BUILTIN_PMINSW128,
14485 IX86_BUILTIN_PMINUB128,
14487 IX86_BUILTIN_PMULUDQ,
14488 IX86_BUILTIN_PMULUDQ128,
14489 IX86_BUILTIN_PMULHUW128,
14490 IX86_BUILTIN_PMULHW128,
14491 IX86_BUILTIN_PMULLW128,
14493 IX86_BUILTIN_PSADBW128,
14494 IX86_BUILTIN_PSHUFHW,
14495 IX86_BUILTIN_PSHUFLW,
14496 IX86_BUILTIN_PSHUFD,
14498 IX86_BUILTIN_PSLLW128,
14499 IX86_BUILTIN_PSLLD128,
14500 IX86_BUILTIN_PSLLQ128,
14501 IX86_BUILTIN_PSRAW128,
14502 IX86_BUILTIN_PSRAD128,
14503 IX86_BUILTIN_PSRLW128,
14504 IX86_BUILTIN_PSRLD128,
14505 IX86_BUILTIN_PSRLQ128,
14506 IX86_BUILTIN_PSLLDQI128,
14507 IX86_BUILTIN_PSLLWI128,
14508 IX86_BUILTIN_PSLLDI128,
14509 IX86_BUILTIN_PSLLQI128,
14510 IX86_BUILTIN_PSRAWI128,
14511 IX86_BUILTIN_PSRADI128,
14512 IX86_BUILTIN_PSRLDQI128,
14513 IX86_BUILTIN_PSRLWI128,
14514 IX86_BUILTIN_PSRLDI128,
14515 IX86_BUILTIN_PSRLQI128,
14517 IX86_BUILTIN_PUNPCKHBW128,
14518 IX86_BUILTIN_PUNPCKHWD128,
14519 IX86_BUILTIN_PUNPCKHDQ128,
14520 IX86_BUILTIN_PUNPCKHQDQ128,
14521 IX86_BUILTIN_PUNPCKLBW128,
14522 IX86_BUILTIN_PUNPCKLWD128,
14523 IX86_BUILTIN_PUNPCKLDQ128,
14524 IX86_BUILTIN_PUNPCKLQDQ128,
14526 IX86_BUILTIN_CLFLUSH,
14527 IX86_BUILTIN_MFENCE,
14528 IX86_BUILTIN_LFENCE,
14530 /* Prescott New Instructions. */
14531 IX86_BUILTIN_ADDSUBPS,
14532 IX86_BUILTIN_HADDPS,
14533 IX86_BUILTIN_HSUBPS,
14534 IX86_BUILTIN_MOVSHDUP,
14535 IX86_BUILTIN_MOVSLDUP,
14536 IX86_BUILTIN_ADDSUBPD,
14537 IX86_BUILTIN_HADDPD,
14538 IX86_BUILTIN_HSUBPD,
14539 IX86_BUILTIN_LDDQU,
14541 IX86_BUILTIN_MONITOR,
14542 IX86_BUILTIN_MWAIT,
14545 IX86_BUILTIN_PHADDW,
14546 IX86_BUILTIN_PHADDD,
14547 IX86_BUILTIN_PHADDSW,
14548 IX86_BUILTIN_PHSUBW,
14549 IX86_BUILTIN_PHSUBD,
14550 IX86_BUILTIN_PHSUBSW,
14551 IX86_BUILTIN_PMADDUBSW,
14552 IX86_BUILTIN_PMULHRSW,
14553 IX86_BUILTIN_PSHUFB,
14554 IX86_BUILTIN_PSIGNB,
14555 IX86_BUILTIN_PSIGNW,
14556 IX86_BUILTIN_PSIGND,
14557 IX86_BUILTIN_PALIGNR,
14558 IX86_BUILTIN_PABSB,
14559 IX86_BUILTIN_PABSW,
14560 IX86_BUILTIN_PABSD,
14562 IX86_BUILTIN_PHADDW128,
14563 IX86_BUILTIN_PHADDD128,
14564 IX86_BUILTIN_PHADDSW128,
14565 IX86_BUILTIN_PHSUBW128,
14566 IX86_BUILTIN_PHSUBD128,
14567 IX86_BUILTIN_PHSUBSW128,
14568 IX86_BUILTIN_PMADDUBSW128,
14569 IX86_BUILTIN_PMULHRSW128,
14570 IX86_BUILTIN_PSHUFB128,
14571 IX86_BUILTIN_PSIGNB128,
14572 IX86_BUILTIN_PSIGNW128,
14573 IX86_BUILTIN_PSIGND128,
14574 IX86_BUILTIN_PALIGNR128,
14575 IX86_BUILTIN_PABSB128,
14576 IX86_BUILTIN_PABSW128,
14577 IX86_BUILTIN_PABSD128,
14579 IX86_BUILTIN_VEC_INIT_V2SI,
14580 IX86_BUILTIN_VEC_INIT_V4HI,
14581 IX86_BUILTIN_VEC_INIT_V8QI,
14582 IX86_BUILTIN_VEC_EXT_V2DF,
14583 IX86_BUILTIN_VEC_EXT_V2DI,
14584 IX86_BUILTIN_VEC_EXT_V4SF,
14585 IX86_BUILTIN_VEC_EXT_V4SI,
14586 IX86_BUILTIN_VEC_EXT_V8HI,
14587 IX86_BUILTIN_VEC_EXT_V2SI,
14588 IX86_BUILTIN_VEC_EXT_V4HI,
14589 IX86_BUILTIN_VEC_SET_V8HI,
14590 IX86_BUILTIN_VEC_SET_V4HI,
14595 #define def_builtin(MASK, NAME, TYPE, CODE) \
14597 if ((MASK) & target_flags \
14598 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14599 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14600 NULL, NULL_TREE); \
14603 /* Bits for builtin_description.flag. */
14605 /* Set when we don't support the comparison natively, and should
14606 swap_comparison in order to support it. */
14607 #define BUILTIN_DESC_SWAP_OPERANDS 1
14609 struct builtin_description
14611 const unsigned int mask;
14612 const enum insn_code icode;
14613 const char *const name;
14614 const enum ix86_builtins code;
14615 const enum rtx_code comparison;
14616 const unsigned int flag;
14619 static const struct builtin_description bdesc_comi[] =
14621 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14622 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14623 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14624 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14625 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14626 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14627 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14628 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14629 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14630 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14631 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14632 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14633 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14634 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14635 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14636 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14637 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14638 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14639 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14640 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14641 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14642 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14643 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14644 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14647 static const struct builtin_description bdesc_2arg[] =
14650 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14651 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14652 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14653 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14654 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14655 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14656 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14657 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14659 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14660 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14661 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14662 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14663 BUILTIN_DESC_SWAP_OPERANDS },
14664 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14665 BUILTIN_DESC_SWAP_OPERANDS },
14666 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14667 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14668 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14669 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14670 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14671 BUILTIN_DESC_SWAP_OPERANDS },
14672 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14673 BUILTIN_DESC_SWAP_OPERANDS },
14674 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14675 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14676 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14677 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14678 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14679 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14680 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14681 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14682 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14683 BUILTIN_DESC_SWAP_OPERANDS },
14684 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14685 BUILTIN_DESC_SWAP_OPERANDS },
14686 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14688 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14689 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14690 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14691 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14693 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14694 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14695 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14696 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14698 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14699 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14700 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14701 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14702 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14705 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14706 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14707 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14708 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14709 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14710 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14711 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14712 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14714 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14715 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14716 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14717 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14718 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14719 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14720 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14721 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14723 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14724 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14725 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14727 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14728 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14729 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14730 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14732 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14733 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14735 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14736 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14737 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14738 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14739 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14740 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14742 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14743 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14744 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14745 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14747 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14748 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14749 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14750 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14751 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14752 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14755 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14756 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14757 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14759 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14760 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14761 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14763 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14764 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14765 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14766 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14767 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14768 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14770 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14771 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14772 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14773 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14774 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14775 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14777 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14778 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14779 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14780 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14782 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14783 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14786 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14787 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14788 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14789 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14790 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14791 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14792 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14793 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14795 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14796 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14797 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14798 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14799 BUILTIN_DESC_SWAP_OPERANDS },
14800 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14801 BUILTIN_DESC_SWAP_OPERANDS },
14802 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14803 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14804 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14805 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14806 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14807 BUILTIN_DESC_SWAP_OPERANDS },
14808 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14809 BUILTIN_DESC_SWAP_OPERANDS },
14810 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14811 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14812 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14813 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14814 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14815 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14816 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14817 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14818 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14820 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14821 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14822 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14823 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14825 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14826 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14827 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14828 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14830 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14831 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14832 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14835 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14836 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14837 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14838 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14839 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14840 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14841 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14842 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14844 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14845 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14846 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14847 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14848 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14849 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14850 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14851 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14853 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14854 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14856 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14857 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14858 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14859 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14861 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14862 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14864 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14865 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14866 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14867 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14868 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14869 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14871 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14872 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14873 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14874 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14876 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14877 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14878 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14879 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14880 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14881 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14882 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14883 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14885 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14886 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14887 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14889 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14890 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14892 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14893 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14895 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14896 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14897 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14899 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14900 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14901 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14903 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14904 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14906 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14908 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14909 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14910 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14911 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14914 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14915 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14916 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14917 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14918 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14919 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
14922 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
14923 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
14924 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
14925 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
14926 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
14927 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
14928 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
14929 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
14930 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
14931 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
14932 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
14933 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
14934 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
14935 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
14936 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
14937 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
14938 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
14939 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
14940 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
14941 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
14942 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
14943 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
14944 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
14945 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
14948 static const struct builtin_description bdesc_1arg[] =
14950 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14951 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14953 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14954 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14955 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14957 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14958 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14959 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14960 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14961 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14962 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14964 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14965 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14967 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14969 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14970 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14972 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14973 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14974 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14975 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14976 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14978 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14980 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14981 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14982 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14983 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14985 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14986 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14987 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14990 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14991 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14994 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
14995 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
14996 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
14997 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
14998 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
14999 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
15003 ix86_init_builtins (void)
15006 ix86_init_mmx_sse_builtins ();
15009 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
15010 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
15013 ix86_init_mmx_sse_builtins (void)
15015 const struct builtin_description * d;
15018 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
15019 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15020 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
15021 tree V2DI_type_node
15022 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
15023 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
15024 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
15025 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
15026 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15027 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15028 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
15030 tree pchar_type_node = build_pointer_type (char_type_node);
15031 tree pcchar_type_node = build_pointer_type (
15032 build_type_variant (char_type_node, 1, 0));
15033 tree pfloat_type_node = build_pointer_type (float_type_node);
15034 tree pcfloat_type_node = build_pointer_type (
15035 build_type_variant (float_type_node, 1, 0));
15036 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
15037 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
15038 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
15041 tree int_ftype_v4sf_v4sf
15042 = build_function_type_list (integer_type_node,
15043 V4SF_type_node, V4SF_type_node, NULL_TREE);
15044 tree v4si_ftype_v4sf_v4sf
15045 = build_function_type_list (V4SI_type_node,
15046 V4SF_type_node, V4SF_type_node, NULL_TREE);
15047 /* MMX/SSE/integer conversions. */
15048 tree int_ftype_v4sf
15049 = build_function_type_list (integer_type_node,
15050 V4SF_type_node, NULL_TREE);
15051 tree int64_ftype_v4sf
15052 = build_function_type_list (long_long_integer_type_node,
15053 V4SF_type_node, NULL_TREE);
15054 tree int_ftype_v8qi
15055 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
15056 tree v4sf_ftype_v4sf_int
15057 = build_function_type_list (V4SF_type_node,
15058 V4SF_type_node, integer_type_node, NULL_TREE);
15059 tree v4sf_ftype_v4sf_int64
15060 = build_function_type_list (V4SF_type_node,
15061 V4SF_type_node, long_long_integer_type_node,
15063 tree v4sf_ftype_v4sf_v2si
15064 = build_function_type_list (V4SF_type_node,
15065 V4SF_type_node, V2SI_type_node, NULL_TREE);
15067 /* Miscellaneous. */
15068 tree v8qi_ftype_v4hi_v4hi
15069 = build_function_type_list (V8QI_type_node,
15070 V4HI_type_node, V4HI_type_node, NULL_TREE);
15071 tree v4hi_ftype_v2si_v2si
15072 = build_function_type_list (V4HI_type_node,
15073 V2SI_type_node, V2SI_type_node, NULL_TREE);
15074 tree v4sf_ftype_v4sf_v4sf_int
15075 = build_function_type_list (V4SF_type_node,
15076 V4SF_type_node, V4SF_type_node,
15077 integer_type_node, NULL_TREE);
15078 tree v2si_ftype_v4hi_v4hi
15079 = build_function_type_list (V2SI_type_node,
15080 V4HI_type_node, V4HI_type_node, NULL_TREE);
15081 tree v4hi_ftype_v4hi_int
15082 = build_function_type_list (V4HI_type_node,
15083 V4HI_type_node, integer_type_node, NULL_TREE);
15084 tree v4hi_ftype_v4hi_di
15085 = build_function_type_list (V4HI_type_node,
15086 V4HI_type_node, long_long_unsigned_type_node,
15088 tree v2si_ftype_v2si_di
15089 = build_function_type_list (V2SI_type_node,
15090 V2SI_type_node, long_long_unsigned_type_node,
15092 tree void_ftype_void
15093 = build_function_type (void_type_node, void_list_node);
15094 tree void_ftype_unsigned
15095 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
15096 tree void_ftype_unsigned_unsigned
15097 = build_function_type_list (void_type_node, unsigned_type_node,
15098 unsigned_type_node, NULL_TREE);
15099 tree void_ftype_pcvoid_unsigned_unsigned
15100 = build_function_type_list (void_type_node, const_ptr_type_node,
15101 unsigned_type_node, unsigned_type_node,
15103 tree unsigned_ftype_void
15104 = build_function_type (unsigned_type_node, void_list_node);
15105 tree v2si_ftype_v4sf
15106 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
15107 /* Loads/stores. */
15108 tree void_ftype_v8qi_v8qi_pchar
15109 = build_function_type_list (void_type_node,
15110 V8QI_type_node, V8QI_type_node,
15111 pchar_type_node, NULL_TREE);
15112 tree v4sf_ftype_pcfloat
15113 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
15114 /* @@@ the type is bogus */
15115 tree v4sf_ftype_v4sf_pv2si
15116 = build_function_type_list (V4SF_type_node,
15117 V4SF_type_node, pv2si_type_node, NULL_TREE);
15118 tree void_ftype_pv2si_v4sf
15119 = build_function_type_list (void_type_node,
15120 pv2si_type_node, V4SF_type_node, NULL_TREE);
15121 tree void_ftype_pfloat_v4sf
15122 = build_function_type_list (void_type_node,
15123 pfloat_type_node, V4SF_type_node, NULL_TREE);
15124 tree void_ftype_pdi_di
15125 = build_function_type_list (void_type_node,
15126 pdi_type_node, long_long_unsigned_type_node,
15128 tree void_ftype_pv2di_v2di
15129 = build_function_type_list (void_type_node,
15130 pv2di_type_node, V2DI_type_node, NULL_TREE);
15131 /* Normal vector unops. */
15132 tree v4sf_ftype_v4sf
15133 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15134 tree v16qi_ftype_v16qi
15135 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15136 tree v8hi_ftype_v8hi
15137 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15138 tree v4si_ftype_v4si
15139 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15140 tree v8qi_ftype_v8qi
15141 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
15142 tree v4hi_ftype_v4hi
15143 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
15145 /* Normal vector binops. */
15146 tree v4sf_ftype_v4sf_v4sf
15147 = build_function_type_list (V4SF_type_node,
15148 V4SF_type_node, V4SF_type_node, NULL_TREE);
15149 tree v8qi_ftype_v8qi_v8qi
15150 = build_function_type_list (V8QI_type_node,
15151 V8QI_type_node, V8QI_type_node, NULL_TREE);
15152 tree v4hi_ftype_v4hi_v4hi
15153 = build_function_type_list (V4HI_type_node,
15154 V4HI_type_node, V4HI_type_node, NULL_TREE);
15155 tree v2si_ftype_v2si_v2si
15156 = build_function_type_list (V2SI_type_node,
15157 V2SI_type_node, V2SI_type_node, NULL_TREE);
15158 tree di_ftype_di_di
15159 = build_function_type_list (long_long_unsigned_type_node,
15160 long_long_unsigned_type_node,
15161 long_long_unsigned_type_node, NULL_TREE);
15163 tree di_ftype_di_di_int
15164 = build_function_type_list (long_long_unsigned_type_node,
15165 long_long_unsigned_type_node,
15166 long_long_unsigned_type_node,
15167 integer_type_node, NULL_TREE);
15169 tree v2si_ftype_v2sf
15170 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
15171 tree v2sf_ftype_v2si
15172 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
15173 tree v2si_ftype_v2si
15174 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
15175 tree v2sf_ftype_v2sf
15176 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
15177 tree v2sf_ftype_v2sf_v2sf
15178 = build_function_type_list (V2SF_type_node,
15179 V2SF_type_node, V2SF_type_node, NULL_TREE);
15180 tree v2si_ftype_v2sf_v2sf
15181 = build_function_type_list (V2SI_type_node,
15182 V2SF_type_node, V2SF_type_node, NULL_TREE);
15183 tree pint_type_node = build_pointer_type (integer_type_node);
15184 tree pdouble_type_node = build_pointer_type (double_type_node);
15185 tree pcdouble_type_node = build_pointer_type (
15186 build_type_variant (double_type_node, 1, 0));
15187 tree int_ftype_v2df_v2df
15188 = build_function_type_list (integer_type_node,
15189 V2DF_type_node, V2DF_type_node, NULL_TREE);
15191 tree void_ftype_pcvoid
15192 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
15193 tree v4sf_ftype_v4si
15194 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
15195 tree v4si_ftype_v4sf
15196 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
15197 tree v2df_ftype_v4si
15198 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
15199 tree v4si_ftype_v2df
15200 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
15201 tree v2si_ftype_v2df
15202 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
15203 tree v4sf_ftype_v2df
15204 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
15205 tree v2df_ftype_v2si
15206 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
15207 tree v2df_ftype_v4sf
15208 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
15209 tree int_ftype_v2df
15210 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
15211 tree int64_ftype_v2df
15212 = build_function_type_list (long_long_integer_type_node,
15213 V2DF_type_node, NULL_TREE);
15214 tree v2df_ftype_v2df_int
15215 = build_function_type_list (V2DF_type_node,
15216 V2DF_type_node, integer_type_node, NULL_TREE);
15217 tree v2df_ftype_v2df_int64
15218 = build_function_type_list (V2DF_type_node,
15219 V2DF_type_node, long_long_integer_type_node,
15221 tree v4sf_ftype_v4sf_v2df
15222 = build_function_type_list (V4SF_type_node,
15223 V4SF_type_node, V2DF_type_node, NULL_TREE);
15224 tree v2df_ftype_v2df_v4sf
15225 = build_function_type_list (V2DF_type_node,
15226 V2DF_type_node, V4SF_type_node, NULL_TREE);
15227 tree v2df_ftype_v2df_v2df_int
15228 = build_function_type_list (V2DF_type_node,
15229 V2DF_type_node, V2DF_type_node,
15232 tree v2df_ftype_v2df_pcdouble
15233 = build_function_type_list (V2DF_type_node,
15234 V2DF_type_node, pcdouble_type_node, NULL_TREE);
15235 tree void_ftype_pdouble_v2df
15236 = build_function_type_list (void_type_node,
15237 pdouble_type_node, V2DF_type_node, NULL_TREE);
15238 tree void_ftype_pint_int
15239 = build_function_type_list (void_type_node,
15240 pint_type_node, integer_type_node, NULL_TREE);
15241 tree void_ftype_v16qi_v16qi_pchar
15242 = build_function_type_list (void_type_node,
15243 V16QI_type_node, V16QI_type_node,
15244 pchar_type_node, NULL_TREE);
15245 tree v2df_ftype_pcdouble
15246 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
15247 tree v2df_ftype_v2df_v2df
15248 = build_function_type_list (V2DF_type_node,
15249 V2DF_type_node, V2DF_type_node, NULL_TREE);
15250 tree v16qi_ftype_v16qi_v16qi
15251 = build_function_type_list (V16QI_type_node,
15252 V16QI_type_node, V16QI_type_node, NULL_TREE);
15253 tree v8hi_ftype_v8hi_v8hi
15254 = build_function_type_list (V8HI_type_node,
15255 V8HI_type_node, V8HI_type_node, NULL_TREE);
15256 tree v4si_ftype_v4si_v4si
15257 = build_function_type_list (V4SI_type_node,
15258 V4SI_type_node, V4SI_type_node, NULL_TREE);
15259 tree v2di_ftype_v2di_v2di
15260 = build_function_type_list (V2DI_type_node,
15261 V2DI_type_node, V2DI_type_node, NULL_TREE);
15262 tree v2di_ftype_v2df_v2df
15263 = build_function_type_list (V2DI_type_node,
15264 V2DF_type_node, V2DF_type_node, NULL_TREE);
15265 tree v2df_ftype_v2df
15266 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15267 tree v2di_ftype_v2di_int
15268 = build_function_type_list (V2DI_type_node,
15269 V2DI_type_node, integer_type_node, NULL_TREE);
15270 tree v2di_ftype_v2di_v2di_int
15271 = build_function_type_list (V2DI_type_node, V2DI_type_node,
15272 V2DI_type_node, integer_type_node, NULL_TREE);
15273 tree v4si_ftype_v4si_int
15274 = build_function_type_list (V4SI_type_node,
15275 V4SI_type_node, integer_type_node, NULL_TREE);
15276 tree v8hi_ftype_v8hi_int
15277 = build_function_type_list (V8HI_type_node,
15278 V8HI_type_node, integer_type_node, NULL_TREE);
15279 tree v8hi_ftype_v8hi_v2di
15280 = build_function_type_list (V8HI_type_node,
15281 V8HI_type_node, V2DI_type_node, NULL_TREE);
15282 tree v4si_ftype_v4si_v2di
15283 = build_function_type_list (V4SI_type_node,
15284 V4SI_type_node, V2DI_type_node, NULL_TREE);
15285 tree v4si_ftype_v8hi_v8hi
15286 = build_function_type_list (V4SI_type_node,
15287 V8HI_type_node, V8HI_type_node, NULL_TREE);
15288 tree di_ftype_v8qi_v8qi
15289 = build_function_type_list (long_long_unsigned_type_node,
15290 V8QI_type_node, V8QI_type_node, NULL_TREE);
15291 tree di_ftype_v2si_v2si
15292 = build_function_type_list (long_long_unsigned_type_node,
15293 V2SI_type_node, V2SI_type_node, NULL_TREE);
15294 tree v2di_ftype_v16qi_v16qi
15295 = build_function_type_list (V2DI_type_node,
15296 V16QI_type_node, V16QI_type_node, NULL_TREE);
15297 tree v2di_ftype_v4si_v4si
15298 = build_function_type_list (V2DI_type_node,
15299 V4SI_type_node, V4SI_type_node, NULL_TREE);
15300 tree int_ftype_v16qi
15301 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
15302 tree v16qi_ftype_pcchar
15303 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
15304 tree void_ftype_pchar_v16qi
15305 = build_function_type_list (void_type_node,
15306 pchar_type_node, V16QI_type_node, NULL_TREE);
15309 tree float128_type;
15312 /* The __float80 type. */
15313 if (TYPE_MODE (long_double_type_node) == XFmode)
15314 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
15318 /* The __float80 type. */
15319 float80_type = make_node (REAL_TYPE);
15320 TYPE_PRECISION (float80_type) = 80;
15321 layout_type (float80_type);
15322 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
15327 float128_type = make_node (REAL_TYPE);
15328 TYPE_PRECISION (float128_type) = 128;
15329 layout_type (float128_type);
15330 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
15333 /* Add all builtins that are more or less simple operations on two
15335 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15337 /* Use one of the operands; the target can have a different mode for
15338 mask-generating compares. */
15339 enum machine_mode mode;
15344 mode = insn_data[d->icode].operand[1].mode;
15349 type = v16qi_ftype_v16qi_v16qi;
15352 type = v8hi_ftype_v8hi_v8hi;
15355 type = v4si_ftype_v4si_v4si;
15358 type = v2di_ftype_v2di_v2di;
15361 type = v2df_ftype_v2df_v2df;
15364 type = v4sf_ftype_v4sf_v4sf;
15367 type = v8qi_ftype_v8qi_v8qi;
15370 type = v4hi_ftype_v4hi_v4hi;
15373 type = v2si_ftype_v2si_v2si;
15376 type = di_ftype_di_di;
15380 gcc_unreachable ();
15383 /* Override for comparisons. */
15384 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15385 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15386 type = v4si_ftype_v4sf_v4sf;
15388 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15389 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15390 type = v2di_ftype_v2df_v2df;
15392 def_builtin (d->mask, d->name, type, d->code);
15395 /* Add all builtins that are more or less simple operations on 1 operand. */
15396 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15398 enum machine_mode mode;
15403 mode = insn_data[d->icode].operand[1].mode;
15408 type = v16qi_ftype_v16qi;
15411 type = v8hi_ftype_v8hi;
15414 type = v4si_ftype_v4si;
15417 type = v2df_ftype_v2df;
15420 type = v4sf_ftype_v4sf;
15423 type = v8qi_ftype_v8qi;
15426 type = v4hi_ftype_v4hi;
15429 type = v2si_ftype_v2si;
15436 def_builtin (d->mask, d->name, type, d->code);
15439 /* Add the remaining MMX insns with somewhat more complicated types. */
15440 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15441 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15442 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15443 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15445 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15446 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15447 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15449 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15450 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15452 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15453 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15455 /* comi/ucomi insns. */
15456 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15457 if (d->mask == MASK_SSE2)
15458 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15460 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15462 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15463 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15464 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15466 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15467 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15468 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15469 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15470 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15471 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15472 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15473 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15474 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15475 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15476 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15478 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15480 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15481 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15483 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15484 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15485 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15486 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15488 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15489 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15490 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15491 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15493 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15495 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15497 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15498 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15499 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15500 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15501 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15502 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15504 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15506 /* Original 3DNow! */
15507 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15508 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15509 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15510 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15511 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15512 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15513 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15514 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15515 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15516 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15517 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15518 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15519 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15520 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15521 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15522 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15523 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15524 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15525 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15526 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15528 /* 3DNow! extension as used in the Athlon CPU. */
15529 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15530 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15531 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15532 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15533 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15534 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15537 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15539 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15540 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15542 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15543 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15545 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15546 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15547 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15548 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15549 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15551 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15552 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15553 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15554 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15556 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15557 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15559 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15561 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15562 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15564 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15565 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15566 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15567 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15568 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15570 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15572 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15573 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15574 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15575 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15577 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15578 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15579 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15581 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15582 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15583 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15584 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15586 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15587 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15588 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15590 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15591 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15593 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15594 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15596 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15597 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15598 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15600 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15601 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15602 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15604 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15605 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15607 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15608 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15609 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15610 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15612 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15613 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15614 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15615 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15617 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15618 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15620 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15622 /* Prescott New Instructions. */
15623 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15624 void_ftype_pcvoid_unsigned_unsigned,
15625 IX86_BUILTIN_MONITOR);
15626 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15627 void_ftype_unsigned_unsigned,
15628 IX86_BUILTIN_MWAIT);
15629 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15631 IX86_BUILTIN_MOVSHDUP);
15632 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15634 IX86_BUILTIN_MOVSLDUP);
15635 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15636 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15639 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
15640 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
15641 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
15642 IX86_BUILTIN_PALIGNR);
15644 /* Access to the vec_init patterns. */
15645 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15646 integer_type_node, NULL_TREE);
15647 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15648 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15650 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15651 short_integer_type_node,
15652 short_integer_type_node,
15653 short_integer_type_node, NULL_TREE);
15654 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15655 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15657 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15658 char_type_node, char_type_node,
15659 char_type_node, char_type_node,
15660 char_type_node, char_type_node,
15661 char_type_node, NULL_TREE);
15662 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15663 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
15665 /* Access to the vec_extract patterns. */
15666 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15667 integer_type_node, NULL_TREE);
15668 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15669 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15671 ftype = build_function_type_list (long_long_integer_type_node,
15672 V2DI_type_node, integer_type_node,
15674 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15675 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15677 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15678 integer_type_node, NULL_TREE);
15679 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15680 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15682 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15683 integer_type_node, NULL_TREE);
15684 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15685 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15687 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15688 integer_type_node, NULL_TREE);
15689 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15690 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15692 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15693 integer_type_node, NULL_TREE);
15694 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15695 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15697 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15698 integer_type_node, NULL_TREE);
15699 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15700 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15702 /* Access to the vec_set patterns. */
15703 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15705 integer_type_node, NULL_TREE);
15706 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15707 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15709 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15711 integer_type_node, NULL_TREE);
15712 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15713 ftype, IX86_BUILTIN_VEC_SET_V4HI);
15716 /* Errors in the source file can cause expand_expr to return const0_rtx
15717 where we expect a vector. To avoid crashing, use one of the vector
15718 clear instructions. */
15720 safe_vector_operand (rtx x, enum machine_mode mode)
15722 if (x == const0_rtx)
15723 x = CONST0_RTX (mode);
15727 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15730 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15733 tree arg0 = TREE_VALUE (arglist);
15734 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15735 rtx op0 = expand_normal (arg0);
15736 rtx op1 = expand_normal (arg1);
15737 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15738 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15739 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15741 if (VECTOR_MODE_P (mode0))
15742 op0 = safe_vector_operand (op0, mode0);
15743 if (VECTOR_MODE_P (mode1))
15744 op1 = safe_vector_operand (op1, mode1);
15746 if (optimize || !target
15747 || GET_MODE (target) != tmode
15748 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15749 target = gen_reg_rtx (tmode);
15751 if (GET_MODE (op1) == SImode && mode1 == TImode)
15753 rtx x = gen_reg_rtx (V4SImode);
15754 emit_insn (gen_sse2_loadd (x, op1));
15755 op1 = gen_lowpart (TImode, x);
15758 /* The insn must want input operands in the same modes as the
15760 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15761 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15763 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15764 op0 = copy_to_mode_reg (mode0, op0);
15765 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15766 op1 = copy_to_mode_reg (mode1, op1);
15768 /* ??? Using ix86_fixup_binary_operands is problematic when
15769 we've got mismatched modes. Fake it. */
15775 if (tmode == mode0 && tmode == mode1)
15777 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15781 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15783 op0 = force_reg (mode0, op0);
15784 op1 = force_reg (mode1, op1);
15785 target = gen_reg_rtx (tmode);
15788 pat = GEN_FCN (icode) (target, op0, op1);
15795 /* Subroutine of ix86_expand_builtin to take care of stores. */
15798 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15801 tree arg0 = TREE_VALUE (arglist);
15802 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15803 rtx op0 = expand_normal (arg0);
15804 rtx op1 = expand_normal (arg1);
15805 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15806 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15808 if (VECTOR_MODE_P (mode1))
15809 op1 = safe_vector_operand (op1, mode1);
15811 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15812 op1 = copy_to_mode_reg (mode1, op1);
15814 pat = GEN_FCN (icode) (op0, op1);
15820 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15823 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15824 rtx target, int do_load)
15827 tree arg0 = TREE_VALUE (arglist);
15828 rtx op0 = expand_normal (arg0);
15829 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15830 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15832 if (optimize || !target
15833 || GET_MODE (target) != tmode
15834 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15835 target = gen_reg_rtx (tmode);
15837 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15840 if (VECTOR_MODE_P (mode0))
15841 op0 = safe_vector_operand (op0, mode0);
15843 if ((optimize && !register_operand (op0, mode0))
15844 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15845 op0 = copy_to_mode_reg (mode0, op0);
15848 pat = GEN_FCN (icode) (target, op0);
15855 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15856 sqrtss, rsqrtss, rcpss. */
15859 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15862 tree arg0 = TREE_VALUE (arglist);
15863 rtx op1, op0 = expand_normal (arg0);
15864 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15865 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15867 if (optimize || !target
15868 || GET_MODE (target) != tmode
15869 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15870 target = gen_reg_rtx (tmode);
15872 if (VECTOR_MODE_P (mode0))
15873 op0 = safe_vector_operand (op0, mode0);
15875 if ((optimize && !register_operand (op0, mode0))
15876 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15877 op0 = copy_to_mode_reg (mode0, op0);
15880 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15881 op1 = copy_to_mode_reg (mode0, op1);
15883 pat = GEN_FCN (icode) (target, op0, op1);
15890 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15893 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15897 tree arg0 = TREE_VALUE (arglist);
15898 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15899 rtx op0 = expand_normal (arg0);
15900 rtx op1 = expand_normal (arg1);
15902 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15903 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15904 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15905 enum rtx_code comparison = d->comparison;
15907 if (VECTOR_MODE_P (mode0))
15908 op0 = safe_vector_operand (op0, mode0);
15909 if (VECTOR_MODE_P (mode1))
15910 op1 = safe_vector_operand (op1, mode1);
15912 /* Swap operands if we have a comparison that isn't available in
15914 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15916 rtx tmp = gen_reg_rtx (mode1);
15917 emit_move_insn (tmp, op1);
15922 if (optimize || !target
15923 || GET_MODE (target) != tmode
15924 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15925 target = gen_reg_rtx (tmode);
15927 if ((optimize && !register_operand (op0, mode0))
15928 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15929 op0 = copy_to_mode_reg (mode0, op0);
15930 if ((optimize && !register_operand (op1, mode1))
15931 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15932 op1 = copy_to_mode_reg (mode1, op1);
15934 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15935 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15942 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15945 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15949 tree arg0 = TREE_VALUE (arglist);
15950 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15951 rtx op0 = expand_normal (arg0);
15952 rtx op1 = expand_normal (arg1);
15954 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15955 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15956 enum rtx_code comparison = d->comparison;
15958 if (VECTOR_MODE_P (mode0))
15959 op0 = safe_vector_operand (op0, mode0);
15960 if (VECTOR_MODE_P (mode1))
15961 op1 = safe_vector_operand (op1, mode1);
15963 /* Swap operands if we have a comparison that isn't available in
15965 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15972 target = gen_reg_rtx (SImode);
15973 emit_move_insn (target, const0_rtx);
15974 target = gen_rtx_SUBREG (QImode, target, 0);
15976 if ((optimize && !register_operand (op0, mode0))
15977 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15978 op0 = copy_to_mode_reg (mode0, op0);
15979 if ((optimize && !register_operand (op1, mode1))
15980 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15981 op1 = copy_to_mode_reg (mode1, op1);
15983 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15984 pat = GEN_FCN (d->icode) (op0, op1);
15988 emit_insn (gen_rtx_SET (VOIDmode,
15989 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15990 gen_rtx_fmt_ee (comparison, QImode,
15994 return SUBREG_REG (target);
15997 /* Return the integer constant in ARG. Constrain it to be in the range
15998 of the subparts of VEC_TYPE; issue an error if not. */
16001 get_element_number (tree vec_type, tree arg)
16003 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
16005 if (!host_integerp (arg, 1)
16006 || (elt = tree_low_cst (arg, 1), elt > max))
16008 error ("selector must be an integer constant in the range 0..%wi", max);
16015 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16016 ix86_expand_vector_init. We DO have language-level syntax for this, in
16017 the form of (type){ init-list }. Except that since we can't place emms
16018 instructions from inside the compiler, we can't allow the use of MMX
16019 registers unless the user explicitly asks for it. So we do *not* define
16020 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
16021 we have builtins invoked by mmintrin.h that gives us license to emit
16022 these sorts of instructions. */
16025 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
16027 enum machine_mode tmode = TYPE_MODE (type);
16028 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
16029 int i, n_elt = GET_MODE_NUNITS (tmode);
16030 rtvec v = rtvec_alloc (n_elt);
16032 gcc_assert (VECTOR_MODE_P (tmode));
16034 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
16036 rtx x = expand_normal (TREE_VALUE (arglist));
16037 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
16040 gcc_assert (arglist == NULL);
16042 if (!target || !register_operand (target, tmode))
16043 target = gen_reg_rtx (tmode);
16045 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
16049 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16050 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
16051 had a language-level syntax for referencing vector elements. */
16054 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
16056 enum machine_mode tmode, mode0;
16061 arg0 = TREE_VALUE (arglist);
16062 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16064 op0 = expand_normal (arg0);
16065 elt = get_element_number (TREE_TYPE (arg0), arg1);
16067 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16068 mode0 = TYPE_MODE (TREE_TYPE (arg0));
16069 gcc_assert (VECTOR_MODE_P (mode0));
16071 op0 = force_reg (mode0, op0);
16073 if (optimize || !target || !register_operand (target, tmode))
16074 target = gen_reg_rtx (tmode);
16076 ix86_expand_vector_extract (true, target, op0, elt);
16081 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
16082 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
16083 a language-level syntax for referencing vector elements. */
16086 ix86_expand_vec_set_builtin (tree arglist)
16088 enum machine_mode tmode, mode1;
16089 tree arg0, arg1, arg2;
16093 arg0 = TREE_VALUE (arglist);
16094 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16095 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16097 tmode = TYPE_MODE (TREE_TYPE (arg0));
16098 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
16099 gcc_assert (VECTOR_MODE_P (tmode));
16101 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
16102 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
16103 elt = get_element_number (TREE_TYPE (arg0), arg2);
16105 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
16106 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
16108 op0 = force_reg (tmode, op0);
16109 op1 = force_reg (mode1, op1);
16111 ix86_expand_vector_set (true, op0, op1, elt);
16116 /* Expand an expression EXP that calls a built-in function,
16117 with result going to TARGET if that's convenient
16118 (and in mode MODE if that's convenient).
16119 SUBTARGET may be used as the target for computing one of EXP's operands.
16120 IGNORE is nonzero if the value is to be ignored. */
16123 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16124 enum machine_mode mode ATTRIBUTE_UNUSED,
16125 int ignore ATTRIBUTE_UNUSED)
16127 const struct builtin_description *d;
16129 enum insn_code icode;
16130 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
16131 tree arglist = TREE_OPERAND (exp, 1);
16132 tree arg0, arg1, arg2;
16133 rtx op0, op1, op2, pat;
16134 enum machine_mode tmode, mode0, mode1, mode2, mode3;
16135 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
16139 case IX86_BUILTIN_EMMS:
16140 emit_insn (gen_mmx_emms ());
16143 case IX86_BUILTIN_SFENCE:
16144 emit_insn (gen_sse_sfence ());
16147 case IX86_BUILTIN_MASKMOVQ:
16148 case IX86_BUILTIN_MASKMOVDQU:
16149 icode = (fcode == IX86_BUILTIN_MASKMOVQ
16150 ? CODE_FOR_mmx_maskmovq
16151 : CODE_FOR_sse2_maskmovdqu);
16152 /* Note the arg order is different from the operand order. */
16153 arg1 = TREE_VALUE (arglist);
16154 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
16155 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16156 op0 = expand_normal (arg0);
16157 op1 = expand_normal (arg1);
16158 op2 = expand_normal (arg2);
16159 mode0 = insn_data[icode].operand[0].mode;
16160 mode1 = insn_data[icode].operand[1].mode;
16161 mode2 = insn_data[icode].operand[2].mode;
16163 op0 = force_reg (Pmode, op0);
16164 op0 = gen_rtx_MEM (mode1, op0);
16166 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
16167 op0 = copy_to_mode_reg (mode0, op0);
16168 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
16169 op1 = copy_to_mode_reg (mode1, op1);
16170 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
16171 op2 = copy_to_mode_reg (mode2, op2);
16172 pat = GEN_FCN (icode) (op0, op1, op2);
16178 case IX86_BUILTIN_SQRTSS:
16179 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
16180 case IX86_BUILTIN_RSQRTSS:
16181 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
16182 case IX86_BUILTIN_RCPSS:
16183 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
16185 case IX86_BUILTIN_LOADUPS:
16186 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
16188 case IX86_BUILTIN_STOREUPS:
16189 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
16191 case IX86_BUILTIN_LOADHPS:
16192 case IX86_BUILTIN_LOADLPS:
16193 case IX86_BUILTIN_LOADHPD:
16194 case IX86_BUILTIN_LOADLPD:
16195 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
16196 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
16197 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
16198 : CODE_FOR_sse2_loadlpd);
16199 arg0 = TREE_VALUE (arglist);
16200 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16201 op0 = expand_normal (arg0);
16202 op1 = expand_normal (arg1);
16203 tmode = insn_data[icode].operand[0].mode;
16204 mode0 = insn_data[icode].operand[1].mode;
16205 mode1 = insn_data[icode].operand[2].mode;
16207 op0 = force_reg (mode0, op0);
16208 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
16209 if (optimize || target == 0
16210 || GET_MODE (target) != tmode
16211 || !register_operand (target, tmode))
16212 target = gen_reg_rtx (tmode);
16213 pat = GEN_FCN (icode) (target, op0, op1);
16219 case IX86_BUILTIN_STOREHPS:
16220 case IX86_BUILTIN_STORELPS:
16221 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
16222 : CODE_FOR_sse_storelps);
16223 arg0 = TREE_VALUE (arglist);
16224 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16225 op0 = expand_normal (arg0);
16226 op1 = expand_normal (arg1);
16227 mode0 = insn_data[icode].operand[0].mode;
16228 mode1 = insn_data[icode].operand[1].mode;
16230 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16231 op1 = force_reg (mode1, op1);
16233 pat = GEN_FCN (icode) (op0, op1);
16239 case IX86_BUILTIN_MOVNTPS:
16240 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
16241 case IX86_BUILTIN_MOVNTQ:
16242 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
16244 case IX86_BUILTIN_LDMXCSR:
16245 op0 = expand_normal (TREE_VALUE (arglist));
16246 target = assign_386_stack_local (SImode, SLOT_TEMP);
16247 emit_move_insn (target, op0);
16248 emit_insn (gen_sse_ldmxcsr (target));
16251 case IX86_BUILTIN_STMXCSR:
16252 target = assign_386_stack_local (SImode, SLOT_TEMP);
16253 emit_insn (gen_sse_stmxcsr (target));
16254 return copy_to_mode_reg (SImode, target);
16256 case IX86_BUILTIN_SHUFPS:
16257 case IX86_BUILTIN_SHUFPD:
16258 icode = (fcode == IX86_BUILTIN_SHUFPS
16259 ? CODE_FOR_sse_shufps
16260 : CODE_FOR_sse2_shufpd);
16261 arg0 = TREE_VALUE (arglist);
16262 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16263 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16264 op0 = expand_normal (arg0);
16265 op1 = expand_normal (arg1);
16266 op2 = expand_normal (arg2);
16267 tmode = insn_data[icode].operand[0].mode;
16268 mode0 = insn_data[icode].operand[1].mode;
16269 mode1 = insn_data[icode].operand[2].mode;
16270 mode2 = insn_data[icode].operand[3].mode;
16272 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16273 op0 = copy_to_mode_reg (mode0, op0);
16274 if ((optimize && !register_operand (op1, mode1))
16275 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
16276 op1 = copy_to_mode_reg (mode1, op1);
16277 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16279 /* @@@ better error message */
16280 error ("mask must be an immediate");
16281 return gen_reg_rtx (tmode);
16283 if (optimize || target == 0
16284 || GET_MODE (target) != tmode
16285 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16286 target = gen_reg_rtx (tmode);
16287 pat = GEN_FCN (icode) (target, op0, op1, op2);
16293 case IX86_BUILTIN_PSHUFW:
16294 case IX86_BUILTIN_PSHUFD:
16295 case IX86_BUILTIN_PSHUFHW:
16296 case IX86_BUILTIN_PSHUFLW:
16297 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
16298 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
16299 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
16300 : CODE_FOR_mmx_pshufw);
16301 arg0 = TREE_VALUE (arglist);
16302 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16303 op0 = expand_normal (arg0);
16304 op1 = expand_normal (arg1);
16305 tmode = insn_data[icode].operand[0].mode;
16306 mode1 = insn_data[icode].operand[1].mode;
16307 mode2 = insn_data[icode].operand[2].mode;
16309 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16310 op0 = copy_to_mode_reg (mode1, op0);
16311 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16313 /* @@@ better error message */
16314 error ("mask must be an immediate");
16318 || GET_MODE (target) != tmode
16319 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16320 target = gen_reg_rtx (tmode);
16321 pat = GEN_FCN (icode) (target, op0, op1);
16327 case IX86_BUILTIN_PSLLDQI128:
16328 case IX86_BUILTIN_PSRLDQI128:
16329 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16330 : CODE_FOR_sse2_lshrti3);
16331 arg0 = TREE_VALUE (arglist);
16332 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16333 op0 = expand_normal (arg0);
16334 op1 = expand_normal (arg1);
16335 tmode = insn_data[icode].operand[0].mode;
16336 mode1 = insn_data[icode].operand[1].mode;
16337 mode2 = insn_data[icode].operand[2].mode;
16339 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16341 op0 = copy_to_reg (op0);
16342 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16344 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16346 error ("shift must be an immediate");
16349 target = gen_reg_rtx (V2DImode);
16350 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16356 case IX86_BUILTIN_FEMMS:
16357 emit_insn (gen_mmx_femms ());
16360 case IX86_BUILTIN_PAVGUSB:
16361 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16363 case IX86_BUILTIN_PF2ID:
16364 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16366 case IX86_BUILTIN_PFACC:
16367 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16369 case IX86_BUILTIN_PFADD:
16370 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16372 case IX86_BUILTIN_PFCMPEQ:
16373 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16375 case IX86_BUILTIN_PFCMPGE:
16376 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16378 case IX86_BUILTIN_PFCMPGT:
16379 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16381 case IX86_BUILTIN_PFMAX:
16382 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16384 case IX86_BUILTIN_PFMIN:
16385 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16387 case IX86_BUILTIN_PFMUL:
16388 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16390 case IX86_BUILTIN_PFRCP:
16391 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16393 case IX86_BUILTIN_PFRCPIT1:
16394 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16396 case IX86_BUILTIN_PFRCPIT2:
16397 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16399 case IX86_BUILTIN_PFRSQIT1:
16400 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16402 case IX86_BUILTIN_PFRSQRT:
16403 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16405 case IX86_BUILTIN_PFSUB:
16406 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16408 case IX86_BUILTIN_PFSUBR:
16409 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16411 case IX86_BUILTIN_PI2FD:
16412 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16414 case IX86_BUILTIN_PMULHRW:
16415 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16417 case IX86_BUILTIN_PF2IW:
16418 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16420 case IX86_BUILTIN_PFNACC:
16421 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16423 case IX86_BUILTIN_PFPNACC:
16424 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16426 case IX86_BUILTIN_PI2FW:
16427 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16429 case IX86_BUILTIN_PSWAPDSI:
16430 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16432 case IX86_BUILTIN_PSWAPDSF:
16433 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16435 case IX86_BUILTIN_SQRTSD:
16436 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16437 case IX86_BUILTIN_LOADUPD:
16438 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16439 case IX86_BUILTIN_STOREUPD:
16440 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16442 case IX86_BUILTIN_MFENCE:
16443 emit_insn (gen_sse2_mfence ());
16445 case IX86_BUILTIN_LFENCE:
16446 emit_insn (gen_sse2_lfence ());
16449 case IX86_BUILTIN_CLFLUSH:
16450 arg0 = TREE_VALUE (arglist);
16451 op0 = expand_normal (arg0);
16452 icode = CODE_FOR_sse2_clflush;
16453 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16454 op0 = copy_to_mode_reg (Pmode, op0);
16456 emit_insn (gen_sse2_clflush (op0));
16459 case IX86_BUILTIN_MOVNTPD:
16460 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16461 case IX86_BUILTIN_MOVNTDQ:
16462 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16463 case IX86_BUILTIN_MOVNTI:
16464 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16466 case IX86_BUILTIN_LOADDQU:
16467 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16468 case IX86_BUILTIN_STOREDQU:
16469 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16471 case IX86_BUILTIN_MONITOR:
16472 arg0 = TREE_VALUE (arglist);
16473 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16474 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16475 op0 = expand_normal (arg0);
16476 op1 = expand_normal (arg1);
16477 op2 = expand_normal (arg2);
16479 op0 = copy_to_mode_reg (Pmode, op0);
16481 op1 = copy_to_mode_reg (SImode, op1);
16483 op2 = copy_to_mode_reg (SImode, op2);
16485 emit_insn (gen_sse3_monitor (op0, op1, op2));
16487 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
16490 case IX86_BUILTIN_MWAIT:
16491 arg0 = TREE_VALUE (arglist);
16492 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16493 op0 = expand_normal (arg0);
16494 op1 = expand_normal (arg1);
16496 op0 = copy_to_mode_reg (SImode, op0);
16498 op1 = copy_to_mode_reg (SImode, op1);
16499 emit_insn (gen_sse3_mwait (op0, op1));
16502 case IX86_BUILTIN_LDDQU:
16503 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16506 case IX86_BUILTIN_PALIGNR:
16507 case IX86_BUILTIN_PALIGNR128:
16508 if (fcode == IX86_BUILTIN_PALIGNR)
16510 icode = CODE_FOR_ssse3_palignrdi;
16515 icode = CODE_FOR_ssse3_palignrti;
16518 arg0 = TREE_VALUE (arglist);
16519 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16520 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16521 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
16522 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
16523 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
16524 tmode = insn_data[icode].operand[0].mode;
16525 mode1 = insn_data[icode].operand[1].mode;
16526 mode2 = insn_data[icode].operand[2].mode;
16527 mode3 = insn_data[icode].operand[3].mode;
16529 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16531 op0 = copy_to_reg (op0);
16532 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16534 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16536 op1 = copy_to_reg (op1);
16537 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
16539 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
16541 error ("shift must be an immediate");
16544 target = gen_reg_rtx (mode);
16545 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
16552 case IX86_BUILTIN_VEC_INIT_V2SI:
16553 case IX86_BUILTIN_VEC_INIT_V4HI:
16554 case IX86_BUILTIN_VEC_INIT_V8QI:
16555 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16557 case IX86_BUILTIN_VEC_EXT_V2DF:
16558 case IX86_BUILTIN_VEC_EXT_V2DI:
16559 case IX86_BUILTIN_VEC_EXT_V4SF:
16560 case IX86_BUILTIN_VEC_EXT_V4SI:
16561 case IX86_BUILTIN_VEC_EXT_V8HI:
16562 case IX86_BUILTIN_VEC_EXT_V2SI:
16563 case IX86_BUILTIN_VEC_EXT_V4HI:
16564 return ix86_expand_vec_ext_builtin (arglist, target);
16566 case IX86_BUILTIN_VEC_SET_V8HI:
16567 case IX86_BUILTIN_VEC_SET_V4HI:
16568 return ix86_expand_vec_set_builtin (arglist);
16574 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16575 if (d->code == fcode)
16577 /* Compares are treated specially. */
16578 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16579 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16580 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16581 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16582 return ix86_expand_sse_compare (d, arglist, target);
16584 return ix86_expand_binop_builtin (d->icode, arglist, target);
16587 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16588 if (d->code == fcode)
16589 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16591 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16592 if (d->code == fcode)
16593 return ix86_expand_sse_comi (d, arglist, target);
16595 gcc_unreachable ();
16598 /* Store OPERAND to the memory after reload is completed. This means
16599 that we can't easily use assign_stack_local. */
16601 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16605 gcc_assert (reload_completed);
16606 if (TARGET_RED_ZONE)
16608 result = gen_rtx_MEM (mode,
16609 gen_rtx_PLUS (Pmode,
16611 GEN_INT (-RED_ZONE_SIZE)));
16612 emit_move_insn (result, operand);
16614 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16620 operand = gen_lowpart (DImode, operand);
16624 gen_rtx_SET (VOIDmode,
16625 gen_rtx_MEM (DImode,
16626 gen_rtx_PRE_DEC (DImode,
16627 stack_pointer_rtx)),
16631 gcc_unreachable ();
16633 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16642 split_di (&operand, 1, operands, operands + 1);
16644 gen_rtx_SET (VOIDmode,
16645 gen_rtx_MEM (SImode,
16646 gen_rtx_PRE_DEC (Pmode,
16647 stack_pointer_rtx)),
16650 gen_rtx_SET (VOIDmode,
16651 gen_rtx_MEM (SImode,
16652 gen_rtx_PRE_DEC (Pmode,
16653 stack_pointer_rtx)),
16658 /* Store HImodes as SImodes. */
16659 operand = gen_lowpart (SImode, operand);
16663 gen_rtx_SET (VOIDmode,
16664 gen_rtx_MEM (GET_MODE (operand),
16665 gen_rtx_PRE_DEC (SImode,
16666 stack_pointer_rtx)),
16670 gcc_unreachable ();
16672 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16677 /* Free operand from the memory. */
16679 ix86_free_from_memory (enum machine_mode mode)
16681 if (!TARGET_RED_ZONE)
16685 if (mode == DImode || TARGET_64BIT)
16689 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16690 to pop or add instruction if registers are available. */
16691 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16692 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16697 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16698 QImode must go into class Q_REGS.
16699 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16700 movdf to do mem-to-mem moves through integer regs. */
16702 ix86_preferred_reload_class (rtx x, enum reg_class class)
16704 enum machine_mode mode = GET_MODE (x);
16706 /* We're only allowed to return a subclass of CLASS. Many of the
16707 following checks fail for NO_REGS, so eliminate that early. */
16708 if (class == NO_REGS)
16711 /* All classes can load zeros. */
16712 if (x == CONST0_RTX (mode))
16715 /* Force constants into memory if we are loading a (nonzero) constant into
16716 an MMX or SSE register. This is because there are no MMX/SSE instructions
16717 to load from a constant. */
16719 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
16722 /* Prefer SSE regs only, if we can use them for math. */
16723 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
16724 return SSE_CLASS_P (class) ? class : NO_REGS;
16726 /* Floating-point constants need more complex checks. */
16727 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16729 /* General regs can load everything. */
16730 if (reg_class_subset_p (class, GENERAL_REGS))
16733 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16734 zero above. We only want to wind up preferring 80387 registers if
16735 we plan on doing computation with them. */
16737 && standard_80387_constant_p (x))
16739 /* Limit class to non-sse. */
16740 if (class == FLOAT_SSE_REGS)
16742 if (class == FP_TOP_SSE_REGS)
16744 if (class == FP_SECOND_SSE_REGS)
16745 return FP_SECOND_REG;
16746 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16753 /* Generally when we see PLUS here, it's the function invariant
16754 (plus soft-fp const_int). Which can only be computed into general
16756 if (GET_CODE (x) == PLUS)
16757 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16759 /* QImode constants are easy to load, but non-constant QImode data
16760 must go into Q_REGS. */
16761 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16763 if (reg_class_subset_p (class, Q_REGS))
16765 if (reg_class_subset_p (Q_REGS, class))
16773 /* Discourage putting floating-point values in SSE registers unless
16774 SSE math is being used, and likewise for the 387 registers. */
16776 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
16778 enum machine_mode mode = GET_MODE (x);
16780 /* Restrict the output reload class to the register bank that we are doing
16781 math on. If we would like not to return a subset of CLASS, reject this
16782 alternative: if reload cannot do this, it will still use its choice. */
16783 mode = GET_MODE (x);
16784 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16785 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
16787 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
16789 if (class == FP_TOP_SSE_REGS)
16791 else if (class == FP_SECOND_SSE_REGS)
16792 return FP_SECOND_REG;
16794 return FLOAT_CLASS_P (class) ? class : NO_REGS;
16800 /* If we are copying between general and FP registers, we need a memory
16801 location. The same is true for SSE and MMX registers.
16803 The macro can't work reliably when one of the CLASSES is class containing
16804 registers from multiple units (SSE, MMX, integer). We avoid this by never
16805 combining those units in single alternative in the machine description.
16806 Ensure that this constraint holds to avoid unexpected surprises.
16808 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16809 enforce these sanity checks. */
16812 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16813 enum machine_mode mode, int strict)
16815 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16816 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16817 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16818 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16819 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16820 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16822 gcc_assert (!strict);
16826 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16829 /* ??? This is a lie. We do have moves between mmx/general, and for
16830 mmx/sse2. But by saying we need secondary memory we discourage the
16831 register allocator from using the mmx registers unless needed. */
16832 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16835 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16837 /* SSE1 doesn't have any direct moves from other classes. */
16841 /* If the target says that inter-unit moves are more expensive
16842 than moving through memory, then don't generate them. */
16843 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16846 /* Between SSE and general, we have moves no larger than word size. */
16847 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16850 /* ??? For the cost of one register reformat penalty, we could use
16851 the same instructions to move SFmode and DFmode data, but the
16852 relevant move patterns don't support those alternatives. */
16853 if (mode == SFmode || mode == DFmode)
16860 /* Return true if the registers in CLASS cannot represent the change from
16861 modes FROM to TO. */
16864 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16865 enum reg_class class)
16870 /* x87 registers can't do subreg at all, as all values are reformatted
16871 to extended precision. */
16872 if (MAYBE_FLOAT_CLASS_P (class))
16875 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16877 /* Vector registers do not support QI or HImode loads. If we don't
16878 disallow a change to these modes, reload will assume it's ok to
16879 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16880 the vec_dupv4hi pattern. */
16881 if (GET_MODE_SIZE (from) < 4)
16884 /* Vector registers do not support subreg with nonzero offsets, which
16885 are otherwise valid for integer registers. Since we can't see
16886 whether we have a nonzero offset from here, prohibit all
16887 nonparadoxical subregs changing size. */
16888 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16895 /* Return the cost of moving data from a register in class CLASS1 to
16896 one in class CLASS2.
16898 It is not required that the cost always equal 2 when FROM is the same as TO;
16899 on some machines it is expensive to move between registers if they are not
16900 general registers. */
16903 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16904 enum reg_class class2)
16906 /* In case we require secondary memory, compute cost of the store followed
16907 by load. In order to avoid bad register allocation choices, we need
16908 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16910 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16914 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16915 MEMORY_MOVE_COST (mode, class1, 1));
16916 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16917 MEMORY_MOVE_COST (mode, class2, 1));
16919 /* In case of copying from general_purpose_register we may emit multiple
16920 stores followed by single load causing memory size mismatch stall.
16921 Count this as arbitrarily high cost of 20. */
16922 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16925 /* In the case of FP/MMX moves, the registers actually overlap, and we
16926 have to switch modes in order to treat them differently. */
16927 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16928 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16934 /* Moves between SSE/MMX and integer unit are expensive. */
16935 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16936 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16937 return ix86_cost->mmxsse_to_integer;
16938 if (MAYBE_FLOAT_CLASS_P (class1))
16939 return ix86_cost->fp_move;
16940 if (MAYBE_SSE_CLASS_P (class1))
16941 return ix86_cost->sse_move;
16942 if (MAYBE_MMX_CLASS_P (class1))
16943 return ix86_cost->mmx_move;
16947 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16950 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16952 /* Flags and only flags can only hold CCmode values. */
16953 if (CC_REGNO_P (regno))
16954 return GET_MODE_CLASS (mode) == MODE_CC;
16955 if (GET_MODE_CLASS (mode) == MODE_CC
16956 || GET_MODE_CLASS (mode) == MODE_RANDOM
16957 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16959 if (FP_REGNO_P (regno))
16960 return VALID_FP_MODE_P (mode);
16961 if (SSE_REGNO_P (regno))
16963 /* We implement the move patterns for all vector modes into and
16964 out of SSE registers, even when no operation instructions
16966 return (VALID_SSE_REG_MODE (mode)
16967 || VALID_SSE2_REG_MODE (mode)
16968 || VALID_MMX_REG_MODE (mode)
16969 || VALID_MMX_REG_MODE_3DNOW (mode));
16971 if (MMX_REGNO_P (regno))
16973 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16974 so if the register is available at all, then we can move data of
16975 the given mode into or out of it. */
16976 return (VALID_MMX_REG_MODE (mode)
16977 || VALID_MMX_REG_MODE_3DNOW (mode));
16980 if (mode == QImode)
16982 /* Take care for QImode values - they can be in non-QI regs,
16983 but then they do cause partial register stalls. */
16984 if (regno < 4 || TARGET_64BIT)
16986 if (!TARGET_PARTIAL_REG_STALL)
16988 return reload_in_progress || reload_completed;
16990 /* We handle both integer and floats in the general purpose registers. */
16991 else if (VALID_INT_MODE_P (mode))
16993 else if (VALID_FP_MODE_P (mode))
16995 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16996 on to use that value in smaller contexts, this can easily force a
16997 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16998 supporting DImode, allow it. */
16999 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
17005 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
17006 tieable integer mode. */
17009 ix86_tieable_integer_mode_p (enum machine_mode mode)
17018 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
17021 return TARGET_64BIT;
17028 /* Return true if MODE1 is accessible in a register that can hold MODE2
17029 without copying. That is, all register classes that can hold MODE2
17030 can also hold MODE1. */
17033 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
17035 if (mode1 == mode2)
17038 if (ix86_tieable_integer_mode_p (mode1)
17039 && ix86_tieable_integer_mode_p (mode2))
17042 /* MODE2 being XFmode implies fp stack or general regs, which means we
17043 can tie any smaller floating point modes to it. Note that we do not
17044 tie this with TFmode. */
17045 if (mode2 == XFmode)
17046 return mode1 == SFmode || mode1 == DFmode;
17048 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
17049 that we can tie it with SFmode. */
17050 if (mode2 == DFmode)
17051 return mode1 == SFmode;
17053 /* If MODE2 is only appropriate for an SSE register, then tie with
17054 any other mode acceptable to SSE registers. */
17055 if (GET_MODE_SIZE (mode2) >= 8
17056 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
17057 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
17059 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
17060 with any other mode acceptable to MMX registers. */
17061 if (GET_MODE_SIZE (mode2) == 8
17062 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
17063 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
17068 /* Return the cost of moving data of mode M between a
17069 register and memory. A value of 2 is the default; this cost is
17070 relative to those in `REGISTER_MOVE_COST'.
17072 If moving between registers and memory is more expensive than
17073 between two registers, you should define this macro to express the
17076 Model also increased moving costs of QImode registers in non
17080 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
17082 if (FLOAT_CLASS_P (class))
17099 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
17101 if (SSE_CLASS_P (class))
17104 switch (GET_MODE_SIZE (mode))
17118 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
17120 if (MMX_CLASS_P (class))
17123 switch (GET_MODE_SIZE (mode))
17134 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
17136 switch (GET_MODE_SIZE (mode))
17140 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
17141 : ix86_cost->movzbl_load);
17143 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
17144 : ix86_cost->int_store[0] + 4);
17147 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
17149 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
17150 if (mode == TFmode)
17152 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
17153 * (((int) GET_MODE_SIZE (mode)
17154 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
17158 /* Compute a (partial) cost for rtx X. Return true if the complete
17159 cost has been computed, and false if subexpressions should be
17160 scanned. In either case, *TOTAL contains the cost result. */
17163 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
17165 enum machine_mode mode = GET_MODE (x);
17173 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
17175 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
17177 else if (flag_pic && SYMBOLIC_CONST (x)
17179 || (!GET_CODE (x) != LABEL_REF
17180 && (GET_CODE (x) != SYMBOL_REF
17181 || !SYMBOL_REF_LOCAL_P (x)))))
17188 if (mode == VOIDmode)
17191 switch (standard_80387_constant_p (x))
17196 default: /* Other constants */
17201 /* Start with (MEM (SYMBOL_REF)), since that's where
17202 it'll probably end up. Add a penalty for size. */
17203 *total = (COSTS_N_INSNS (1)
17204 + (flag_pic != 0 && !TARGET_64BIT)
17205 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
17211 /* The zero extensions is often completely free on x86_64, so make
17212 it as cheap as possible. */
17213 if (TARGET_64BIT && mode == DImode
17214 && GET_MODE (XEXP (x, 0)) == SImode)
17216 else if (TARGET_ZERO_EXTEND_WITH_AND)
17217 *total = ix86_cost->add;
17219 *total = ix86_cost->movzx;
17223 *total = ix86_cost->movsx;
17227 if (GET_CODE (XEXP (x, 1)) == CONST_INT
17228 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
17230 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17233 *total = ix86_cost->add;
17236 if ((value == 2 || value == 3)
17237 && ix86_cost->lea <= ix86_cost->shift_const)
17239 *total = ix86_cost->lea;
17249 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
17251 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17253 if (INTVAL (XEXP (x, 1)) > 32)
17254 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
17256 *total = ix86_cost->shift_const * 2;
17260 if (GET_CODE (XEXP (x, 1)) == AND)
17261 *total = ix86_cost->shift_var * 2;
17263 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
17268 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17269 *total = ix86_cost->shift_const;
17271 *total = ix86_cost->shift_var;
17276 if (FLOAT_MODE_P (mode))
17278 *total = ix86_cost->fmul;
17283 rtx op0 = XEXP (x, 0);
17284 rtx op1 = XEXP (x, 1);
17286 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17288 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
17289 for (nbits = 0; value != 0; value &= value - 1)
17293 /* This is arbitrary. */
17296 /* Compute costs correctly for widening multiplication. */
17297 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
17298 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
17299 == GET_MODE_SIZE (mode))
17301 int is_mulwiden = 0;
17302 enum machine_mode inner_mode = GET_MODE (op0);
17304 if (GET_CODE (op0) == GET_CODE (op1))
17305 is_mulwiden = 1, op1 = XEXP (op1, 0);
17306 else if (GET_CODE (op1) == CONST_INT)
17308 if (GET_CODE (op0) == SIGN_EXTEND)
17309 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
17312 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
17316 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
17319 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
17320 + nbits * ix86_cost->mult_bit
17321 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
17330 if (FLOAT_MODE_P (mode))
17331 *total = ix86_cost->fdiv;
17333 *total = ix86_cost->divide[MODE_INDEX (mode)];
17337 if (FLOAT_MODE_P (mode))
17338 *total = ix86_cost->fadd;
17339 else if (GET_MODE_CLASS (mode) == MODE_INT
17340 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
17342 if (GET_CODE (XEXP (x, 0)) == PLUS
17343 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
17344 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
17345 && CONSTANT_P (XEXP (x, 1)))
17347 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
17348 if (val == 2 || val == 4 || val == 8)
17350 *total = ix86_cost->lea;
17351 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17352 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
17354 *total += rtx_cost (XEXP (x, 1), outer_code);
17358 else if (GET_CODE (XEXP (x, 0)) == MULT
17359 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
17361 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
17362 if (val == 2 || val == 4 || val == 8)
17364 *total = ix86_cost->lea;
17365 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17366 *total += rtx_cost (XEXP (x, 1), outer_code);
17370 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17372 *total = ix86_cost->lea;
17373 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17374 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17375 *total += rtx_cost (XEXP (x, 1), outer_code);
17382 if (FLOAT_MODE_P (mode))
17384 *total = ix86_cost->fadd;
17392 if (!TARGET_64BIT && mode == DImode)
17394 *total = (ix86_cost->add * 2
17395 + (rtx_cost (XEXP (x, 0), outer_code)
17396 << (GET_MODE (XEXP (x, 0)) != DImode))
17397 + (rtx_cost (XEXP (x, 1), outer_code)
17398 << (GET_MODE (XEXP (x, 1)) != DImode)));
17404 if (FLOAT_MODE_P (mode))
17406 *total = ix86_cost->fchs;
17412 if (!TARGET_64BIT && mode == DImode)
17413 *total = ix86_cost->add * 2;
17415 *total = ix86_cost->add;
17419 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17420 && XEXP (XEXP (x, 0), 1) == const1_rtx
17421 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17422 && XEXP (x, 1) == const0_rtx)
17424 /* This kind of construct is implemented using test[bwl].
17425 Treat it as if we had an AND. */
17426 *total = (ix86_cost->add
17427 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17428 + rtx_cost (const1_rtx, outer_code));
17434 if (!TARGET_SSE_MATH
17436 || (mode == DFmode && !TARGET_SSE2))
17437 /* For standard 80387 constants, raise the cost to prevent
17438 compress_float_constant() to generate load from memory. */
17439 switch (standard_80387_constant_p (XEXP (x, 0)))
17449 *total = (x86_ext_80387_constants & TUNEMASK
17456 if (FLOAT_MODE_P (mode))
17457 *total = ix86_cost->fabs;
17461 if (FLOAT_MODE_P (mode))
17462 *total = ix86_cost->fsqrt;
17466 if (XINT (x, 1) == UNSPEC_TP)
17477 static int current_machopic_label_num;
17479 /* Given a symbol name and its associated stub, write out the
17480 definition of the stub. */
17483 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17485 unsigned int length;
17486 char *binder_name, *symbol_name, lazy_ptr_name[32];
17487 int label = ++current_machopic_label_num;
17489 /* For 64-bit we shouldn't get here. */
17490 gcc_assert (!TARGET_64BIT);
17492 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17493 symb = (*targetm.strip_name_encoding) (symb);
17495 length = strlen (stub);
17496 binder_name = alloca (length + 32);
17497 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17499 length = strlen (symb);
17500 symbol_name = alloca (length + 32);
17501 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17503 sprintf (lazy_ptr_name, "L%d$lz", label);
17506 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17508 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17510 fprintf (file, "%s:\n", stub);
17511 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17515 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
17516 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17517 fprintf (file, "\tjmp\t*%%edx\n");
17520 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
17522 fprintf (file, "%s:\n", binder_name);
17526 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17527 fprintf (file, "\tpushl\t%%eax\n");
17530 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
17532 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
17534 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17535 fprintf (file, "%s:\n", lazy_ptr_name);
17536 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17537 fprintf (file, "\t.long %s\n", binder_name);
17541 darwin_x86_file_end (void)
17543 darwin_file_end ();
17546 #endif /* TARGET_MACHO */
17548 /* Order the registers for register allocator. */
17551 x86_order_regs_for_local_alloc (void)
17556 /* First allocate the local general purpose registers. */
17557 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17558 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17559 reg_alloc_order [pos++] = i;
17561 /* Global general purpose registers. */
17562 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17563 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17564 reg_alloc_order [pos++] = i;
17566 /* x87 registers come first in case we are doing FP math
17568 if (!TARGET_SSE_MATH)
17569 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17570 reg_alloc_order [pos++] = i;
17572 /* SSE registers. */
17573 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17574 reg_alloc_order [pos++] = i;
17575 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17576 reg_alloc_order [pos++] = i;
17578 /* x87 registers. */
17579 if (TARGET_SSE_MATH)
17580 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17581 reg_alloc_order [pos++] = i;
17583 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17584 reg_alloc_order [pos++] = i;
17586 /* Initialize the rest of array as we do not allocate some registers
17588 while (pos < FIRST_PSEUDO_REGISTER)
17589 reg_alloc_order [pos++] = 0;
17592 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17593 struct attribute_spec.handler. */
17595 ix86_handle_struct_attribute (tree *node, tree name,
17596 tree args ATTRIBUTE_UNUSED,
17597 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17600 if (DECL_P (*node))
17602 if (TREE_CODE (*node) == TYPE_DECL)
17603 type = &TREE_TYPE (*node);
17608 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17609 || TREE_CODE (*type) == UNION_TYPE)))
17611 warning (OPT_Wattributes, "%qs attribute ignored",
17612 IDENTIFIER_POINTER (name));
17613 *no_add_attrs = true;
17616 else if ((is_attribute_p ("ms_struct", name)
17617 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17618 || ((is_attribute_p ("gcc_struct", name)
17619 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17621 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17622 IDENTIFIER_POINTER (name));
17623 *no_add_attrs = true;
17630 ix86_ms_bitfield_layout_p (tree record_type)
17632 return (TARGET_MS_BITFIELD_LAYOUT &&
17633 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17634 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17637 /* Returns an expression indicating where the this parameter is
17638 located on entry to the FUNCTION. */
17641 x86_this_parameter (tree function)
17643 tree type = TREE_TYPE (function);
17647 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17648 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17651 if (ix86_function_regparm (type, function) > 0)
17655 parm = TYPE_ARG_TYPES (type);
17656 /* Figure out whether or not the function has a variable number of
17658 for (; parm; parm = TREE_CHAIN (parm))
17659 if (TREE_VALUE (parm) == void_type_node)
17661 /* If not, the this parameter is in the first argument. */
17665 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17667 return gen_rtx_REG (SImode, regno);
17671 if (aggregate_value_p (TREE_TYPE (type), type))
17672 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17674 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17677 /* Determine whether x86_output_mi_thunk can succeed. */
17680 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17681 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17682 HOST_WIDE_INT vcall_offset, tree function)
17684 /* 64-bit can handle anything. */
17688 /* For 32-bit, everything's fine if we have one free register. */
17689 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17692 /* Need a free register for vcall_offset. */
17696 /* Need a free register for GOT references. */
17697 if (flag_pic && !(*targetm.binds_local_p) (function))
17700 /* Otherwise ok. */
17704 /* Output the assembler code for a thunk function. THUNK_DECL is the
17705 declaration for the thunk function itself, FUNCTION is the decl for
17706 the target function. DELTA is an immediate constant offset to be
17707 added to THIS. If VCALL_OFFSET is nonzero, the word at
17708 *(*this + vcall_offset) should be added to THIS. */
17711 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17712 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17713 HOST_WIDE_INT vcall_offset, tree function)
17716 rtx this = x86_this_parameter (function);
17719 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17720 pull it in now and let DELTA benefit. */
17723 else if (vcall_offset)
17725 /* Put the this parameter into %eax. */
17727 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17728 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17731 this_reg = NULL_RTX;
17733 /* Adjust the this parameter by a fixed constant. */
17736 xops[0] = GEN_INT (delta);
17737 xops[1] = this_reg ? this_reg : this;
17740 if (!x86_64_general_operand (xops[0], DImode))
17742 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17744 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17748 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17751 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17754 /* Adjust the this parameter by a value stored in the vtable. */
17758 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17761 int tmp_regno = 2 /* ECX */;
17762 if (lookup_attribute ("fastcall",
17763 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17764 tmp_regno = 0 /* EAX */;
17765 tmp = gen_rtx_REG (SImode, tmp_regno);
17768 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17771 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17773 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17775 /* Adjust the this parameter. */
17776 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17777 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17779 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17780 xops[0] = GEN_INT (vcall_offset);
17782 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17783 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17785 xops[1] = this_reg;
17787 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17789 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17792 /* If necessary, drop THIS back to its stack slot. */
17793 if (this_reg && this_reg != this)
17795 xops[0] = this_reg;
17797 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17800 xops[0] = XEXP (DECL_RTL (function), 0);
17803 if (!flag_pic || (*targetm.binds_local_p) (function))
17804 output_asm_insn ("jmp\t%P0", xops);
17807 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17808 tmp = gen_rtx_CONST (Pmode, tmp);
17809 tmp = gen_rtx_MEM (QImode, tmp);
17811 output_asm_insn ("jmp\t%A0", xops);
17816 if (!flag_pic || (*targetm.binds_local_p) (function))
17817 output_asm_insn ("jmp\t%P0", xops);
17822 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17823 tmp = (gen_rtx_SYMBOL_REF
17825 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17826 tmp = gen_rtx_MEM (QImode, tmp);
17828 output_asm_insn ("jmp\t%0", xops);
17831 #endif /* TARGET_MACHO */
17833 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17834 output_set_got (tmp, NULL_RTX);
17837 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17838 output_asm_insn ("jmp\t{*}%1", xops);
17844 x86_file_start (void)
17846 default_file_start ();
17848 darwin_file_start ();
17850 if (X86_FILE_START_VERSION_DIRECTIVE)
17851 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17852 if (X86_FILE_START_FLTUSED)
17853 fputs ("\t.global\t__fltused\n", asm_out_file);
17854 if (ix86_asm_dialect == ASM_INTEL)
17855 fputs ("\t.intel_syntax\n", asm_out_file);
17859 x86_field_alignment (tree field, int computed)
17861 enum machine_mode mode;
17862 tree type = TREE_TYPE (field);
17864 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17866 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17867 ? get_inner_array_type (type) : type);
17868 if (mode == DFmode || mode == DCmode
17869 || GET_MODE_CLASS (mode) == MODE_INT
17870 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17871 return MIN (32, computed);
17875 /* Output assembler code to FILE to increment profiler label # LABELNO
17876 for profiling a function entry. */
17878 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17883 #ifndef NO_PROFILE_COUNTERS
17884 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17886 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17890 #ifndef NO_PROFILE_COUNTERS
17891 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17893 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17897 #ifndef NO_PROFILE_COUNTERS
17898 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17899 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17901 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17905 #ifndef NO_PROFILE_COUNTERS
17906 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17907 PROFILE_COUNT_REGISTER);
17909 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17913 /* We don't have exact information about the insn sizes, but we may assume
17914 quite safely that we are informed about all 1 byte insns and memory
17915 address sizes. This is enough to eliminate unnecessary padding in
17919 min_insn_size (rtx insn)
17923 if (!INSN_P (insn) || !active_insn_p (insn))
17926 /* Discard alignments we've emit and jump instructions. */
17927 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17928 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17930 if (GET_CODE (insn) == JUMP_INSN
17931 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17932 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17935 /* Important case - calls are always 5 bytes.
17936 It is common to have many calls in the row. */
17937 if (GET_CODE (insn) == CALL_INSN
17938 && symbolic_reference_mentioned_p (PATTERN (insn))
17939 && !SIBLING_CALL_P (insn))
17941 if (get_attr_length (insn) <= 1)
17944 /* For normal instructions we may rely on the sizes of addresses
17945 and the presence of symbol to require 4 bytes of encoding.
17946 This is not the case for jumps where references are PC relative. */
17947 if (GET_CODE (insn) != JUMP_INSN)
17949 l = get_attr_length_address (insn);
17950 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17959 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
17963 ix86_avoid_jump_misspredicts (void)
17965 rtx insn, start = get_insns ();
17966 int nbytes = 0, njumps = 0;
17969 /* Look for all minimal intervals of instructions containing 4 jumps.
17970 The intervals are bounded by START and INSN. NBYTES is the total
17971 size of instructions in the interval including INSN and not including
17972 START. When the NBYTES is smaller than 16 bytes, it is possible
17973 that the end of START and INSN ends up in the same 16byte page.
17975 The smallest offset in the page INSN can start is the case where START
17976 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
17977 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
17979 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17982 nbytes += min_insn_size (insn);
17984 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17985 INSN_UID (insn), min_insn_size (insn));
17986 if ((GET_CODE (insn) == JUMP_INSN
17987 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17988 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17989 || GET_CODE (insn) == CALL_INSN)
17996 start = NEXT_INSN (start);
17997 if ((GET_CODE (start) == JUMP_INSN
17998 && GET_CODE (PATTERN (start)) != ADDR_VEC
17999 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
18000 || GET_CODE (start) == CALL_INSN)
18001 njumps--, isjump = 1;
18004 nbytes -= min_insn_size (start);
18006 gcc_assert (njumps >= 0);
18008 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
18009 INSN_UID (start), INSN_UID (insn), nbytes);
18011 if (njumps == 3 && isjump && nbytes < 16)
18013 int padsize = 15 - nbytes + min_insn_size (insn);
18016 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
18017 INSN_UID (insn), padsize);
18018 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
18023 /* AMD Athlon works faster
18024 when RET is not destination of conditional jump or directly preceded
18025 by other jump instruction. We avoid the penalty by inserting NOP just
18026 before the RET instructions in such cases. */
18028 ix86_pad_returns (void)
18033 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
18035 basic_block bb = e->src;
18036 rtx ret = BB_END (bb);
18038 bool replace = false;
18040 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
18041 || !maybe_hot_bb_p (bb))
18043 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
18044 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
18046 if (prev && GET_CODE (prev) == CODE_LABEL)
18051 FOR_EACH_EDGE (e, ei, bb->preds)
18052 if (EDGE_FREQUENCY (e) && e->src->index >= 0
18053 && !(e->flags & EDGE_FALLTHRU))
18058 prev = prev_active_insn (ret);
18060 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
18061 || GET_CODE (prev) == CALL_INSN))
18063 /* Empty functions get branch mispredict even when the jump destination
18064 is not visible to us. */
18065 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
18070 emit_insn_before (gen_return_internal_long (), ret);
18076 /* Implement machine specific optimizations. We implement padding of returns
18077 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
18081 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
18082 ix86_pad_returns ();
18083 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
18084 ix86_avoid_jump_misspredicts ();
18087 /* Return nonzero when QImode register that must be represented via REX prefix
18090 x86_extended_QIreg_mentioned_p (rtx insn)
18093 extract_insn_cached (insn);
18094 for (i = 0; i < recog_data.n_operands; i++)
18095 if (REG_P (recog_data.operand[i])
18096 && REGNO (recog_data.operand[i]) >= 4)
18101 /* Return nonzero when P points to register encoded via REX prefix.
18102 Called via for_each_rtx. */
18104 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
18106 unsigned int regno;
18109 regno = REGNO (*p);
18110 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
18113 /* Return true when INSN mentions register that must be encoded using REX
18116 x86_extended_reg_mentioned_p (rtx insn)
18118 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
18121 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
18122 optabs would emit if we didn't have TFmode patterns. */
18125 x86_emit_floatuns (rtx operands[2])
18127 rtx neglab, donelab, i0, i1, f0, in, out;
18128 enum machine_mode mode, inmode;
18130 inmode = GET_MODE (operands[1]);
18131 gcc_assert (inmode == SImode || inmode == DImode);
18134 in = force_reg (inmode, operands[1]);
18135 mode = GET_MODE (out);
18136 neglab = gen_label_rtx ();
18137 donelab = gen_label_rtx ();
18138 i1 = gen_reg_rtx (Pmode);
18139 f0 = gen_reg_rtx (mode);
18141 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
18143 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
18144 emit_jump_insn (gen_jump (donelab));
18147 emit_label (neglab);
18149 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18150 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
18151 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
18152 expand_float (f0, i0, 0);
18153 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
18155 emit_label (donelab);
18158 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18159 with all elements equal to VAR. Return true if successful. */
18162 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
18163 rtx target, rtx val)
18165 enum machine_mode smode, wsmode, wvmode;
18180 val = force_reg (GET_MODE_INNER (mode), val);
18181 x = gen_rtx_VEC_DUPLICATE (mode, val);
18182 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18188 if (TARGET_SSE || TARGET_3DNOW_A)
18190 val = gen_lowpart (SImode, val);
18191 x = gen_rtx_TRUNCATE (HImode, val);
18192 x = gen_rtx_VEC_DUPLICATE (mode, x);
18193 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18215 /* Extend HImode to SImode using a paradoxical SUBREG. */
18216 tmp1 = gen_reg_rtx (SImode);
18217 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18218 /* Insert the SImode value as low element of V4SImode vector. */
18219 tmp2 = gen_reg_rtx (V4SImode);
18220 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18221 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18222 CONST0_RTX (V4SImode),
18224 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18225 /* Cast the V4SImode vector back to a V8HImode vector. */
18226 tmp1 = gen_reg_rtx (V8HImode);
18227 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
18228 /* Duplicate the low short through the whole low SImode word. */
18229 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
18230 /* Cast the V8HImode vector back to a V4SImode vector. */
18231 tmp2 = gen_reg_rtx (V4SImode);
18232 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18233 /* Replicate the low element of the V4SImode vector. */
18234 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18235 /* Cast the V2SImode back to V8HImode, and store in target. */
18236 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
18247 /* Extend QImode to SImode using a paradoxical SUBREG. */
18248 tmp1 = gen_reg_rtx (SImode);
18249 emit_move_insn (tmp1, gen_lowpart (SImode, val));
18250 /* Insert the SImode value as low element of V4SImode vector. */
18251 tmp2 = gen_reg_rtx (V4SImode);
18252 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
18253 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
18254 CONST0_RTX (V4SImode),
18256 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
18257 /* Cast the V4SImode vector back to a V16QImode vector. */
18258 tmp1 = gen_reg_rtx (V16QImode);
18259 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
18260 /* Duplicate the low byte through the whole low SImode word. */
18261 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18262 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
18263 /* Cast the V16QImode vector back to a V4SImode vector. */
18264 tmp2 = gen_reg_rtx (V4SImode);
18265 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
18266 /* Replicate the low element of the V4SImode vector. */
18267 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
18268 /* Cast the V2SImode back to V16QImode, and store in target. */
18269 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
18277 /* Replicate the value once into the next wider mode and recurse. */
18278 val = convert_modes (wsmode, smode, val, true);
18279 x = expand_simple_binop (wsmode, ASHIFT, val,
18280 GEN_INT (GET_MODE_BITSIZE (smode)),
18281 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18282 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
18284 x = gen_reg_rtx (wvmode);
18285 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
18286 gcc_unreachable ();
18287 emit_move_insn (target, gen_lowpart (mode, x));
18295 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18296 whose ONE_VAR element is VAR, and other elements are zero. Return true
18300 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
18301 rtx target, rtx var, int one_var)
18303 enum machine_mode vsimode;
18319 var = force_reg (GET_MODE_INNER (mode), var);
18320 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
18321 emit_insn (gen_rtx_SET (VOIDmode, target, x));
18326 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
18327 new_target = gen_reg_rtx (mode);
18329 new_target = target;
18330 var = force_reg (GET_MODE_INNER (mode), var);
18331 x = gen_rtx_VEC_DUPLICATE (mode, var);
18332 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
18333 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
18336 /* We need to shuffle the value to the correct position, so
18337 create a new pseudo to store the intermediate result. */
18339 /* With SSE2, we can use the integer shuffle insns. */
18340 if (mode != V4SFmode && TARGET_SSE2)
18342 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
18344 GEN_INT (one_var == 1 ? 0 : 1),
18345 GEN_INT (one_var == 2 ? 0 : 1),
18346 GEN_INT (one_var == 3 ? 0 : 1)));
18347 if (target != new_target)
18348 emit_move_insn (target, new_target);
18352 /* Otherwise convert the intermediate result to V4SFmode and
18353 use the SSE1 shuffle instructions. */
18354 if (mode != V4SFmode)
18356 tmp = gen_reg_rtx (V4SFmode);
18357 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
18362 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
18364 GEN_INT (one_var == 1 ? 0 : 1),
18365 GEN_INT (one_var == 2 ? 0+4 : 1+4),
18366 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
18368 if (mode != V4SFmode)
18369 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
18370 else if (tmp != target)
18371 emit_move_insn (target, tmp);
18373 else if (target != new_target)
18374 emit_move_insn (target, new_target);
18379 vsimode = V4SImode;
18385 vsimode = V2SImode;
18391 /* Zero extend the variable element to SImode and recurse. */
18392 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
18394 x = gen_reg_rtx (vsimode);
18395 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
18397 gcc_unreachable ();
18399 emit_move_insn (target, gen_lowpart (mode, x));
18407 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
18408 consisting of the values in VALS. It is known that all elements
18409 except ONE_VAR are constants. Return true if successful. */
18412 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
18413 rtx target, rtx vals, int one_var)
18415 rtx var = XVECEXP (vals, 0, one_var);
18416 enum machine_mode wmode;
18419 const_vec = copy_rtx (vals);
18420 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
18421 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
18429 /* For the two element vectors, it's just as easy to use
18430 the general case. */
18446 /* There's no way to set one QImode entry easily. Combine
18447 the variable value with its adjacent constant value, and
18448 promote to an HImode set. */
18449 x = XVECEXP (vals, 0, one_var ^ 1);
18452 var = convert_modes (HImode, QImode, var, true);
18453 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
18454 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18455 x = GEN_INT (INTVAL (x) & 0xff);
18459 var = convert_modes (HImode, QImode, var, true);
18460 x = gen_int_mode (INTVAL (x) << 8, HImode);
18462 if (x != const0_rtx)
18463 var = expand_simple_binop (HImode, IOR, var, x, var,
18464 1, OPTAB_LIB_WIDEN);
18466 x = gen_reg_rtx (wmode);
18467 emit_move_insn (x, gen_lowpart (wmode, const_vec));
18468 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
18470 emit_move_insn (target, gen_lowpart (mode, x));
18477 emit_move_insn (target, const_vec);
18478 ix86_expand_vector_set (mmx_ok, target, var, one_var);
18482 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
18483 all values variable, and none identical. */
18486 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
18487 rtx target, rtx vals)
18489 enum machine_mode half_mode = GET_MODE_INNER (mode);
18490 rtx op0 = NULL, op1 = NULL;
18491 bool use_vec_concat = false;
18497 if (!mmx_ok && !TARGET_SSE)
18503 /* For the two element vectors, we always implement VEC_CONCAT. */
18504 op0 = XVECEXP (vals, 0, 0);
18505 op1 = XVECEXP (vals, 0, 1);
18506 use_vec_concat = true;
18510 half_mode = V2SFmode;
18513 half_mode = V2SImode;
18519 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18520 Recurse to load the two halves. */
18522 op0 = gen_reg_rtx (half_mode);
18523 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18524 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18526 op1 = gen_reg_rtx (half_mode);
18527 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18528 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18530 use_vec_concat = true;
18541 gcc_unreachable ();
18544 if (use_vec_concat)
18546 if (!register_operand (op0, half_mode))
18547 op0 = force_reg (half_mode, op0);
18548 if (!register_operand (op1, half_mode))
18549 op1 = force_reg (half_mode, op1);
18551 emit_insn (gen_rtx_SET (VOIDmode, target,
18552 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18556 int i, j, n_elts, n_words, n_elt_per_word;
18557 enum machine_mode inner_mode;
18558 rtx words[4], shift;
18560 inner_mode = GET_MODE_INNER (mode);
18561 n_elts = GET_MODE_NUNITS (mode);
18562 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18563 n_elt_per_word = n_elts / n_words;
18564 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18566 for (i = 0; i < n_words; ++i)
18568 rtx word = NULL_RTX;
18570 for (j = 0; j < n_elt_per_word; ++j)
18572 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18573 elt = convert_modes (word_mode, inner_mode, elt, true);
18579 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18580 word, 1, OPTAB_LIB_WIDEN);
18581 word = expand_simple_binop (word_mode, IOR, word, elt,
18582 word, 1, OPTAB_LIB_WIDEN);
18590 emit_move_insn (target, gen_lowpart (mode, words[0]));
18591 else if (n_words == 2)
18593 rtx tmp = gen_reg_rtx (mode);
18594 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18595 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18596 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18597 emit_move_insn (target, tmp);
18599 else if (n_words == 4)
18601 rtx tmp = gen_reg_rtx (V4SImode);
18602 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18603 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18604 emit_move_insn (target, gen_lowpart (mode, tmp));
18607 gcc_unreachable ();
18611 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18612 instructions unless MMX_OK is true. */
18615 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18617 enum machine_mode mode = GET_MODE (target);
18618 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18619 int n_elts = GET_MODE_NUNITS (mode);
18620 int n_var = 0, one_var = -1;
18621 bool all_same = true, all_const_zero = true;
18625 for (i = 0; i < n_elts; ++i)
18627 x = XVECEXP (vals, 0, i);
18628 if (!CONSTANT_P (x))
18629 n_var++, one_var = i;
18630 else if (x != CONST0_RTX (inner_mode))
18631 all_const_zero = false;
18632 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18636 /* Constants are best loaded from the constant pool. */
18639 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18643 /* If all values are identical, broadcast the value. */
18645 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18646 XVECEXP (vals, 0, 0)))
18649 /* Values where only one field is non-constant are best loaded from
18650 the pool and overwritten via move later. */
18654 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
18655 XVECEXP (vals, 0, one_var),
18659 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18663 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18667 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18669 enum machine_mode mode = GET_MODE (target);
18670 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18671 bool use_vec_merge = false;
18680 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18681 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18683 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18685 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18686 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18696 /* For the two element vectors, we implement a VEC_CONCAT with
18697 the extraction of the other element. */
18699 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18700 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18703 op0 = val, op1 = tmp;
18705 op0 = tmp, op1 = val;
18707 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18708 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18716 use_vec_merge = true;
18720 /* tmp = target = A B C D */
18721 tmp = copy_to_reg (target);
18722 /* target = A A B B */
18723 emit_insn (gen_sse_unpcklps (target, target, target));
18724 /* target = X A B B */
18725 ix86_expand_vector_set (false, target, val, 0);
18726 /* target = A X C D */
18727 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18728 GEN_INT (1), GEN_INT (0),
18729 GEN_INT (2+4), GEN_INT (3+4)));
18733 /* tmp = target = A B C D */
18734 tmp = copy_to_reg (target);
18735 /* tmp = X B C D */
18736 ix86_expand_vector_set (false, tmp, val, 0);
18737 /* target = A B X D */
18738 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18739 GEN_INT (0), GEN_INT (1),
18740 GEN_INT (0+4), GEN_INT (3+4)));
18744 /* tmp = target = A B C D */
18745 tmp = copy_to_reg (target);
18746 /* tmp = X B C D */
18747 ix86_expand_vector_set (false, tmp, val, 0);
18748 /* target = A B X D */
18749 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18750 GEN_INT (0), GEN_INT (1),
18751 GEN_INT (2+4), GEN_INT (0+4)));
18755 gcc_unreachable ();
18760 /* Element 0 handled by vec_merge below. */
18763 use_vec_merge = true;
18769 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18770 store into element 0, then shuffle them back. */
18774 order[0] = GEN_INT (elt);
18775 order[1] = const1_rtx;
18776 order[2] = const2_rtx;
18777 order[3] = GEN_INT (3);
18778 order[elt] = const0_rtx;
18780 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18781 order[1], order[2], order[3]));
18783 ix86_expand_vector_set (false, target, val, 0);
18785 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18786 order[1], order[2], order[3]));
18790 /* For SSE1, we have to reuse the V4SF code. */
18791 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18792 gen_lowpart (SFmode, val), elt);
18797 use_vec_merge = TARGET_SSE2;
18800 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18811 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18812 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18813 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18817 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18819 emit_move_insn (mem, target);
18821 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18822 emit_move_insn (tmp, val);
18824 emit_move_insn (target, mem);
18829 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18831 enum machine_mode mode = GET_MODE (vec);
18832 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18833 bool use_vec_extr = false;
18846 use_vec_extr = true;
18858 tmp = gen_reg_rtx (mode);
18859 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18860 GEN_INT (elt), GEN_INT (elt),
18861 GEN_INT (elt+4), GEN_INT (elt+4)));
18865 tmp = gen_reg_rtx (mode);
18866 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18870 gcc_unreachable ();
18873 use_vec_extr = true;
18888 tmp = gen_reg_rtx (mode);
18889 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18890 GEN_INT (elt), GEN_INT (elt),
18891 GEN_INT (elt), GEN_INT (elt)));
18895 tmp = gen_reg_rtx (mode);
18896 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18900 gcc_unreachable ();
18903 use_vec_extr = true;
18908 /* For SSE1, we have to reuse the V4SF code. */
18909 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18910 gen_lowpart (V4SFmode, vec), elt);
18916 use_vec_extr = TARGET_SSE2;
18919 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18924 /* ??? Could extract the appropriate HImode element and shift. */
18931 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18932 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18934 /* Let the rtl optimizers know about the zero extension performed. */
18935 if (inner_mode == HImode)
18937 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18938 target = gen_lowpart (SImode, target);
18941 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18945 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18947 emit_move_insn (mem, vec);
18949 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18950 emit_move_insn (target, tmp);
18954 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18955 pattern to reduce; DEST is the destination; IN is the input vector. */
18958 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18960 rtx tmp1, tmp2, tmp3;
18962 tmp1 = gen_reg_rtx (V4SFmode);
18963 tmp2 = gen_reg_rtx (V4SFmode);
18964 tmp3 = gen_reg_rtx (V4SFmode);
18966 emit_insn (gen_sse_movhlps (tmp1, in, in));
18967 emit_insn (fn (tmp2, tmp1, in));
18969 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18970 GEN_INT (1), GEN_INT (1),
18971 GEN_INT (1+4), GEN_INT (1+4)));
18972 emit_insn (fn (dest, tmp2, tmp3));
18975 /* Target hook for scalar_mode_supported_p. */
18977 ix86_scalar_mode_supported_p (enum machine_mode mode)
18979 if (DECIMAL_FLOAT_MODE_P (mode))
18982 return default_scalar_mode_supported_p (mode);
18985 /* Implements target hook vector_mode_supported_p. */
18987 ix86_vector_mode_supported_p (enum machine_mode mode)
18989 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18991 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18993 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18995 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
19000 /* Worker function for TARGET_MD_ASM_CLOBBERS.
19002 We do this in the new i386 backend to maintain source compatibility
19003 with the old cc0-based compiler. */
19006 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
19007 tree inputs ATTRIBUTE_UNUSED,
19010 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
19012 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
19014 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
19019 /* Return true if this goes in small data/bss. */
19022 ix86_in_large_data_p (tree exp)
19024 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
19027 /* Functions are never large data. */
19028 if (TREE_CODE (exp) == FUNCTION_DECL)
19031 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
19033 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
19034 if (strcmp (section, ".ldata") == 0
19035 || strcmp (section, ".lbss") == 0)
19041 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
19043 /* If this is an incomplete type with size 0, then we can't put it
19044 in data because it might be too big when completed. */
19045 if (!size || size > ix86_section_threshold)
19052 ix86_encode_section_info (tree decl, rtx rtl, int first)
19054 default_encode_section_info (decl, rtl, first);
19056 if (TREE_CODE (decl) == VAR_DECL
19057 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
19058 && ix86_in_large_data_p (decl))
19059 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
19062 /* Worker function for REVERSE_CONDITION. */
19065 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
19067 return (mode != CCFPmode && mode != CCFPUmode
19068 ? reverse_condition (code)
19069 : reverse_condition_maybe_unordered (code));
19072 /* Output code to perform an x87 FP register move, from OPERANDS[1]
19076 output_387_reg_move (rtx insn, rtx *operands)
19078 if (REG_P (operands[1])
19079 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
19081 if (REGNO (operands[0]) == FIRST_STACK_REG)
19082 return output_387_ffreep (operands, 0);
19083 return "fstp\t%y0";
19085 if (STACK_TOP_P (operands[0]))
19086 return "fld%z1\t%y1";
19090 /* Output code to perform a conditional jump to LABEL, if C2 flag in
19091 FP status register is set. */
19094 ix86_emit_fp_unordered_jump (rtx label)
19096 rtx reg = gen_reg_rtx (HImode);
19099 emit_insn (gen_x86_fnstsw_1 (reg));
19101 if (TARGET_USE_SAHF)
19103 emit_insn (gen_x86_sahf_1 (reg));
19105 temp = gen_rtx_REG (CCmode, FLAGS_REG);
19106 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
19110 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
19112 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19113 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
19116 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
19117 gen_rtx_LABEL_REF (VOIDmode, label),
19119 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
19120 emit_jump_insn (temp);
19123 /* Output code to perform a log1p XFmode calculation. */
19125 void ix86_emit_i387_log1p (rtx op0, rtx op1)
19127 rtx label1 = gen_label_rtx ();
19128 rtx label2 = gen_label_rtx ();
19130 rtx tmp = gen_reg_rtx (XFmode);
19131 rtx tmp2 = gen_reg_rtx (XFmode);
19133 emit_insn (gen_absxf2 (tmp, op1));
19134 emit_insn (gen_cmpxf (tmp,
19135 CONST_DOUBLE_FROM_REAL_VALUE (
19136 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
19138 emit_jump_insn (gen_bge (label1));
19140 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19141 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
19142 emit_jump (label2);
19144 emit_label (label1);
19145 emit_move_insn (tmp, CONST1_RTX (XFmode));
19146 emit_insn (gen_addxf3 (tmp, op1, tmp));
19147 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
19148 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
19150 emit_label (label2);
19153 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
19156 i386_solaris_elf_named_section (const char *name, unsigned int flags,
19159 /* With Binutils 2.15, the "@unwind" marker must be specified on
19160 every occurrence of the ".eh_frame" section, not just the first
19163 && strcmp (name, ".eh_frame") == 0)
19165 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
19166 flags & SECTION_WRITE ? "aw" : "a");
19169 default_elf_asm_named_section (name, flags, decl);
19172 /* Return the mangling of TYPE if it is an extended fundamental type. */
19174 static const char *
19175 ix86_mangle_fundamental_type (tree type)
19177 switch (TYPE_MODE (type))
19180 /* __float128 is "g". */
19183 /* "long double" or __float80 is "e". */
19190 /* For 32-bit code we can save PIC register setup by using
19191 __stack_chk_fail_local hidden function instead of calling
19192 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
19193 register, so it is better to call __stack_chk_fail directly. */
19196 ix86_stack_protect_fail (void)
19198 return TARGET_64BIT
19199 ? default_external_stack_protect_fail ()
19200 : default_hidden_stack_protect_fail ();
19203 /* Select a format to encode pointers in exception handling data. CODE
19204 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
19205 true if the symbol may be affected by dynamic relocations.
19207 ??? All x86 object file formats are capable of representing this.
19208 After all, the relocation needed is the same as for the call insn.
19209 Whether or not a particular assembler allows us to enter such, I
19210 guess we'll have to see. */
19212 asm_preferred_eh_data_format (int code, int global)
19216 int type = DW_EH_PE_sdata8;
19218 || ix86_cmodel == CM_SMALL_PIC
19219 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
19220 type = DW_EH_PE_sdata4;
19221 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
19223 if (ix86_cmodel == CM_SMALL
19224 || (ix86_cmodel == CM_MEDIUM && code))
19225 return DW_EH_PE_udata4;
19226 return DW_EH_PE_absptr;
19229 /* Expand copysign from SIGN to the positive value ABS_VALUE
19230 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
19233 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
19235 enum machine_mode mode = GET_MODE (sign);
19236 rtx sgn = gen_reg_rtx (mode);
19237 if (mask == NULL_RTX)
19239 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
19240 if (!VECTOR_MODE_P (mode))
19242 /* We need to generate a scalar mode mask in this case. */
19243 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19244 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19245 mask = gen_reg_rtx (mode);
19246 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19250 mask = gen_rtx_NOT (mode, mask);
19251 emit_insn (gen_rtx_SET (VOIDmode, sgn,
19252 gen_rtx_AND (mode, mask, sign)));
19253 emit_insn (gen_rtx_SET (VOIDmode, result,
19254 gen_rtx_IOR (mode, abs_value, sgn)));
19257 /* Expand fabs (OP0) and return a new rtx that holds the result. The
19258 mask for masking out the sign-bit is stored in *SMASK, if that is
19261 ix86_expand_sse_fabs (rtx op0, rtx *smask)
19263 enum machine_mode mode = GET_MODE (op0);
19266 xa = gen_reg_rtx (mode);
19267 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
19268 if (!VECTOR_MODE_P (mode))
19270 /* We need to generate a scalar mode mask in this case. */
19271 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
19272 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
19273 mask = gen_reg_rtx (mode);
19274 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
19276 emit_insn (gen_rtx_SET (VOIDmode, xa,
19277 gen_rtx_AND (mode, op0, mask)));
19285 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
19286 swapping the operands if SWAP_OPERANDS is true. The expanded
19287 code is a forward jump to a newly created label in case the
19288 comparison is true. The generated label rtx is returned. */
19290 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
19291 bool swap_operands)
19302 label = gen_label_rtx ();
19303 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
19304 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19305 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
19306 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
19307 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19308 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
19309 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19310 JUMP_LABEL (tmp) = label;
19315 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
19316 using comparison code CODE. Operands are swapped for the comparison if
19317 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
19319 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
19320 bool swap_operands)
19322 enum machine_mode mode = GET_MODE (op0);
19323 rtx mask = gen_reg_rtx (mode);
19332 if (mode == DFmode)
19333 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
19334 gen_rtx_fmt_ee (code, mode, op0, op1)));
19336 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
19337 gen_rtx_fmt_ee (code, mode, op0, op1)));
19342 /* Generate and return a rtx of mode MODE for 2**n where n is the number
19343 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
19345 ix86_gen_TWO52 (enum machine_mode mode)
19347 REAL_VALUE_TYPE TWO52r;
19350 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
19351 TWO52 = const_double_from_real_value (TWO52r, mode);
19352 TWO52 = force_reg (mode, TWO52);
19357 /* Expand SSE sequence for computing lround from OP1 storing
19360 ix86_expand_lround (rtx op0, rtx op1)
19362 /* C code for the stuff we're doing below:
19363 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
19366 enum machine_mode mode = GET_MODE (op1);
19367 const struct real_format *fmt;
19368 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19371 /* load nextafter (0.5, 0.0) */
19372 fmt = REAL_MODE_FORMAT (mode);
19373 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
19374 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
19376 /* adj = copysign (0.5, op1) */
19377 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
19378 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
19380 /* adj = op1 + adj */
19381 expand_simple_binop (mode, PLUS, adj, op1, adj, 0, OPTAB_DIRECT);
19383 /* op0 = (imode)adj */
19384 expand_fix (op0, adj, 0);
19387 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
19390 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
19392 /* C code for the stuff we're doing below (for do_floor):
19394 xi -= (double)xi > op1 ? 1 : 0;
19397 enum machine_mode fmode = GET_MODE (op1);
19398 enum machine_mode imode = GET_MODE (op0);
19399 rtx ireg, freg, label;
19401 /* reg = (long)op1 */
19402 ireg = gen_reg_rtx (imode);
19403 expand_fix (ireg, op1, 0);
19405 /* freg = (double)reg */
19406 freg = gen_reg_rtx (fmode);
19407 expand_float (freg, ireg, 0);
19409 /* ireg = (freg > op1) ? ireg - 1 : ireg */
19410 label = ix86_expand_sse_compare_and_jump (UNLE,
19411 freg, op1, !do_floor);
19412 expand_simple_binop (imode, do_floor ? MINUS : PLUS,
19413 ireg, const1_rtx, ireg, 0, OPTAB_DIRECT);
19414 emit_label (label);
19415 LABEL_NUSES (label) = 1;
19417 emit_move_insn (op0, ireg);
19420 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
19421 result in OPERAND0. */
19423 ix86_expand_rint (rtx operand0, rtx operand1)
19425 /* C code for the stuff we're doing below:
19426 if (!isless (fabs (operand1), 2**52))
19428 tmp = copysign (2**52, operand1);
19429 return operand1 + tmp - tmp;
19431 enum machine_mode mode = GET_MODE (operand0);
19432 rtx res, xa, label, TWO52, mask;
19434 res = gen_reg_rtx (mode);
19435 emit_move_insn (res, operand1);
19437 /* xa = abs (operand1) */
19438 xa = ix86_expand_sse_fabs (res, &mask);
19440 /* if (!isless (xa, TWO52)) goto label; */
19441 TWO52 = ix86_gen_TWO52 (mode);
19442 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19444 ix86_sse_copysign_to_positive (TWO52, TWO52, res, mask);
19446 expand_simple_binop (mode, PLUS, res, TWO52, res, 0, OPTAB_DIRECT);
19447 expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);
19449 emit_label (label);
19450 LABEL_NUSES (label) = 1;
19452 emit_move_insn (operand0, res);
19455 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19458 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
19460 /* C code for the stuff we expand below.
19461 double xa = fabs (x), x2;
19462 if (!isless (xa, TWO52))
19464 xa = xa + TWO52 - TWO52;
19465 x2 = copysign (xa, x);
19474 enum machine_mode mode = GET_MODE (operand0);
19475 rtx xa, TWO52, tmp, label, one, res, mask;
19477 TWO52 = ix86_gen_TWO52 (mode);
19479 /* Temporary for holding the result, initialized to the input
19480 operand to ease control flow. */
19481 res = gen_reg_rtx (mode);
19482 emit_move_insn (res, operand1);
19484 /* xa = abs (operand1) */
19485 xa = ix86_expand_sse_fabs (res, &mask);
19487 /* if (!isless (xa, TWO52)) goto label; */
19488 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19490 /* xa = xa + TWO52 - TWO52; */
19491 expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19492 expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
19494 /* xa = copysign (xa, operand1) */
19495 ix86_sse_copysign_to_positive (xa, xa, res, mask);
19498 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19500 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19501 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19502 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19503 gen_rtx_AND (mode, one, tmp)));
19504 expand_simple_binop (mode, do_floor ? MINUS : PLUS,
19505 xa, tmp, res, 0, OPTAB_DIRECT);
19507 emit_label (label);
19508 LABEL_NUSES (label) = 1;
19510 emit_move_insn (operand0, res);
19513 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
19516 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
19518 /* C code for the stuff we expand below.
19519 double xa = fabs (x), x2;
19520 if (!isless (xa, TWO52))
19522 x2 = (double)(long)x;
19531 enum machine_mode mode = GET_MODE (operand0);
19532 rtx xa, xi, TWO52, tmp, label, one, res;
19534 TWO52 = ix86_gen_TWO52 (mode);
19536 /* Temporary for holding the result, initialized to the input
19537 operand to ease control flow. */
19538 res = gen_reg_rtx (mode);
19539 emit_move_insn (res, operand1);
19541 /* xa = abs (operand1) */
19542 xa = ix86_expand_sse_fabs (res, NULL);
19544 /* if (!isless (xa, TWO52)) goto label; */
19545 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19547 /* xa = (double)(long)x */
19548 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19549 expand_fix (xi, res, 0);
19550 expand_float (xa, xi, 0);
19553 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19555 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
19556 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
19557 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19558 gen_rtx_AND (mode, one, tmp)));
19559 expand_simple_binop (mode, do_floor ? MINUS : PLUS,
19560 xa, tmp, res, 0, OPTAB_DIRECT);
19562 emit_label (label);
19563 LABEL_NUSES (label) = 1;
19565 emit_move_insn (operand0, res);
19568 /* Expand SSE sequence for computing round from OPERAND1 storing
19569 into OPERAND0. Sequence that works without relying on DImode truncation
19570 via cvttsd2siq that is only available on 64bit targets. */
19572 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
19574 /* C code for the stuff we expand below.
19575 double xa = fabs (x), xa2, x2;
19576 if (!isless (xa, TWO52))
19578 Using the absolute value and copying back sign makes
19579 -0.0 -> -0.0 correct.
19580 xa2 = xa + TWO52 - TWO52;
19585 else if (dxa > 0.5)
19587 x2 = copysign (xa2, x);
19590 enum machine_mode mode = GET_MODE (operand0);
19591 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
19593 TWO52 = ix86_gen_TWO52 (mode);
19595 /* Temporary for holding the result, initialized to the input
19596 operand to ease control flow. */
19597 res = gen_reg_rtx (mode);
19598 emit_move_insn (res, operand1);
19600 /* xa = abs (operand1) */
19601 xa = ix86_expand_sse_fabs (res, &mask);
19603 /* if (!isless (xa, TWO52)) goto label; */
19604 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19606 /* xa2 = xa + TWO52 - TWO52; */
19607 xa2 = gen_reg_rtx (mode);
19608 expand_simple_binop (mode, PLUS, xa, TWO52, xa2, 0, OPTAB_DIRECT);
19609 expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
19611 /* dxa = xa2 - xa; */
19612 dxa = gen_reg_rtx (mode);
19613 expand_simple_binop (mode, MINUS, xa2, xa, dxa, 0, OPTAB_DIRECT);
19615 /* generate 0.5, 1.0 and -0.5 */
19616 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
19617 one = gen_reg_rtx (mode);
19618 expand_simple_binop (mode, PLUS, half, half, one, 0, OPTAB_DIRECT);
19619 mhalf = gen_reg_rtx (mode);
19620 expand_simple_binop (mode, MINUS, half, one, mhalf, 0, OPTAB_DIRECT);
19623 tmp = gen_reg_rtx (mode);
19624 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
19625 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
19626 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19627 gen_rtx_AND (mode, one, tmp)));
19628 expand_simple_binop (mode, MINUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);
19629 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
19630 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
19631 emit_insn (gen_rtx_SET (VOIDmode, tmp,
19632 gen_rtx_AND (mode, one, tmp)));
19633 expand_simple_binop (mode, PLUS, xa2, tmp, xa2, 0, OPTAB_DIRECT);
19635 /* res = copysign (xa2, operand1) */
19636 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
19638 emit_label (label);
19639 LABEL_NUSES (label) = 1;
19641 emit_move_insn (operand0, res);
19644 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19647 ix86_expand_trunc (rtx operand0, rtx operand1)
19649 /* C code for SSE variant we expand below.
19650 double xa = fabs (x), x2;
19651 if (!isless (xa, TWO52))
19653 return (double)(long)x;
19655 enum machine_mode mode = GET_MODE (operand0);
19656 rtx xa, xi, TWO52, label, res;
19658 TWO52 = ix86_gen_TWO52 (mode);
19660 /* Temporary for holding the result, initialized to the input
19661 operand to ease control flow. */
19662 res = gen_reg_rtx (mode);
19663 emit_move_insn (res, operand1);
19665 /* xa = abs (operand1) */
19666 xa = ix86_expand_sse_fabs (res, NULL);
19668 /* if (!isless (xa, TWO52)) goto label; */
19669 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19671 /* x = (double)(long)x */
19672 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19673 expand_fix (xi, res, 0);
19674 expand_float (res, xi, 0);
19676 emit_label (label);
19677 LABEL_NUSES (label) = 1;
19679 emit_move_insn (operand0, res);
19682 /* Expand SSE sequence for computing trunc from OPERAND1 storing
19685 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
19687 enum machine_mode mode = GET_MODE (operand0);
19688 rtx xa, mask, TWO52, label, one, res, smask;
19690 /* C code for SSE variant we expand below.
19691 double xa = fabs (x), x2;
19692 if (!isless (xa, TWO52))
19694 xa2 = xa + TWO52 - TWO52;
19698 x2 = copysign (xa2, x);
19702 TWO52 = ix86_gen_TWO52 (mode);
19704 /* Temporary for holding the result, initialized to the input
19705 operand to ease control flow. */
19706 res = gen_reg_rtx (mode);
19707 emit_move_insn (res, operand1);
19709 /* xa = abs (operand1) */
19710 xa = ix86_expand_sse_fabs (res, &smask);
19712 /* if (!isless (xa, TWO52)) goto label; */
19713 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19715 /* res = xa + TWO52 - TWO52; */
19716 expand_simple_binop (mode, PLUS, xa, TWO52, res, 0, OPTAB_DIRECT);
19717 expand_simple_binop (mode, MINUS, res, TWO52, res, 0, OPTAB_DIRECT);
19720 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
19722 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
19723 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
19724 emit_insn (gen_rtx_SET (VOIDmode, mask,
19725 gen_rtx_AND (mode, mask, one)));
19726 expand_simple_binop (mode, MINUS,
19727 res, mask, res, 0, OPTAB_DIRECT);
19729 /* res = copysign (res, operand1) */
19730 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
19732 emit_label (label);
19733 LABEL_NUSES (label) = 1;
19735 emit_move_insn (operand0, res);
19738 /* Expand SSE sequence for computing round from OPERAND1 storing
19741 ix86_expand_round (rtx operand0, rtx operand1)
19743 /* C code for the stuff we're doing below:
19744 double xa = fabs (x);
19745 if (!isless (xa, TWO52))
19747 xa = (double)(long)(xa + nextafter (0.5, 0.0));
19748 return copysign (xa, x);
19750 enum machine_mode mode = GET_MODE (operand0);
19751 rtx res, TWO52, xa, label, xi, half, mask;
19752 const struct real_format *fmt;
19753 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19755 /* Temporary for holding the result, initialized to the input
19756 operand to ease control flow. */
19757 res = gen_reg_rtx (mode);
19758 emit_move_insn (res, operand1);
19760 TWO52 = ix86_gen_TWO52 (mode);
19761 xa = ix86_expand_sse_fabs (res, &mask);
19762 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
19764 /* load nextafter (0.5, 0.0) */
19765 fmt = REAL_MODE_FORMAT (mode);
19766 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
19767 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
19769 /* xa = xa + 0.5 */
19770 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
19771 expand_simple_binop (mode, PLUS, xa, half, xa, 0, OPTAB_DIRECT);
19773 /* xa = (double)(int64_t)xa */
19774 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
19775 expand_fix (xi, xa, 0);
19776 expand_float (xa, xi, 0);
19778 /* res = copysign (xa, operand1) */
19779 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
19781 emit_label (label);
19782 LABEL_NUSES (label) = 1;
19784 emit_move_insn (operand0, res);
19787 #include "gt-i386.h"