1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
51 #include "tree-gimple.h"
54 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
60 #ifndef CHECK_STACK_LIMIT
61 #define CHECK_STACK_LIMIT (-1)
64 /* Return index of given mode in mult and division cost tables. */
65 #define MODE_INDEX(mode) \
66 ((mode) == QImode ? 0 \
67 : (mode) == HImode ? 1 \
68 : (mode) == SImode ? 2 \
69 : (mode) == DImode ? 3 \
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
74 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop strategy table entry: unconditionally fall back to a
   library call (used where a mode-specific table is not provided).  */
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs size_cost = { /* costs for tuning for size */
/* NOTE(review): positional initializer for struct processor_costs; the
   trailing comments name each member.  Some member lines and the closing
   brace are elided in this excerpt -- keep order unchanged.  */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* Stringop strategy tables; presumably the memcpy descriptors followed by
   the memset descriptors -- confirm against struct processor_costs.  */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
/* NOTE(review): positional initializer for struct processor_costs; the
   trailing comments name each member.  Some member lines and the closing
   brace are elided in this excerpt -- keep order unchanged.  */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* Stringop strategy tables; presumably the memcpy descriptors followed by
   the memset descriptors -- confirm against struct processor_costs.  */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
/* NOTE(review): positional initializer for struct processor_costs; the
   trailing comments name each member.  Some member lines and the closing
   brace are elided in this excerpt -- keep order unchanged.  */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* Stringop strategy tables; presumably the memcpy descriptors followed by
   the memset descriptors -- confirm against struct processor_costs.  */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
/* NOTE(review): positional initializer for struct processor_costs; the
   trailing comments name each member.  Some member lines and the closing
   brace are elided in this excerpt -- keep order unchanged.  */
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* Stringop strategy tables; presumably the memcpy descriptors followed by
   the memset descriptors -- confirm against struct processor_costs.  */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
/* NOTE(review): positional initializer for struct processor_costs; the
   trailing comments name each member.  Some member lines and the closing
   brace are elided in this excerpt -- keep order unchanged.  */
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.  */
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
/* NOTE(review): positional initializer for struct processor_costs; the
   trailing comments name each member.  Some member lines and the closing
   brace are elided in this excerpt -- keep order unchanged.  */
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
/* Stringop strategy tables; presumably the memcpy descriptors followed by
   the memset descriptors -- confirm against struct processor_costs.  */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
/* NOTE(review): positional initializer for struct processor_costs; the
   trailing comments name each member.  Some member lines and the closing
   brace are elided in this excerpt -- keep order unchanged.  */
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* Stringop strategy tables; presumably the memcpy descriptors followed by
   the memset descriptors -- confirm against struct processor_costs.  */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
/* NOTE(review): positional initializer for struct processor_costs; the
   trailing comments name each member.  Some member lines and the closing
   brace are elided in this excerpt -- keep order unchanged.  */
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
/* NOTE(review): positional initializer for struct processor_costs; the
   trailing comments name each member.  Some member lines and the closing
   brace are elided in this excerpt -- keep order unchanged.  */
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
time).  */
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
/* NOTE(review): positional initializer for struct processor_costs; the
   trailing comments name each member.  Some member lines and the closing
   brace are elided in this excerpt -- keep order unchanged.  */
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
/* NOTE(review): the lines below are the interior of an elided comment
   describing MOVD latencies on K8 vs. AMDFAM10; delimiters restored:
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
   */
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
time).  */
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
822 struct processor_costs pentium4_cost = {
/* NOTE(review): positional initializer for struct processor_costs; the
   trailing comments name each member.  Some member lines (including one
   inside the second stringop table, leaving its braces unbalanced here)
   and the closing brace are elided in this excerpt -- keep order
   unchanged.  */
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* Stringop strategy tables; presumably the memcpy descriptors followed by
   the memset descriptors -- confirm against struct processor_costs.  */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
/* Cost table used when tuning for Nocona.  NOTE(review): some initializer
   lines (MOVE_RATIO, branch cost, a stringop sub-brace, closing brace)
   appear to be missing from this excerpt — confirm against the full file.  */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
/* Cost table used when tuning for Core 2.  The comment on the {4, 4, 4}
   entry after the fp-load costs previously said "cost of loading integer
   registers"; by the field order of processor_costs (and every sibling
   table) that slot is the fp-register STORE cost — fixed below.  */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
   in SFmode, DFmode and XFmode */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Generic64 should produce code tuned for Nocona and K8. */
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea is 2 cycles and more. With
1042 this cost however our current implementation of synth_mult results in
1043 use of unnecessary temporary registers causing regression on several
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1089 is increased to perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
/* Cost table currently in effect.  Initialized to pentium_cost here;
   presumably reassigned during option processing — verify in override code.  */
1185 const struct processor_costs *ix86_cost = &pentium_cost;
1187 /* Processor feature/optimization bitmasks.  Each m_* constant is a
   one-bit mask keyed by the PROCESSOR_* enum value; compound masks
   (m_K6_GEODE, m_ATHLON_K8, m_AMD_MULTIPLE, m_GENERIC) OR several
   together so tuning tables below can name CPU groups concisely. */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1208 /* Generic instruction choice should be common subset of supported CPUs
1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1212 /* Feature tests against the various tunings.  Indexed by X86_TUNE_*;
   each entry is a mask of the m_* processor bits for which the tuning
   applies.  NOTE(review): several entries' mask lines appear to be
   missing from this excerpt (comment with no following initializer) —
   verify against the full file before editing positionally.  */
1213 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1214 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1215 negatively, so enabling for Generic64 seems like good code size
1216 tradeoff. We can't enable it for 32bit generic because it does not
1217 work well with PPro base chips. */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1220 /* X86_TUNE_PUSH_MEMORY */
1221 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1222 | m_NOCONA | m_CORE2 | m_GENERIC,
1224 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1227 /* X86_TUNE_USE_BIT_TEST */
1230 /* X86_TUNE_UNROLL_STRLEN */
1231 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1233 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1234 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1236 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1237 on simulation result. But after P4 was made, no performance benefit
1238 was observed with branch hints. It also increases the code size.
1239 As a result, icc never generates branch hints. */
1242 /* X86_TUNE_DOUBLE_WITH_ADD */
1245 /* X86_TUNE_USE_SAHF */
1246 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1247 | m_NOCONA | m_CORE2 | m_GENERIC,
1249 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1250 partial dependencies. */
1251 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1252 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1254 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1255 register stalls on Generic32 compilation setting as well. However
1256 in current implementation the partial register stalls are not eliminated
1257 very well - they can be introduced via subregs synthesized by combine
1258 and can happen in caller/callee saving sequences. Because this option
1259 pays back little on PPro based chips and is in conflict with partial reg
1260 dependencies used by Athlon/P4 based chips, it is better to leave it off
1261 for generic32 for now. */
1264 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1265 m_CORE2 | m_GENERIC,
1267 /* X86_TUNE_USE_HIMODE_FIOP */
1268 m_386 | m_486 | m_K6_GEODE,
1270 /* X86_TUNE_USE_SIMODE_FIOP */
1271 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1273 /* X86_TUNE_USE_MOV0 */
1276 /* X86_TUNE_USE_CLTD */
1277 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1279 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1282 /* X86_TUNE_SPLIT_LONG_MOVES */
1285 /* X86_TUNE_READ_MODIFY_WRITE */
1288 /* X86_TUNE_READ_MODIFY */
1291 /* X86_TUNE_PROMOTE_QIMODE */
1292 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1293 | m_GENERIC /* | m_PENT4 ? */,
1295 /* X86_TUNE_FAST_PREFIX */
1296 ~(m_PENT | m_486 | m_386),
1298 /* X86_TUNE_SINGLE_STRINGOP */
1299 m_386 | m_PENT4 | m_NOCONA,
1301 /* X86_TUNE_QIMODE_MATH */
1304 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1305 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1306 might be considered for Generic32 if our scheme for avoiding partial
1307 stalls was more effective. */
1310 /* X86_TUNE_PROMOTE_QI_REGS */
1313 /* X86_TUNE_PROMOTE_HI_REGS */
1316 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1317 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1319 /* X86_TUNE_ADD_ESP_8 */
1320 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1321 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_SUB_ESP_4 */
1324 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_SUB_ESP_8 */
1327 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1328 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1330 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1331 for DFmode copies */
1332 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1333 | m_GENERIC | m_GEODE),
1335 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1336 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1338 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1339 conflict here in between PPro/Pentium4 based chips that treat 128bit
1340 SSE registers as single units versus K8 based chips that divide SSE
1341 registers to two 64bit halves. This knob promotes all store destinations
1342 to be 128bit to allow register renaming on 128bit SSE units, but usually
1343 results in one extra microop on 64bit SSE units. Experimental results
1344 shows that disabling this option on P4 brings over 20% SPECfp regression,
1345 while enabling it on K8 brings roughly 2.4% regression that can be partly
1346 masked by careful scheduling of moves. */
1347 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1349 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1352 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1353 are resolved on SSE register parts instead of whole registers, so we may
1354 maintain just lower part of scalar values in proper format leaving the
1355 upper part undefined. */
1358 /* X86_TUNE_SSE_TYPELESS_STORES */
1361 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1362 m_PPRO | m_PENT4 | m_NOCONA,
1364 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1365 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1367 /* X86_TUNE_PROLOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_EPILOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373 /* X86_TUNE_SHIFT1 */
1376 /* X86_TUNE_USE_FFREEP */
1379 /* X86_TUNE_INTER_UNIT_MOVES */
1380 ~(m_AMD_MULTIPLE | m_GENERIC),
1382 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1385 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1386 than 4 branch instructions in the 16 byte window. */
1387 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1389 /* X86_TUNE_SCHEDULE */
1390 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1392 /* X86_TUNE_USE_BT */
1395 /* X86_TUNE_USE_INCDEC */
1396 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1398 /* X86_TUNE_PAD_RETURNS */
1399 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_EXT_80387_CONSTANTS */
1402 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1404 /* X86_TUNE_SHORTEN_X87_SSE */
1407 /* X86_TUNE_AVOID_VECTOR_DECODE */
1410 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1411 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1414 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1415 vector path on AMD machines. */
1416 m_K8 | m_GENERIC64 | m_AMDFAM10,
1418 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1422 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1426 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1427 but one byte longer. */
1430 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1431 operand that cannot be represented using a modRM byte. The XOR
1432 replacement is long decoded, so this split helps here as well. */
1435 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1436 from integer to FP. */
1440 /* Feature tests against the various architecture variations.  Indexed by
   X86_ARCH_*; each entry is a mask of m_* bits for which the architectural
   feature exists.  NOTE(review): the initializer values for the CMPXCHG,
   CMPXCHG8B, XADD and BSWAP entries (and the closing brace) appear to be
   missing from this excerpt — verify against the full file.  */
1441 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1442 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1443 ~(m_386 | m_486 | m_PENT | m_K6),
1445 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1448 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1451 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1454 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPUs for which accumulating outgoing stack arguments is preferred.  */
1458 static const unsigned int x86_accumulate_outgoing_args
1459 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* CPUs for which the "always fancy math 387" behavior applies.  */
1461 static const unsigned int x86_arch_always_fancy_math_387
1462 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1463 | m_NOCONA | m_CORE2 | m_GENERIC;
1465 static enum stringop_alg stringop_alg = no_stringop;
1467 /* In case the average insn count for single function invocation is
1468 lower than this constant, emit fast (but longer) prologue and
1470 #define FAST_PROLOGUE_INSN_COUNT 20
1472 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1473 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1474 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1475 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1477 /* Array of the smallest class containing reg number REGNO, indexed by
1478 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1480 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1482 /* ax, dx, cx, bx */
1483 AREG, DREG, CREG, BREG,
1484 /* si, di, bp, sp */
1485 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1487 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1488 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1491 /* flags, fpsr, fpcr, frame */
1492 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1494 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1497 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1500 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1501 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1502 /* SSE REX registers */
1503 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1507 /* The "default" register map used in 32bit mode. */
1509 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1511 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1512 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1513 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1514 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1515 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1516 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1517 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1520 static int const x86_64_int_parameter_registers[6] =
1522 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1523 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1526 static int const x86_64_ms_abi_int_parameter_registers[4] =
1528 2 /*RCX*/, 1 /*RDX*/,
1529 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1532 static int const x86_64_int_return_registers[4] =
1534 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1537 /* The "default" register map used in 64bit mode. */
1538 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1540 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1541 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1542 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1543 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1544 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1545 8,9,10,11,12,13,14,15, /* extended integer registers */
1546 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1549 /* Define the register numbers to be used in Dwarf debugging information.
1550 The SVR4 reference port C compiler uses the following register numbers
1551 in its Dwarf output code:
1552 0 for %eax (gcc regno = 0)
1553 1 for %ecx (gcc regno = 2)
1554 2 for %edx (gcc regno = 1)
1555 3 for %ebx (gcc regno = 3)
1556 4 for %esp (gcc regno = 7)
1557 5 for %ebp (gcc regno = 6)
1558 6 for %esi (gcc regno = 4)
1559 7 for %edi (gcc regno = 5)
1560 The following three DWARF register numbers are never generated by
1561 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1562 believes these numbers have these meanings.
1563 8 for %eip (no gcc equivalent)
1564 9 for %eflags (gcc regno = 17)
1565 10 for %trapno (no gcc equivalent)
1566 It is not at all clear how we should number the FP stack registers
1567 for the x86 architecture. If the version of SDB on x86/svr4 were
1568 a bit less brain dead with respect to floating-point then we would
1569 have a precedent to follow with respect to DWARF register numbers
1570 for x86 FP registers, but the SDB on x86/svr4 is so completely
1571 broken with respect to FP registers that it is hardly worth thinking
1572 of it as something to strive for compatibility with.
1573 The version of x86/svr4 SDB I have at the moment does (partially)
1574 seem to believe that DWARF register number 11 is associated with
1575 the x86 register %st(0), but that's about all. Higher DWARF
1576 register numbers don't seem to be associated with anything in
1577 particular, and even for DWARF regno 11, SDB only seems to under-
1578 stand that it should say that a variable lives in %st(0) (when
1579 asked via an `=' command) if we said it was in DWARF regno 11,
1580 but SDB still prints garbage when asked for the value of the
1581 variable in question (via a `/' command).
1582 (Also note that the labels SDB prints for various FP stack regs
1583 when doing an `x' command are all wrong.)
1584 Note that these problems generally don't affect the native SVR4
1585 C compiler because it doesn't allow the use of -O with -g and
1586 because when it is *not* optimizing, it allocates a memory
1587 location for each floating-point variable, and the memory
1588 location is what gets described in the DWARF AT_location
1589 attribute for the variable in question.
1590 Regardless of the severe mental illness of the x86/svr4 SDB, we
1591 do something sensible here and we use the following DWARF
1592 register numbers. Note that these are all stack-top-relative
1594 11 for %st(0) (gcc regno = 8)
1595 12 for %st(1) (gcc regno = 9)
1596 13 for %st(2) (gcc regno = 10)
1597 14 for %st(3) (gcc regno = 11)
1598 15 for %st(4) (gcc regno = 12)
1599 16 for %st(5) (gcc regno = 13)
1600 17 for %st(6) (gcc regno = 14)
1601 18 for %st(7) (gcc regno = 15)
1603 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1605 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1606 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1607 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1608 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1609 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1610 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1611 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1614 /* Test and compare insns in i386.md store the information needed to
1615 generate branch and scc insns here. */
1617 rtx ix86_compare_op0 = NULL_RTX;
1618 rtx ix86_compare_op1 = NULL_RTX;
1619 rtx ix86_compare_emitted = NULL_RTX;
1621 /* Size of the register save area. */
1622 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1624 /* Define the structure for the machine field in struct function. */
1626 struct stack_local_entry GTY(())
1628 unsigned short mode;
1631 struct stack_local_entry *next;
1634 /* Structure describing stack frame layout.
1635 Stack grows downward:
1641 saved frame pointer if frame_pointer_needed
1642 <- HARD_FRAME_POINTER
1647 [va_arg registers] (
1648 > to_allocate <- FRAME_POINTER
1658 HOST_WIDE_INT frame;
1660 int outgoing_arguments_size;
1663 HOST_WIDE_INT to_allocate;
1664 /* The offsets relative to ARG_POINTER. */
1665 HOST_WIDE_INT frame_pointer_offset;
1666 HOST_WIDE_INT hard_frame_pointer_offset;
1667 HOST_WIDE_INT stack_pointer_offset;
1669 /* When save_regs_using_mov is set, emit prologue using
1670 move instead of push instructions. */
1671 bool save_regs_using_mov;
1674 /* Code model option. */
1675 enum cmodel ix86_cmodel;
/* Assembler dialect in effect; defaults to AT&T syntax. */
1677 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS access dialect in effect; defaults to the GNU dialect. */
1679 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1681 /* Which unit we are generating floating point math for. */
1682 enum fpmath_unit ix86_fpmath;
1684 /* Which cpu are we scheduling for. */
1685 enum processor_type ix86_tune;
1687 /* Which instruction set architecture to use. */
1688 enum processor_type ix86_arch;
1690 /* true if sse prefetch instruction is not NOOP. */
1691 int x86_prefetch_sse;
1693 /* ix86_regparm_string as a number */
1694 static int ix86_regparm;
1696 /* -mstackrealign option */
1697 extern int ix86_force_align_arg_pointer;
1698 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1700 /* Preferred alignment for stack boundary in bits. */
1701 unsigned int ix86_preferred_stack_boundary;
1703 /* Values 1-5: see jump.c */
1704 int ix86_branch_cost;
1706 /* Variables which are this size or smaller are put in the data/bss
1707 or ldata/lbss sections. */
1709 int ix86_section_threshold = 65536;
1711 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1712 char internal_label_prefix[16];
1713 int internal_label_prefix_len;
1715 /* Fence to use after loop using movnt. */
1715 /* Fence to use after loop using movnt. */
1718 /* Register class used for passing given 64bit part of the argument.
1719 These represent classes as documented by the PS ABI, with the exception
1720 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1721 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1723 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1724 whenever possible (upper half does contain padding). */
1725 enum x86_64_reg_class
1728 X86_64_INTEGER_CLASS,
1729 X86_64_INTEGERSI_CLASS,
1736 X86_64_COMPLEX_X87_CLASS,
1739 static const char * const x86_64_reg_class_name[] =
1741 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1742 "sseup", "x87", "x87up", "cplx87", "no"
1745 #define MAX_CLASSES 4
1747 /* Table of constants used by fldpi, fldln2, etc.... */
1748 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1749 static bool ext_80387_constants_init = 0;
1752 static struct machine_function * ix86_init_machine_status (void);
1753 static rtx ix86_function_value (const_tree, const_tree, bool);
1754 static int ix86_function_regparm (const_tree, const_tree);
1755 static void ix86_compute_frame_layout (struct ix86_frame *);
1756 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1760 /* The svr4 ABI for the i386 says that records and unions are returned
1762 #ifndef DEFAULT_PCC_STRUCT_RETURN
1763 #define DEFAULT_PCC_STRUCT_RETURN 1
1766 /* Bit flags that specify the ISA we are compiling for. */
1767 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1769 /* A mask of ix86_isa_flags that includes bit X if X
1770 was set or cleared on the command line. */
1771 static int ix86_isa_flags_explicit;
1773 /* Define a set of ISAs which are available when a given ISA is
1774 enabled. MMX and SSE ISAs are handled separately. */
/* The *_SET mask for ISA X is X plus everything X requires: enabling an
   ISA transitively enables its prerequisites (e.g. SSE3 pulls in SSE2,
   which pulls in SSE).  */
1776 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1777 #define OPTION_MASK_ISA_3DNOW_SET \
1778 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1780 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1781 #define OPTION_MASK_ISA_SSE2_SET \
1782 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1783 #define OPTION_MASK_ISA_SSE3_SET \
1784 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1785 #define OPTION_MASK_ISA_SSSE3_SET \
1786 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1787 #define OPTION_MASK_ISA_SSE4_1_SET \
1788 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1789 #define OPTION_MASK_ISA_SSE4_2_SET \
1790 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1792 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1794 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1796 #define OPTION_MASK_ISA_SSE4A_SET \
1797 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1798 #define OPTION_MASK_ISA_SSE5_SET \
1799 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1801 /* Define a set of ISAs which aren't available when a given ISA is
1802 disabled. MMX and SSE ISAs are handled separately. */
/* The *_UNSET mask for ISA X is the inverse closure: disabling X also
   disables everything that depends on X (e.g. -mno-sse2 drops SSE3 and
   its dependents).  */
1804 #define OPTION_MASK_ISA_MMX_UNSET \
1805 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1806 #define OPTION_MASK_ISA_3DNOW_UNSET \
1807 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1808 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1810 #define OPTION_MASK_ISA_SSE_UNSET \
1811 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1812 #define OPTION_MASK_ISA_SSE2_UNSET \
1813 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1814 #define OPTION_MASK_ISA_SSE3_UNSET \
1815 (OPTION_MASK_ISA_SSE3 \
1816 | OPTION_MASK_ISA_SSSE3_UNSET \
1817 | OPTION_MASK_ISA_SSE4A_UNSET )
1818 #define OPTION_MASK_ISA_SSSE3_UNSET \
1819 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1820 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1821 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1822 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1824 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
1826 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1828 #define OPTION_MASK_ISA_SSE4A_UNSET \
1829 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1831 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1833 /* Vectorization library interface and handlers. */
/* Hook invoked by the vectorizer to map scalar built-ins to library
   vector routines; NULL unless -mveclibabi= selected a handler below. */
1834 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1835 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1836 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1838 /* Implement TARGET_HANDLE_OPTION. */
/* Process a single -m ISA option.  Every ISA follows the same pattern:
   the "enable" path ORs the ISA's *_SET closure (the ISA plus its
   prerequisites) into ix86_isa_flags, while the "disable" path clears
   the *_UNSET closure (the ISA plus its dependents).  In both cases the
   touched bits are also recorded in ix86_isa_flags_explicit so that
   override_options knows the user chose them explicitly.
   NOTE(review): the switch/case dispatch lines are elided in this
   listing; the pairs below are in SET/UNSET order per ISA.  */
1841 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
/* MMX */
1848 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1849 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1853 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1854 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
/* 3DNow! */
1861 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1862 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1866 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1867 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
/* SSE */
1877 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1878 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1882 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1883 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
/* SSE2 */
1890 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1891 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1895 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1896 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
/* SSE3 */
1903 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1904 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1908 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1909 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
/* SSSE3 */
1916 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1917 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1921 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1922 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
/* SSE4.1 */
1929 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1930 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1934 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1935 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
/* SSE4.2 */
1942 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1943 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1947 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1948 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
/* SSE4 umbrella (4.1 + 4.2) */
1953 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1954 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
1958 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1959 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
/* SSE4A */
1965 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1966 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1970 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1971 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
/* SSE5 */
1978 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1979 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
1983 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1984 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1993 /* Sometimes certain combinations of command options do not make
1994 sense on a particular target machine. You can define a macro
1995 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1996 defined, is executed once just after all the command options have
1999 Don't use this macro to turn on various extra optimizations for
2000 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2003 override_options (void)
2006 int ix86_tune_defaulted = 0;
2007 int ix86_arch_specified = 0;
2008 unsigned int ix86_arch_mask, ix86_tune_mask;
2010 /* Comes from final.c -- no real reason to change it. */
2011 #define MAX_CODE_ALIGN 16
/* Per-processor defaults: cost table plus -falign-* values, indexed by
   enum processor_type (must stay in the same order as that enum). */
2015 const struct processor_costs *cost; /* Processor costs */
2016 const int align_loop; /* Default alignments. */
2017 const int align_loop_max_skip;
2018 const int align_jump;
2019 const int align_jump_max_skip;
2020 const int align_func;
2022 const processor_target_table[PROCESSOR_max] =
2024 {&i386_cost, 4, 3, 4, 3, 4},
2025 {&i486_cost, 16, 15, 16, 15, 16},
2026 {&pentium_cost, 16, 7, 16, 7, 16},
2027 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2028 {&geode_cost, 0, 0, 0, 0, 0},
2029 {&k6_cost, 32, 7, 32, 7, 32},
2030 {&athlon_cost, 16, 7, 16, 7, 16},
2031 {&pentium4_cost, 0, 0, 0, 0, 0},
2032 {&k8_cost, 16, 7, 16, 7, 16},
2033 {&nocona_cost, 0, 0, 0, 0, 0},
2034 {&core2_cost, 16, 10, 16, 10, 16},
2035 {&generic32_cost, 16, 7, 16, 7, 16},
2036 {&generic64_cost, 16, 10, 16, 10, 16},
2037 {&amdfam10_cost, 32, 24, 32, 7, 32}
/* Names used to map TARGET_CPU_DEFAULT to a -mtune string. */
2040 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Per-CPU feature bits used only by the alias table below. */
2071 PTA_PREFETCH_SSE = 1 << 4,
2073 PTA_3DNOW_A = 1 << 6,
2077 PTA_POPCNT = 1 << 10,
2079 PTA_SSE4A = 1 << 12,
2080 PTA_NO_SAHF = 1 << 13,
2081 PTA_SSE4_1 = 1 << 14,
2082 PTA_SSE4_2 = 1 << 15,
2085 PTA_PCLMUL = 1 << 18
/* Maps each -march=/-mtune= CPU name to a scheduling model and the PTA_*
   feature set that CPU implies.  */
2090 const char *const name; /* processor name or nickname. */
2091 const enum processor_type processor;
2092 const unsigned /*enum pta_flags*/ flags;
2094 const processor_alias_table[] =
2096 {"i386", PROCESSOR_I386, 0},
2097 {"i486", PROCESSOR_I486, 0},
2098 {"i586", PROCESSOR_PENTIUM, 0},
2099 {"pentium", PROCESSOR_PENTIUM, 0},
2100 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2101 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2102 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2103 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2104 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2105 {"i686", PROCESSOR_PENTIUMPRO, 0},
2106 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2107 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2108 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2109 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2110 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2111 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2112 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2113 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2114 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2115 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2116 | PTA_CX16 | PTA_NO_SAHF)},
2117 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2118 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2121 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2122 |PTA_PREFETCH_SSE)},
2123 {"k6", PROCESSOR_K6, PTA_MMX},
2124 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2125 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2126 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2127 | PTA_PREFETCH_SSE)},
2128 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2129 | PTA_PREFETCH_SSE)},
2130 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2132 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2134 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2136 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2137 | PTA_MMX | PTA_SSE | PTA_SSE2
2139 {"k8", PROCESSOR_K8, (PTA_64BIT
2140 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2141 | PTA_SSE | PTA_SSE2
2143 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2144 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2145 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2147 {"opteron", PROCESSOR_K8, (PTA_64BIT
2148 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2149 | PTA_SSE | PTA_SSE2
2151 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2152 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2153 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2155 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2156 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2157 | PTA_SSE | PTA_SSE2
2159 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2160 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2161 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2163 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2164 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2165 | PTA_SSE | PTA_SSE2
2167 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2168 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2169 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2171 | PTA_CX16 | PTA_ABM)},
2172 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2173 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2174 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2176 | PTA_CX16 | PTA_ABM)},
2177 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2178 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2181 int const pta_size = ARRAY_SIZE (processor_alias_table);
/* Give the OS-specific subtarget headers a chance to adjust options first. */
2183 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2184 SUBTARGET_OVERRIDE_OPTIONS;
2187 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2188 SUBSUBTARGET_OVERRIDE_OPTIONS;
2191 /* -fPIC is the default for x86_64. */
2192 if (TARGET_MACHO && TARGET_64BIT)
2195 /* Set the default values for switches whose default depends on TARGET_64BIT
2196 in case they weren't overwritten by command line options. */
/* A value of 2 means "not set on the command line" for these flags. */
2199 /* Mach-O doesn't support omitting the frame pointer for now. */
2200 if (flag_omit_frame_pointer == 2)
2201 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2202 if (flag_asynchronous_unwind_tables == 2)
2203 flag_asynchronous_unwind_tables = 1;
2204 if (flag_pcc_struct_return == 2)
2205 flag_pcc_struct_return = 0;
/* 32-bit defaults for the same trio of flags. */
2209 if (flag_omit_frame_pointer == 2)
2210 flag_omit_frame_pointer = 0;
2211 if (flag_asynchronous_unwind_tables == 2)
2212 flag_asynchronous_unwind_tables = 0;
2213 if (flag_pcc_struct_return == 2)
2214 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2217 /* Need to check -mtune=generic first. */
2218 if (ix86_tune_string)
2220 if (!strcmp (ix86_tune_string, "generic")
2221 || !strcmp (ix86_tune_string, "i686")
2222 /* As special support for cross compilers we read -mtune=native
2223 as -mtune=generic. With native compilers we won't see the
2224 -mtune=native, as it was changed by the driver. */
2225 || !strcmp (ix86_tune_string, "native"))
2228 ix86_tune_string = "generic64";
2230 ix86_tune_string = "generic32";
/* Reject things like -mtune=generic32/generic64 given explicitly. */
2232 else if (!strncmp (ix86_tune_string, "generic", 7))
2233 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
/* With no -mtune, tuning falls back to -march, then to the built-in default. */
2237 if (ix86_arch_string)
2238 ix86_tune_string = ix86_arch_string;
2239 if (!ix86_tune_string)
2241 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2242 ix86_tune_defaulted = 1;
2245 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2246 need to use a sensible tune option. */
2247 if (!strcmp (ix86_tune_string, "generic")
2248 || !strcmp (ix86_tune_string, "x86-64")
2249 || !strcmp (ix86_tune_string, "i686"))
2252 ix86_tune_string = "generic64";
2254 ix86_tune_string = "generic32";
/* Map -mstringop-strategy= to the stringop algorithm enumeration. */
2257 if (ix86_stringop_string)
2259 if (!strcmp (ix86_stringop_string, "rep_byte"))
2260 stringop_alg = rep_prefix_1_byte;
2261 else if (!strcmp (ix86_stringop_string, "libcall"))
2262 stringop_alg = libcall;
2263 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2264 stringop_alg = rep_prefix_4_byte;
2265 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2266 stringop_alg = rep_prefix_8_byte;
2267 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2268 stringop_alg = loop_1_byte;
2269 else if (!strcmp (ix86_stringop_string, "loop"))
2270 stringop_alg = loop;
2271 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2272 stringop_alg = unrolled_loop;
2274 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2276 if (!strcmp (ix86_tune_string, "x86-64"))
2277 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2278 "-mtune=generic instead as appropriate.");
2280 if (!ix86_arch_string)
2281 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2283 ix86_arch_specified = 1;
2285 if (!strcmp (ix86_arch_string, "generic"))
2286 error ("generic CPU can be used only for -mtune= switch")2286;
2287 if (!strncmp (ix86_arch_string, "generic", 7))
2288 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Translate -mcmodel=; PIC variants are selected when -fPIC is in force. */
2290 if (ix86_cmodel_string != 0)
2292 if (!strcmp (ix86_cmodel_string, "small"))
2293 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2294 else if (!strcmp (ix86_cmodel_string, "medium"))
2295 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2296 else if (!strcmp (ix86_cmodel_string, "large"))
2297 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2299 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2300 else if (!strcmp (ix86_cmodel_string, "32"))
2301 ix86_cmodel = CM_32;
2302 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2303 ix86_cmodel = CM_KERNEL;
2305 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2309 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2310 use of rip-relative addressing. This eliminates fixups that
2311 would otherwise be needed if this object is to be placed in a
2312 DLL, and is essentially just as efficient as direct addressing. */
2313 if (TARGET_64BIT_MS_ABI)
2314 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2315 else if (TARGET_64BIT)
2316 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2318 ix86_cmodel = CM_32;
/* Translate -masm= into the assembler dialect enumeration. */
2320 if (ix86_asm_string != 0)
2323 && !strcmp (ix86_asm_string, "intel"))
2324 ix86_asm_dialect = ASM_INTEL;
2325 else if (!strcmp (ix86_asm_string, "att"))
2326 ix86_asm_dialect = ASM_ATT;
2328 error ("bad value (%s) for -masm= switch", ix86_asm_string);
/* Cross-check code model and ISA against the selected bitness. */
2330 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2331 error ("code model %qs not supported in the %s bit mode",
2332 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2333 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2334 sorry ("%i-bit mode not compiled in",
2335 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
/* Resolve -march=: find the CPU in the alias table, record the arch, and
   turn on each implied ISA unless the user set/cleared it explicitly. */
2337 for (i = 0; i < pta_size; i++)
2338 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2340 ix86_arch = processor_alias_table[i].processor;
2341 /* Default cpu tuning to the architecture. */
2342 ix86_tune = ix86_arch;
2344 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2345 error ("CPU you selected does not support x86-64 "
2348 if (processor_alias_table[i].flags & PTA_MMX
2349 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2350 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2351 if (processor_alias_table[i].flags & PTA_3DNOW
2352 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2353 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2354 if (processor_alias_table[i].flags & PTA_3DNOW_A
2355 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2356 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2357 if (processor_alias_table[i].flags & PTA_SSE
2358 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2359 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2360 if (processor_alias_table[i].flags & PTA_SSE2
2361 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2362 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2363 if (processor_alias_table[i].flags & PTA_SSE3
2364 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2365 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2366 if (processor_alias_table[i].flags & PTA_SSSE3
2367 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2368 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2369 if (processor_alias_table[i].flags & PTA_SSE4_1
2370 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2371 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2372 if (processor_alias_table[i].flags & PTA_SSE4_2
2373 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2374 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2375 if (processor_alias_table[i].flags & PTA_SSE4A
2376 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2377 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2378 if (processor_alias_table[i].flags & PTA_SSE5
2379 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2380 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
/* Non-ISA-mask features implied by the chosen CPU. */
2382 if (processor_alias_table[i].flags & PTA_ABM)
2384 if (processor_alias_table[i].flags & PTA_CX16)
2385 x86_cmpxchg16b = true;
2386 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2388 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2389 x86_prefetch_sse = true;
2390 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2392 if (processor_alias_table[i].flags & PTA_AES)
2394 if (processor_alias_table[i].flags & PTA_PCLMUL)
/* Ran off the end of the alias table: unknown -march= value. */
2401 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Mask the per-arch feature table down to the selected architecture. */
2403 ix86_arch_mask = 1u << ix86_arch;
2404 for (i = 0; i < X86_ARCH_LAST; ++i)
2405 ix86_arch_features[i] &= ix86_arch_mask;
/* Resolve -mtune= the same way; a defaulted tune that lacks x86-64
   support falls back to tuning for "x86-64" instead of erroring. */
2407 for (i = 0; i < pta_size; i++)
2408 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2410 ix86_tune = processor_alias_table[i].processor;
2411 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2413 if (ix86_tune_defaulted)
2415 ix86_tune_string = "x86-64";
2416 for (i = 0; i < pta_size; i++)
2417 if (! strcmp (ix86_tune_string,
2418 processor_alias_table[i].name))
2420 ix86_tune = processor_alias_table[i].processor;
2423 error ("CPU you selected does not support x86-64 "
2426 /* Intel CPUs have always interpreted SSE prefetch instructions as
2427 NOPs; so, we can enable SSE prefetch instructions even when
2428 -mtune (rather than -march) points us to a processor that has them.
2429 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2430 higher processors. */
2432 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2433 x86_prefetch_sse = true;
2437 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2439 /* Enable SSE2 if AES or PCLMUL is enabled. */
2440 if ((x86_aes || x86_pclmul)
2441 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2443 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2444 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
/* Mask the per-tune feature table down to the selected tuning target. */
2447 ix86_tune_mask = 1u << ix86_tune;
2448 for (i = 0; i < X86_TUNE_LAST; ++i)
2449 ix86_tune_features[i] &= ix86_tune_mask;
/* Select the cost model: size costs under -Os, otherwise the tuned CPU's
   cost table. */
2452 ix86_cost = &size_cost;
2454 ix86_cost = processor_target_table[ix86_tune].cost;
2456 /* Arrange to set up i386_stack_locals for all functions. */
2457 init_machine_status = ix86_init_machine_status;
2459 /* Validate -mregparm= value. */
2460 if (ix86_regparm_string)
2463 warning (0, "-mregparm is ignored in 64-bit mode");
2464 i = atoi (ix86_regparm_string);
2465 if (i < 0 || i > REGPARM_MAX)
2466 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2471 ix86_regparm = REGPARM_MAX;
2473 /* If the user has provided any of the -malign-* options,
2474 warn and use that value only if -falign-* is not set.
2475 Remove this code in GCC 3.2 or later. */
2476 if (ix86_align_loops_string)
2478 warning (0, "-malign-loops is obsolete, use -falign-loops");
2479 if (align_loops == 0)
2481 i = atoi (ix86_align_loops_string);
2482 if (i < 0 || i > MAX_CODE_ALIGN)
2483 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2485 align_loops = 1 << i;
2489 if (ix86_align_jumps_string)
2491 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2492 if (align_jumps == 0)
2494 i = atoi (ix86_align_jumps_string);
2495 if (i < 0 || i > MAX_CODE_ALIGN)
/* Fixed copy-paste bug: this branch validates -malign-jumps, but the
   diagnostic said "-malign-loops". */
2496 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2498 align_jumps = 1 << i;
2502 if (ix86_align_funcs_string)
2504 warning (0, "-malign-functions is obsolete, use -falign-functions");
2505 if (align_functions == 0)
2507 i = atoi (ix86_align_funcs_string);
2508 if (i < 0 || i > MAX_CODE_ALIGN)
/* Fixed copy-paste bug: this branch validates -malign-functions, but the
   diagnostic said "-malign-loops". */
2509 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2511 align_functions = 1 << i;
2515 /* Default align_* from the processor table. */
2516 if (align_loops == 0)
2518 align_loops = processor_target_table[ix86_tune].align_loop;
2519 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2521 if (align_jumps == 0)
2523 align_jumps = processor_target_table[ix86_tune].align_jump;
2524 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2526 if (align_functions == 0)
2528 align_functions = processor_target_table[ix86_tune].align_func;
2531 /* Validate -mbranch-cost= value, or provide default. */
2532 ix86_branch_cost = ix86_cost->branch_cost;
2533 if (ix86_branch_cost_string)
2535 i = atoi (ix86_branch_cost_string);
2537 error ("-mbranch-cost=%d is not between 0 and 5", i);
2539 ix86_branch_cost = i;
/* -mlarge-data-threshold=: negative values are rejected. */
2541 if (ix86_section_threshold_string)
2543 i = atoi (ix86_section_threshold_string);
2545 error ("-mlarge-data-threshold=%d is negative", i);
2547 ix86_section_threshold = i;
/* Translate -mtls-dialect= into the TLS dialect enumeration. */
2550 if (ix86_tls_dialect_string)
2552 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2553 ix86_tls_dialect = TLS_DIALECT_GNU;
2554 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2555 ix86_tls_dialect = TLS_DIALECT_GNU2;
2556 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2557 ix86_tls_dialect = TLS_DIALECT_SUN;
2559 error ("bad value (%s) for -mtls-dialect= switch",
2560 ix86_tls_dialect_string);
/* Validate -mpc32/-mpc64/-mpc80 x87 precision selection. */
2563 if (ix87_precision_string)
2565 i = atoi (ix87_precision_string)2565;
2566 if (i != 32 && i != 64 && i != 80)
2567 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
/* Fold in the 64-bit subtarget default target_flags the user did not
   set explicitly. */
2572 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2574 /* Enable by default the SSE and MMX builtins. Do allow the user to
2575 explicitly disable any of these. In particular, disabling SSE and
2576 MMX for kernel code is extremely useful. */
2577 if (!ix86_arch_specified)
2579 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2580 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2583 warning (0, "-mrtd is ignored in 64bit mode");
/* 32-bit counterpart of the defaults above. */
2587 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2589 if (!ix86_arch_specified)
2591 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2593 /* i386 ABI does not specify red zone. It still makes sense to use it
2594 when programmer takes care to keep the stack from being destroyed. */
2595 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2596 target_flags |= MASK_NO_RED_ZONE;
2599 /* Keep nonleaf frame pointers. */
2600 if (flag_omit_frame_pointer)
2601 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2602 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2603 flag_omit_frame_pointer = 1;
2605 /* If we're doing fast math, we don't care about comparison order
2606 wrt NaNs. This lets us use a shorter comparison sequence. */
2607 if (flag_finite_math_only)
2608 target_flags &= ~MASK_IEEE_FP;
2610 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2611 since the insns won't need emulation. */
2612 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2613 target_flags &= ~MASK_NO_FANCY_MATH_387;
2615 /* Likewise, if the target doesn't have a 387, or we've specified
2616 software floating point, don't use 387 inline intrinsics. */
2618 target_flags |= MASK_NO_FANCY_MATH_387;
2620 /* Turn on MMX builtins for -msse. */
2623 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2624 x86_prefetch_sse = true;
2627 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2628 if (TARGET_SSE4_2 || TARGET_ABM)
2631 /* Validate -mpreferred-stack-boundary= value, or provide default.
2632 The default of 128 bits is for Pentium III's SSE __m128. We can't
2633 change it because of optimize_size. Otherwise, we can't mix object
2634 files compiled with -Os and -On. */
2635 ix86_preferred_stack_boundary = 128;
2636 if (ix86_preferred_stack_boundary_string)
2638 i = atoi (ix86_preferred_stack_boundary_string);
2639 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2640 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2641 TARGET_64BIT ? 4 : 2)2641;
/* The option value is log2 of the boundary in bytes. */
2643 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2646 /* Accept -msseregparm only if at least SSE support is enabled. */
2647 if (TARGET_SSEREGPARM
2649 error ("-msseregparm used without SSE enabled");
/* Resolve -mfpmath=; an fpmath unit whose ISA is disabled falls back with
   a warning rather than an error. */
2651 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2652 if (ix86_fpmath_string != 0)
2654 if (! strcmp (ix86_fpmath_string, "387"))
2655 ix86_fpmath = FPMATH_387;
2656 else if (! strcmp (ix86_fpmath_string, "sse"))
2660 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2661 ix86_fpmath = FPMATH_387;
2664 ix86_fpmath = FPMATH_SSE;
2666 else if (! strcmp (ix86_fpmath_string, "387,sse")
2667 || ! strcmp (ix86_fpmath_string, "sse,387"))
2671 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2672 ix86_fpmath = FPMATH_387;
2674 else if (!TARGET_80387)
2676 warning (0, "387 instruction set disabled, using SSE arithmetics");
2677 ix86_fpmath = FPMATH_SSE;
2680 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2683 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2686 /* If the i387 is disabled, then do not return values in it. */
2688 target_flags &= ~MASK_FLOAT_RETURNS;
2690 /* Use external vectorized library in vectorizing intrinsics. */
2691 if (ix86_veclibabi_string)
2693 if (strcmp (ix86_veclibabi_string, "svml") == 0)
2694 ix86_veclib_handler = ix86_veclibabi_svml;
2695 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
2696 ix86_veclib_handler = ix86_veclibabi_acml;
2698 error ("unknown vectorization library ABI type (%s) for "
2699 "-mveclibabi= switch", ix86_veclibabi_string);
/* Tune-driven default for -maccumulate-outgoing-args. */
2702 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2703 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2705 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2707 /* ??? Unwind info is not correct around the CFG unless either a frame
2708 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2709 unwind info generation to be aware of the CFG and propagating states
2711 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2712 || flag_exceptions || flag_non_call_exceptions)
2713 && flag_omit_frame_pointer
2714 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2716 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2717 warning (0, "unwind tables currently require either a frame pointer "
2718 "or -maccumulate-outgoing-args for correctness")2718;
2719 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2722 /* If stack probes are required, the space used for large function
2723 arguments on the stack must also be probed, so enable
2724 -maccumulate-outgoing-args so this happens in the prologue. */
2725 if (TARGET_STACK_PROBE
2726 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2728 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2729 warning (0, "stack probing requires -maccumulate-outgoing-args "
2731 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2734 /* For sane SSE instruction set generation we need fcomi instruction.
2735 It is safe to enable all CMOVE instructions. */
2739 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2742 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2743 p = strchr (internal_label_prefix, 'X');
2744 internal_label_prefix_len = p - internal_label_prefix;
2748 /* When scheduling description is not available, disable scheduler pass
2749 so it won't slow down the compilation and make x87 code slower. */
2750 if (!TARGET_SCHEDULE)
2751 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
/* Seed --param defaults from the cost model unless set on the command line. */
2753 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2754 set_param_value ("simultaneous-prefetches",
2755 ix86_cost->simultaneous_prefetches);
2756 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2757 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2758 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2759 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2760 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2761 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2763 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2764 can be optimized to ap = __builtin_next_arg (0). */
2765 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
2766 targetm.expand_builtin_va_start = NULL;
2769 /* Return true if this goes in large data/bss. */
/* EXP is a decl or type node; only the medium code models have a separate
   large-data area, so everything else is rejected up front.  */
2772 ix86_in_large_data_p (tree exp)
2774 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2777 /* Functions are never large data. */
2778 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute naming .ldata/.lbss forces large data. */
2781 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2783 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2784 if (strcmp (section, ".ldata") == 0
2785 || strcmp (section, ".lbss") == 0)
/* Otherwise decide by object size against -mlarge-data-threshold. */
2791 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2793 /* If this is an incomplete type with size 0, then we can't put it
2794 in data because it might be too big when completed. */
2795 if (!size || size > ix86_section_threshold)
2802 /* Switch to the appropriate section for output of DECL.
2803 DECL is either a `VAR_DECL' node or a constant of some sort.
2804 RELOC indicates whether forming the initial value of DECL requires
2805 link-time relocations. */
2807 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* Medium-model large objects are redirected to the .ldata/.lbss family of
   sections; everything else defers to the generic ELF selector below. */
2811 x86_64_elf_select_section (tree decl, int reloc,
2812 unsigned HOST_WIDE_INT align)
2814 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2815 && ix86_in_large_data_p (decl))
2817 const char *sname = NULL;
2818 unsigned int flags = SECTION_WRITE;
2819 switch (categorize_decl_for_section (decl, reloc))
2824 case SECCAT_DATA_REL:
2825 sname = ".ldata.rel";
2827 case SECCAT_DATA_REL_LOCAL:
2828 sname = ".ldata.rel.local";
2830 case SECCAT_DATA_REL_RO:
2831 sname = ".ldata.rel.ro";
2833 case SECCAT_DATA_REL_RO_LOCAL:
2834 sname = ".ldata.rel.ro.local";
2838 flags |= SECTION_BSS;
2841 case SECCAT_RODATA_MERGE_STR:
2842 case SECCAT_RODATA_MERGE_STR_INIT:
2843 case SECCAT_RODATA_MERGE_CONST:
2847 case SECCAT_SRODATA:
2854 /* We don't split these for medium model. Place them into
2855 default sections and hope for best. */
2857 case SECCAT_EMUTLS_VAR:
2858 case SECCAT_EMUTLS_TMPL:
2863 /* We might get called with string constants, but get_named_section
2864 doesn't like them as they are not DECLs. Also, we need to set
2865 flags in that case. */
2867 return get_section (sname, flags, NULL);
2868 return get_named_section (decl, sname, reloc);
2871 return default_elf_select_section (decl, reloc, align);
2874 /* Build up a unique section name, expressed as a
2875 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2876 RELOC indicates whether the initial value of EXP requires
2877 link-time relocations. */
2879 static void ATTRIBUTE_UNUSED
2880 x86_64_elf_unique_section (tree decl, int reloc)
/* NOTE(review): break statements and some case labels are omitted from
   this extract; consult the full source for the exact control flow.  */
2882 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2883 && ix86_in_large_data_p (decl))
2885 const char *prefix = NULL;
2886 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2887 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2889 switch (categorize_decl_for_section (decl, reloc))
2892 case SECCAT_DATA_REL:
2893 case SECCAT_DATA_REL_LOCAL:
2894 case SECCAT_DATA_REL_RO:
2895 case SECCAT_DATA_REL_RO_LOCAL:
2896 prefix = one_only ? ".ld" : ".ldata";
2899 prefix = one_only ? ".lb" : ".lbss";
2902 case SECCAT_RODATA_MERGE_STR:
2903 case SECCAT_RODATA_MERGE_STR_INIT:
2904 case SECCAT_RODATA_MERGE_CONST:
2905 prefix = one_only ? ".lr" : ".lrodata";
2907 case SECCAT_SRODATA:
2914 /* We don't split these for medium model. Place them into
2915 default sections and hope for best. */
2917 case SECCAT_EMUTLS_VAR:
2918 prefix = targetm.emutls.var_section;
2920 case SECCAT_EMUTLS_TMPL:
2921 prefix = targetm.emutls.tmpl_section;
2926 const char *name, *linkonce;
2929 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2930 name = targetm.strip_name_encoding (name);
2932 /* If we're using one_only, then there needs to be a .gnu.linkonce
2933 prefix to the section name. */
2934 linkonce = one_only ? ".gnu.linkonce" : "";
2936 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
2938 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
/* Non-large data uses the generic unique-section machinery.  */
2942 default_unique_section (decl, reloc);
2945 #ifdef COMMON_ASM_OP
2946 /* This says how to output assembler code to declare an
2947 uninitialized external linkage data object.
2949 For medium model x86-64 we need to use .largecomm opcode for
2952 x86_elf_aligned_common (FILE *file,
2953 const char *name, unsigned HOST_WIDE_INT size,
/* Large objects in the medium models go out via .largecomm so the
   linker can place them beyond the 2GB small-data limit; everything
   else uses the normal COMMON_ASM_OP directive.  */
2956 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2957 && size > (unsigned int)ix86_section_threshold)
2958 fprintf (file, ".largecomm\t");
2960 fprintf (file, "%s", COMMON_ASM_OP);
2961 assemble_name (file, name);
/* Emit ",SIZE,ALIGN" with the alignment converted from bits to bytes.  */
2962 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2963 size, align / BITS_PER_UNIT);
2967 /* Utility function for targets to use in implementing
2968 ASM_OUTPUT_ALIGNED_BSS. */
2971 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2972 const char *name, unsigned HOST_WIDE_INT size,
/* Large BSS objects in the medium models go to .lbss; everything else
   goes to the regular BSS section.  */
2975 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2976 && size > (unsigned int)ix86_section_threshold)
2977 switch_to_section (get_named_section (decl, ".lbss", 0));
2979 switch_to_section (bss_section);
2980 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2981 #ifdef ASM_DECLARE_OBJECT_NAME
2982 last_assemble_variable_decl = decl;
2983 ASM_DECLARE_OBJECT_NAME (file, name, decl)
2985 /* Standard thing is just output label for the object. */
2986 ASM_OUTPUT_LABEL (file, name);
2987 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the label is distinct for size 0.  */
2988 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set target-dependent optimization defaults for the given -O LEVEL.
   NOTE(review): some lines (e.g. the condition guarding the scheduler
   and the TARGET_MACHO guard around flag_errno_math) are omitted from
   this extract.  */
2992 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2994 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2995 make the problem with not enough registers even worse. */
2996 #ifdef INSN_SCHEDULING
2998 flag_schedule_insns = 0;
3002 /* The Darwin libraries never set errno, so we might as well
3003 avoid calling them when that's the only reason we would. */
3004 flag_errno_math = 0;
3006 /* The default values of these switches depend on the TARGET_64BIT
3007 that is not known at this moment. Mark these values with 2 and
3008 let the user override these. In case there is no command line option
3009 specifying them, we will set the defaults in override_options. */
3011 flag_omit_frame_pointer = 2;
3012 flag_pcc_struct_return = 2;
3013 flag_asynchronous_unwind_tables = 2;
3014 flag_vect_cost_model = 1;
3015 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3016 SUBTARGET_OPTIMIZATION_OPTIONS;
3020 /* Decide whether we can make a sibling call to a function. DECL is the
3021 declaration of the function being targeted by the call and EXP is the
3022 CALL_EXPR representing the call. */
/* NOTE(review): the function header line, local declarations and the
   bare `return false;`/`return true;` lines are not visible in this
   extract; the comments below describe only what is shown.  */
3025 ix86_function_ok_for_sibcall (tree decl, tree exp)
3030 /* If we are generating position-independent code, we cannot sibcall
3031 optimize any indirect call, or a direct call to a global function,
3032 as the PLT requires %ebx be live. */
3033 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Dig the function type out of the CALL_EXPR's function operand.  */
3040 func = TREE_TYPE (CALL_EXPR_FN (exp));
3041 if (POINTER_TYPE_P (func))
3042 func = TREE_TYPE (func);
3045 /* Check that the return value locations are the same. Like
3046 if we are returning floats on the 80387 register stack, we cannot
3047 make a sibcall from a function that doesn't return a float to a
3048 function that does or, conversely, from a function that does return
3049 a float to a function that doesn't; the necessary stack adjustment
3050 would not be executed. This is also the place we notice
3051 differences in the return value ABI. Note that it is ok for one
3052 of the functions to have void return type as long as the return
3053 value of the other is passed in a register. */
3054 a = ix86_function_value (TREE_TYPE (exp), func, false);
3055 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3057 if (STACK_REG_P (a) || STACK_REG_P (b))
3059 if (!rtx_equal_p (a, b))
3062 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3064 else if (!rtx_equal_p (a, b))
3067 /* If this call is indirect, we'll need to be able to use a call-clobbered
3068 register for the address of the target function. Make sure that all
3069 such registers are not used for passing parameters. */
3070 if (!decl && !TARGET_64BIT)
3074 /* We're looking at the CALL_EXPR, we need the type of the function. */
3075 type = CALL_EXPR_FN (exp); /* pointer expression */
3076 type = TREE_TYPE (type); /* pointer type */
3077 type = TREE_TYPE (type); /* function type */
3079 if (ix86_function_regparm (type, NULL) >= 3)
3081 /* ??? Need to count the actual number of registers to be used,
3082 not the possible number of registers. Fix later. */
3087 /* Dllimport'd functions are also called indirectly. */
3088 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3089 && decl && DECL_DLLIMPORT_P (decl)
3090 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3093 /* If we forced aligned the stack, then sibcalling would unalign the
3094 stack, which may break the called function. */
3095 if (cfun->machine->force_align_arg_pointer)
3098 /* Otherwise okay. That also includes certain types of indirect calls. */
3102 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3103 calling convention attributes;
3104 arguments as in struct attribute_spec.handler. */
/* NOTE(review): the return statements, several braces and the 64-bit
   early-out branch are omitted from this extract.  */
3107 ix86_handle_cconv_attribute (tree *node, tree name,
3109 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types; warn and drop
   the attribute for anything else.  */
3112 if (TREE_CODE (*node) != FUNCTION_TYPE
3113 && TREE_CODE (*node) != METHOD_TYPE
3114 && TREE_CODE (*node) != FIELD_DECL
3115 && TREE_CODE (*node) != TYPE_DECL)
3117 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3118 IDENTIFIER_POINTER (name));
3119 *no_add_attrs = true;
3123 /* Can combine regparm with all attributes but fastcall. */
3124 if (is_attribute_p ("regparm", name))
3128 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3130 error ("fastcall and regparm attributes are not compatible");
/* Validate the attribute's argument: must be an integer constant
   no larger than REGPARM_MAX.  */
3133 cst = TREE_VALUE (args);
3134 if (TREE_CODE (cst) != INTEGER_CST)
3136 warning (OPT_Wattributes,
3137 "%qs attribute requires an integer constant argument",
3138 IDENTIFIER_POINTER (name));
3139 *no_add_attrs = true;
3141 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3143 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3144 IDENTIFIER_POINTER (name), REGPARM_MAX);
3145 *no_add_attrs = true;
/* force_align_arg_pointer needs a scratch register, so one fewer
   register is available for parameter passing.  */
3149 && lookup_attribute (ix86_force_align_arg_pointer_string,
3150 TYPE_ATTRIBUTES (*node))
3151 && compare_tree_int (cst, REGPARM_MAX-1))
3153 error ("%s functions limited to %d register parameters",
3154 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3162 /* Do not warn when emulating the MS ABI. */
3163 if (!TARGET_64BIT_MS_ABI)
3164 warning (OPT_Wattributes, "%qs attribute ignored",
3165 IDENTIFIER_POINTER (name));
3166 *no_add_attrs = true;
3170 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3171 if (is_attribute_p ("fastcall", name))
3173 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3175 error ("fastcall and cdecl attributes are not compatible");
3177 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3179 error ("fastcall and stdcall attributes are not compatible");
3181 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3183 error ("fastcall and regparm attributes are not compatible");
3187 /* Can combine stdcall with fastcall (redundant), regparm and
3189 else if (is_attribute_p ("stdcall", name))
3191 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3193 error ("stdcall and cdecl attributes are not compatible");
3195 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3197 error ("stdcall and fastcall attributes are not compatible");
3201 /* Can combine cdecl with regparm and sseregparm. */
3202 else if (is_attribute_p ("cdecl", name))
3204 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3206 error ("stdcall and cdecl attributes are not compatible");
3208 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3210 error ("fastcall and cdecl attributes are not compatible");
3214 /* Can combine sseregparm with all attributes. */
3219 /* Return 0 if the attributes for two types are incompatible, 1 if they
3220 are compatible, and 2 if they are nearly compatible (which causes a
3221 warning to be generated). */
/* NOTE(review): the `return 0;`/`return 1;` lines following each test
   are omitted from this extract.  */
3224 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3226 /* Check for mismatch of non-default calling convention. */
3227 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only function and method types carry calling-convention attributes.  */
3229 if (TREE_CODE (type1) != FUNCTION_TYPE
3230 && TREE_CODE (type1) != METHOD_TYPE)
3233 /* Check for mismatched fastcall/regparm types. */
3234 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3235 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3236 || (ix86_function_regparm (type1, NULL)
3237 != ix86_function_regparm (type2, NULL)))
3240 /* Check for mismatched sseregparm types. */
3241 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3242 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3245 /* Check for mismatched return types (cdecl vs stdcall). */
3246 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3247 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3253 /* Return the regparm value for a function with the indicated TYPE and DECL.
3254 DECL may be NULL when calling function indirectly
3255 or considering a libcall. */
/* NOTE(review): local declarations, braces and the final `return
   regparm;` are omitted from this extract.  */
3258 ix86_function_regparm (const_tree type, const_tree decl)
3261 int regparm = ix86_regparm;
/* Emit the nested-function diagnostic below at most once.  */
3263 static bool error_issued;
/* An explicit regparm attribute on the type overrides the default.  */
3268 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3272 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3274 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
3276 /* We can't use regparm(3) for nested functions because
3277 these pass static chain pointer in %ecx register. */
3278 if (!error_issued && regparm == 3
3279 && decl_function_context (decl)
3280 && !DECL_NO_STATIC_CHAIN (decl))
3282 error ("nested functions are limited to 2 register parameters");
3283 error_issued = true;
/* fastcall fixes the register count by convention.  */
3291 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3294 /* Use register calling convention for local functions when possible. */
3295 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3296 && flag_unit_at_a_time && !profile_flag)
3298 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3299 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3302 int local_regparm, globals = 0, regno;
3305 /* Make sure no regparm register is taken by a
3306 fixed register variable. */
3307 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3308 if (fixed_regs[local_regparm])
3311 /* We can't use regparm(3) for nested functions as these use
3312 static chain pointer in third argument. */
3313 if (local_regparm == 3
3314 && (decl_function_context (decl)
3315 || ix86_force_align_arg_pointer)
3316 && !DECL_NO_STATIC_CHAIN (decl))
3319 /* If the function realigns its stackpointer, the prologue will
3320 clobber %ecx. If we've already generated code for the callee,
3321 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3322 scanning the attributes for the self-realigning property. */
3323 f = DECL_STRUCT_FUNCTION (decl);
3324 if (local_regparm == 3
3325 && (f ? !!f->machine->force_align_arg_pointer
3326 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3327 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3330 /* Each fixed register usage increases register pressure,
3331 so less registers should be used for argument passing.
3332 This functionality can be overridden by an explicit
3334 for (regno = 0; regno <= DI_REG; regno++)
3335 if (fixed_regs[regno])
3339 = globals < local_regparm ? local_regparm - globals : 0;
3341 if (local_regparm > regparm)
3342 regparm = local_regparm;
3349 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3350 DFmode (2) arguments in SSE registers for a function with the
3351 indicated TYPE and DECL. DECL may be NULL when calling function
3352 indirectly or considering a libcall. Otherwise return 0. */
/* NOTE(review): several return statements and the TARGET_SSE guard
   around the error diagnostics are omitted from this extract.  */
3355 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
3357 gcc_assert (!TARGET_64BIT);
3359 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3360 by the sseregparm attribute. */
3361 if (TARGET_SSEREGPARM
3362 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE enabled cannot be honored; diagnose it
   (presumably only when WARN is set -- confirm in the full source).  */
3369 error ("Calling %qD with attribute sseregparm without "
3370 "SSE/SSE2 enabled", decl);
3372 error ("Calling %qT with attribute sseregparm without "
3373 "SSE/SSE2 enabled", type);
3381 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3382 (and DFmode for SSE2) arguments in SSE registers. */
3383 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3385 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3386 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3388 return TARGET_SSE2 ? 2 : 1;
3394 /* Return true if EAX is live at the start of the function. Used by
3395 ix86_expand_prologue to determine if we need special help before
3396 calling allocate_stack_worker. */
3399 ix86_eax_live_at_start_p (void)
3401 /* Cheat. Don't bother working forward from ix86_function_regparm
3402 to the function type to whether an actual argument is located in
3403 eax. Instead just look at cfg info, which is still close enough
3404 to correct at this point. This gives false positives for broken
3405 functions that might use uninitialized data that happens to be
3406 allocated in eax, but who cares? */
/* Register 0 is %eax; test its bit in the entry block's live-out set.  */
3407 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3410 /* Value is the number of bytes of arguments automatically
3411 popped when returning from a subroutine call.
3412 FUNDECL is the declaration node of the function (as a tree),
3413 FUNTYPE is the data type of the function (as a tree),
3414 or for a library call it is an identifier node for the subroutine name.
3415 SIZE is the number of bytes of arguments passed on the stack.
3417 On the 80386, the RTD insn may be used to pop them if the number
3418 of args is fixed, but if the number is variable then the caller
3419 must pop them all. RTD can't be used for library calls now
3420 because the library is compiled with the Unix compiler.
3421 Use of RTD is a selectable option, since it is incompatible with
3422 standard Unix calling sequences. If the option is not selected,
3423 the caller must always pop the args.
3425 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): the return statements and some braces are omitted from
   this extract.  */
3428 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3432 /* None of the 64-bit ABIs pop arguments. */
3436 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3438 /* Cdecl functions override -mrtd, and never pop the stack. */
3439 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3441 /* Stdcall and fastcall functions will pop the stack if not
3443 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3444 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3447 if (rtd && ! stdarg_p (funtype))
3451 /* Lose any fake structure return argument if it is passed on the stack. */
3452 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3453 && !KEEP_AGGREGATE_RETURN_POINTER)
3455 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden return-pointer slot is one word on the stack.  */
3457 return GET_MODE_SIZE (Pmode);
3463 /* Argument support functions. */
3465 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): the TARGET_64BIT branch structure and several return
   statements are omitted from this extract.  */
3467 ix86_function_arg_regno_p (int regno)
3470 const int *parm_regs;
3475 return (regno < REGPARM_MAX
3476 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3478 return (regno < REGPARM_MAX
3479 || (TARGET_MMX && MMX_REGNO_P (regno)
3480 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3481 || (TARGET_SSE && SSE_REGNO_P (regno)
3482 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3487 if (SSE_REGNO_P (regno) && TARGET_SSE)
3492 if (TARGET_SSE && SSE_REGNO_P (regno)
3493 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3497 /* RAX is used as hidden argument to va_arg functions. */
3498 if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
/* Otherwise check against the ABI's integer parameter registers.  */
3501 if (TARGET_64BIT_MS_ABI)
3502 parm_regs = x86_64_ms_abi_int_parameter_registers;
3504 parm_regs = x86_64_int_parameter_registers;
3505 for (i = 0; i < REGPARM_MAX; i++)
3506 if (regno == parm_regs[i])
3511 /* Return if we do not know how to pass TYPE solely in registers. */
3514 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
/* Defer to the generic test first.  */
3516 if (must_pass_in_stack_var_size_or_pad (mode, type))
3519 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3520 The layout_type routine is crafty and tries to trick us into passing
3521 currently unsupported vector types on the stack by using TImode. */
3522 return (!TARGET_64BIT && mode == TImode
3523 && type && TREE_CODE (type) != VECTOR_TYPE);
3526 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3527 for a call to a function whose data type is FNTYPE.
3528 For a library call, FNTYPE is 0. */
/* NOTE(review): the fourth parameter (FNDECL) and several guard lines
   are omitted from this extract.  */
3531 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3532 tree fntype, /* tree ptr for function decl */
3533 rtx libname, /* SYMBOL_REF of library name or 0 */
3536 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3537 memset (cum, 0, sizeof (*cum));
3539 /* Set up the number of registers to use for passing arguments. */
3540 cum->nregs = ix86_regparm;
3542 cum->sse_nregs = SSE_REGPARM_MAX;
3544 cum->mmx_nregs = MMX_REGPARM_MAX;
3545 cum->warn_sse = true;
3546 cum->warn_mmx = true;
3548 /* Because type might mismatch in between caller and callee, we need to
3549 use actual type of function for local calls.
3550 FIXME: cgraph_analyze can be told to actually record if function uses
3551 va_start so for local functions maybe_vaarg can be made aggressive
3553 FIXME: once type system is fixed, we won't need this code anymore. */
3555 fntype = TREE_TYPE (fndecl);
3556 cum->maybe_vaarg = (fntype
3557 ? (!prototype_p (fntype) || stdarg_p (fntype))
3562 /* If there are variable arguments, then we won't pass anything
3563 in registers in 32-bit mode. */
3564 if (stdarg_p (fntype))
3574 /* Use ecx and edx registers if function has fastcall attribute,
3575 else look for regparm information. */
3578 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3584 cum->nregs = ix86_function_regparm (fntype, fndecl);
3587 /* Set up the number of SSE registers used for passing SFmode
3588 and DFmode arguments. Warn for mismatching ABI. */
3589 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
3593 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3594 But in the case of vector types, it is some vector mode.
3596 When we have only some of our vector isa extensions enabled, then there
3597 are some modes for which vector_mode_supported_p is false. For these
3598 modes, the generic vector support in gcc will choose some non-vector mode
3599 in order to implement the type. By computing the natural mode, we'll
3600 select the proper ABI location for the operand and not depend on whatever
3601 the middle-end decides to do with these vector types. */
3603 static enum machine_mode
3604 type_natural_mode (const_tree type)
3606 enum machine_mode mode = TYPE_MODE (type);
/* Only re-derive a mode when the middle-end gave a vector type a
   non-vector mode.  */
3608 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3610 HOST_WIDE_INT size = int_size_in_bytes (type);
3611 if ((size == 8 || size == 16)
3612 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3613 && TYPE_VECTOR_SUBPARTS (type) > 1)
3615 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Search the float or integer vector mode class as appropriate.  */
3617 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3618 mode = MIN_MODE_VECTOR_FLOAT;
3620 mode = MIN_MODE_VECTOR_INT;
3622 /* Get the mode which has this inner mode and number of units. */
3623 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3624 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3625 && GET_MODE_INNER (mode) == innermode)
3635 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3636 this may not agree with the mode that the type system has chosen for the
3637 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3638 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3641 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3646 if (orig_mode != BLKmode)
3647 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the register in a one-element PARALLEL at offset 0.  */
3650 tmp = gen_rtx_REG (mode, regno);
3651 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3652 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3658 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3659 of this code is to classify each 8bytes of incoming argument by the register
3660 class and assign registers accordingly. */
3662 /* Return the union class of CLASS1 and CLASS2.
3663 See the x86-64 PS ABI for details. */
3665 static enum x86_64_reg_class
3666 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3668 /* Rule #1: If both classes are equal, this is the resulting class. */
3669 if (class1 == class2)
3672 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3674 if (class1 == X86_64_NO_CLASS)
3676 if (class2 == X86_64_NO_CLASS)
3679 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3680 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3681 return X86_64_MEMORY_CLASS;
3683 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI merged with SSESF stays 32-bit integer.  */
3684 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3685 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3686 return X86_64_INTEGERSI_CLASS;
3687 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3688 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3689 return X86_64_INTEGER_CLASS;
3691 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3693 if (class1 == X86_64_X87_CLASS
3694 || class1 == X86_64_X87UP_CLASS
3695 || class1 == X86_64_COMPLEX_X87_CLASS
3696 || class2 == X86_64_X87_CLASS
3697 || class2 == X86_64_X87UP_CLASS
3698 || class2 == X86_64_COMPLEX_X87_CLASS)
3699 return X86_64_MEMORY_CLASS;
3701 /* Rule #6: Otherwise class SSE is used. */
3702 return X86_64_SSE_CLASS;
3705 /* Classify the argument of type TYPE and mode MODE.
3706 CLASSES will be filled by the register class used to pass each word
3707 of the operand. The number of words is returned. In case the parameter
3708 should be passed in memory, 0 is returned. As a special case for zero
3709 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3711 BIT_OFFSET is used internally for handling records and specifies offset
3712 of the offset in bits modulo 256 to avoid overflow cases.
3714 See the x86-64 PS ABI for details.
/* NOTE(review): this extract omits many lines of the function (braces,
   return statements, several case labels and the loop bodies' closing
   statements); comments below annotate only the visible code.  */
3718 classify_argument (enum machine_mode mode, const_tree type,
3719 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3721 HOST_WIDE_INT bytes =
3722 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3723 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3725 /* Variable sized entities are always passed/returned in memory. */
3729 if (mode != VOIDmode
3730 && targetm.calls.must_pass_in_stack (mode, type))
3733 if (type && AGGREGATE_TYPE_P (type))
3737 enum x86_64_reg_class subclasses[MAX_CLASSES];
3739 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
/* Start with every word unclassified; merging fills these in.  */
3743 for (i = 0; i < words; i++)
3744 classes[i] = X86_64_NO_CLASS;
3746 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3747 signalize memory class, so handle it as special case. */
3750 classes[0] = X86_64_NO_CLASS;
3754 /* Classify each field of record and merge classes. */
3755 switch (TREE_CODE (type))
3758 /* And now merge the fields of structure. */
3759 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3761 if (TREE_CODE (field) == FIELD_DECL)
3765 if (TREE_TYPE (field) == error_mark_node)
3768 /* Bitfields are always classified as integer. Handle them
3769 early, since later code would consider them to be
3770 misaligned integers. */
3771 if (DECL_BIT_FIELD (field))
3773 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3774 i < ((int_bit_position (field) + (bit_offset % 64))
3775 + tree_low_cst (DECL_SIZE (field), 0)
3778 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bitfield members recurse with the field's own bit offset.  */
3783 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3784 TREE_TYPE (field), subclasses,
3785 (int_bit_position (field)
3786 + bit_offset) % 256);
3789 for (i = 0; i < num; i++)
3792 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3794 merge_classes (subclasses[i], classes[i + pos]);
3802 /* Arrays are handled as small records. */
3805 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3806 TREE_TYPE (type), subclasses, bit_offset);
3810 /* The partial classes are now full classes. */
3811 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3812 subclasses[0] = X86_64_SSE_CLASS;
3813 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3814 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all words.  */
3816 for (i = 0; i < words; i++)
3817 classes[i] = subclasses[i % num];
3822 case QUAL_UNION_TYPE:
3823 /* Unions are similar to RECORD_TYPE but offset is always 0.
3825 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3827 if (TREE_CODE (field) == FIELD_DECL)
3831 if (TREE_TYPE (field) == error_mark_node)
3834 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3835 TREE_TYPE (field), subclasses,
3839 for (i = 0; i < num; i++)
3840 classes[i] = merge_classes (subclasses[i], classes[i]);
3849 /* Final merger cleanup. */
3850 for (i = 0; i < words; i++)
3852 /* If one class is MEMORY, everything should be passed in
3854 if (classes[i] == X86_64_MEMORY_CLASS)
3857 /* The X86_64_SSEUP_CLASS should be always preceded by
3858 X86_64_SSE_CLASS. */
3859 if (classes[i] == X86_64_SSEUP_CLASS
3860 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3861 classes[i] = X86_64_SSE_CLASS;
3863 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3864 if (classes[i] == X86_64_X87UP_CLASS
3865 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3866 classes[i] = X86_64_SSE_CLASS;
3871 /* Compute alignment needed. We align all types to natural boundaries with
3872 exception of XFmode that is aligned to 64bits. */
3873 if (mode != VOIDmode && mode != BLKmode)
3875 int mode_alignment = GET_MODE_BITSIZE (mode);
3878 mode_alignment = 128;
3879 else if (mode == XCmode)
3880 mode_alignment = 256;
3881 if (COMPLEX_MODE_P (mode))
3882 mode_alignment /= 2;
3883 /* Misaligned fields are always returned in memory. */
3884 if (bit_offset % mode_alignment)
3888 /* for V1xx modes, just use the base mode */
3889 if (VECTOR_MODE_P (mode) && mode != V1DImode
3890 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3891 mode = GET_MODE_INNER (mode);
3893 /* Classification of atomic types. */
3898 classes[0] = X86_64_SSE_CLASS;
3901 classes[0] = X86_64_SSE_CLASS;
3902 classes[1] = X86_64_SSEUP_CLASS;
/* Small integers: class depends on whether they fit in 32 bits at
   the current offset.  */
3911 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3912 classes[0] = X86_64_INTEGERSI_CLASS;
3914 classes[0] = X86_64_INTEGER_CLASS;
3918 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3923 if (!(bit_offset % 64))
3924 classes[0] = X86_64_SSESF_CLASS;
3926 classes[0] = X86_64_SSE_CLASS;
3929 classes[0] = X86_64_SSEDF_CLASS;
3932 classes[0] = X86_64_X87_CLASS;
3933 classes[1] = X86_64_X87UP_CLASS;
3936 classes[0] = X86_64_SSE_CLASS;
3937 classes[1] = X86_64_SSEUP_CLASS;
3940 classes[0] = X86_64_SSE_CLASS;
3943 classes[0] = X86_64_SSEDF_CLASS;
3944 classes[1] = X86_64_SSEDF_CLASS;
3947 classes[0] = X86_64_COMPLEX_X87_CLASS;
3950 /* This modes is larger than 16 bytes. */
3958 classes[0] = X86_64_SSE_CLASS;
3959 classes[1] = X86_64_SSEUP_CLASS;
3966 classes[0] = X86_64_SSE_CLASS;
3972 gcc_assert (VECTOR_MODE_P (mode));
3977 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3979 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3980 classes[0] = X86_64_INTEGERSI_CLASS;
3982 classes[0] = X86_64_INTEGER_CLASS;
3983 classes[1] = X86_64_INTEGER_CLASS;
/* One word if <= 8 bytes, two words otherwise.  */
3984 return 1 + (bytes > 8);
3988 /* Examine the argument and return set number of register required in each
3989 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): the increment statements inside the switch (e.g.
   (*int_nregs)++ / (*sse_nregs)++), the break statements and the final
   return are omitted from this extract.  */
3991 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3992 int *int_nregs, int *sse_nregs)
3994 enum x86_64_reg_class regclass[MAX_CLASSES];
3995 int n = classify_argument (mode, type, regclass, 0);
/* Tally the word classes produced by classify_argument.  */
4001 for (n--; n >= 0; n--)
4002 switch (regclass[n])
4004 case X86_64_INTEGER_CLASS:
4005 case X86_64_INTEGERSI_CLASS:
4008 case X86_64_SSE_CLASS:
4009 case X86_64_SSESF_CLASS:
4010 case X86_64_SSEDF_CLASS:
4013 case X86_64_NO_CLASS:
4014 case X86_64_SSEUP_CLASS:
4016 case X86_64_X87_CLASS:
4017 case X86_64_X87UP_CLASS:
4021 case X86_64_COMPLEX_X87_CLASS:
/* x87 classes are usable only for return values, never arguments.  */
4022 return in_return ? 2 : 0;
4023 case X86_64_MEMORY_CLASS:
4029 /* Construct container for the argument used by GCC interface. See
4030 FUNCTION_ARG for the detailed description. */
4033 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
4034 const_tree type, int in_return, int nintregs, int nsseregs,
4035 const int *intreg, int sse_regno)
4037 /* The following variables hold the static issued_error state. */
4038 static bool issued_sse_arg_error;
4039 static bool issued_sse_ret_error;
4040 static bool issued_x87_ret_error;
4042 enum machine_mode tmpmode;
4044 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4045 enum x86_64_reg_class regclass[MAX_CLASSES];
4049 int needed_sseregs, needed_intregs;
4050 rtx exp[MAX_CLASSES];
4053 n = classify_argument (mode, type, regclass, 0);
4056 if (!examine_argument (mode, type, in_return, &needed_intregs,
4059 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
4062 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
4063 some less clueful developer tries to use floating-point anyway. */
4064 if (needed_sseregs && !TARGET_SSE)
4068 if (!issued_sse_ret_error)
4070 error ("SSE register return with SSE disabled");
4071 issued_sse_ret_error = true;
4074 else if (!issued_sse_arg_error)
4076 error ("SSE register argument with SSE disabled");
4077 issued_sse_arg_error = true;
4082 /* Likewise, error if the ABI requires us to return values in the
4083 x87 registers and the user specified -mno-80387. */
4084 if (!TARGET_80387 && in_return)
4085 for (i = 0; i < n; i++)
4086 if (regclass[i] == X86_64_X87_CLASS
4087 || regclass[i] == X86_64_X87UP_CLASS
4088 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
4090 if (!issued_x87_ret_error)
4092 error ("x87 register return with x87 disabled");
4093 issued_x87_ret_error = true;
4098 /* First construct simple cases. Avoid SCmode, since we want to use
4099 single register to pass this type. */
4100 if (n == 1 && mode != SCmode)
4101 switch (regclass[0])
4103 case X86_64_INTEGER_CLASS:
4104 case X86_64_INTEGERSI_CLASS:
4105 return gen_rtx_REG (mode, intreg[0]);
4106 case X86_64_SSE_CLASS:
4107 case X86_64_SSESF_CLASS:
4108 case X86_64_SSEDF_CLASS:
4109 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4110 case X86_64_X87_CLASS:
4111 case X86_64_COMPLEX_X87_CLASS:
4112 return gen_rtx_REG (mode, FIRST_STACK_REG);
4113 case X86_64_NO_CLASS:
4114 /* Zero sized array, struct or class. */
4119 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4120 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4121 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4124 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4125 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4126 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4127 && regclass[1] == X86_64_INTEGER_CLASS
4128 && (mode == CDImode || mode == TImode || mode == TFmode)
4129 && intreg[0] + 1 == intreg[1])
4130 return gen_rtx_REG (mode, intreg[0]);
4132 /* Otherwise figure out the entries of the PARALLEL. */
4133 for (i = 0; i < n; i++)
4135 switch (regclass[i])
4137 case X86_64_NO_CLASS:
4139 case X86_64_INTEGER_CLASS:
4140 case X86_64_INTEGERSI_CLASS:
4141 /* Merge TImodes on aligned occasions here too. */
4142 if (i * 8 + 8 > bytes)
4143 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4144 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4148 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4149 if (tmpmode == BLKmode)
4151 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4152 gen_rtx_REG (tmpmode, *intreg),
4156 case X86_64_SSESF_CLASS:
4157 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4158 gen_rtx_REG (SFmode,
4159 SSE_REGNO (sse_regno)),
4163 case X86_64_SSEDF_CLASS:
4164 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4165 gen_rtx_REG (DFmode,
4166 SSE_REGNO (sse_regno)),
4170 case X86_64_SSE_CLASS:
4171 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4175 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4176 gen_rtx_REG (tmpmode,
4177 SSE_REGNO (sse_regno)),
4179 if (tmpmode == TImode)
4188 /* Empty aligned struct, union or class. */
4192 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4193 for (i = 0; i < nexps; i++)
4194 XVECEXP (ret, 0, i) = exp [i];
4198 /* Update the data in CUM to advance over an argument of mode MODE
4199 and data type TYPE. (TYPE is null for libcalls where that information
4200 may not be available.) */
4203 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4204 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4220 cum->words += words;
4221 cum->nregs -= words;
4222 cum->regno += words;
4224 if (cum->nregs <= 0)
4232 if (cum->float_in_sse < 2)
4235 if (cum->float_in_sse < 1)
4246 if (!type || !AGGREGATE_TYPE_P (type))
4248 cum->sse_words += words;
4249 cum->sse_nregs -= 1;
4250 cum->sse_regno += 1;
4251 if (cum->sse_nregs <= 0)
4264 if (!type || !AGGREGATE_TYPE_P (type))
4266 cum->mmx_words += words;
4267 cum->mmx_nregs -= 1;
4268 cum->mmx_regno += 1;
4269 if (cum->mmx_nregs <= 0)
4280 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4281 tree type, HOST_WIDE_INT words)
4283 int int_nregs, sse_nregs;
4285 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4286 cum->words += words;
4287 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4289 cum->nregs -= int_nregs;
4290 cum->sse_nregs -= sse_nregs;
4291 cum->regno += int_nregs;
4292 cum->sse_regno += sse_nregs;
4295 cum->words += words;
4299 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4300 HOST_WIDE_INT words)
4302 /* Otherwise, this should be passed indirect. */
4303 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4305 cum->words += words;
4314 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4315 tree type, int named ATTRIBUTE_UNUSED)
4317 HOST_WIDE_INT bytes, words;
4319 if (mode == BLKmode)
4320 bytes = int_size_in_bytes (type);
4322 bytes = GET_MODE_SIZE (mode);
4323 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4326 mode = type_natural_mode (type);
4328 if (TARGET_64BIT_MS_ABI)
4329 function_arg_advance_ms_64 (cum, bytes, words);
4330 else if (TARGET_64BIT)
4331 function_arg_advance_64 (cum, mode, type, words);
4333 function_arg_advance_32 (cum, mode, type, bytes, words);
4336 /* Define where to put the arguments to a function.
4337 Value is zero to push the argument on the stack,
4338 or a hard register in which to store the argument.
4340 MODE is the argument's machine mode.
4341 TYPE is the data type of the argument (as a tree).
4342 This is null for libcalls where that information may
4344 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4345 the preceding args and about the function being called.
4346 NAMED is nonzero if this argument is a named parameter
4347 (otherwise it is an extra parameter matching an ellipsis). */
4350 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4351 enum machine_mode orig_mode, tree type,
4352 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4354 static bool warnedsse, warnedmmx;
4356 /* Avoid the AL settings for the Unix64 ABI. */
4357 if (mode == VOIDmode)
4373 if (words <= cum->nregs)
4375 int regno = cum->regno;
4377 /* Fastcall allocates the first two DWORD (SImode) or
4378 smaller arguments to ECX and EDX if it isn't an
4384 || (type && AGGREGATE_TYPE_P (type)))
4387 /* ECX not EAX is the first allocated register. */
4388 if (regno == AX_REG)
4391 return gen_rtx_REG (mode, regno);
4396 if (cum->float_in_sse < 2)
4399 if (cum->float_in_sse < 1)
4409 if (!type || !AGGREGATE_TYPE_P (type))
4411 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4414 warning (0, "SSE vector argument without SSE enabled "
4418 return gen_reg_or_parallel (mode, orig_mode,
4419 cum->sse_regno + FIRST_SSE_REG);
4428 if (!type || !AGGREGATE_TYPE_P (type))
4430 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4433 warning (0, "MMX vector argument without MMX enabled "
4437 return gen_reg_or_parallel (mode, orig_mode,
4438 cum->mmx_regno + FIRST_MMX_REG);
4447 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4448 enum machine_mode orig_mode, tree type)
4450 /* Handle a hidden AL argument containing number of registers
4451 for varargs x86-64 functions. */
4452 if (mode == VOIDmode)
4453 return GEN_INT (cum->maybe_vaarg
4454 ? (cum->sse_nregs < 0
4459 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4461 &x86_64_int_parameter_registers [cum->regno],
4466 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4467 enum machine_mode orig_mode, int named,
4468 HOST_WIDE_INT bytes)
4472 /* Avoid the AL settings for the Unix64 ABI. */
4473 if (mode == VOIDmode)
4476 /* If we've run out of registers, it goes on the stack. */
4477 if (cum->nregs == 0)
4480 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4482 /* Only floating point modes are passed in anything but integer regs. */
4483 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4486 regno = cum->regno + FIRST_SSE_REG;
4491 /* Unnamed floating parameters are passed in both the
4492 SSE and integer registers. */
4493 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4494 t2 = gen_rtx_REG (mode, regno);
4495 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4496 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4497 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4500 /* Handle aggregated types passed in register. */
4501 if (orig_mode == BLKmode)
4503 if (bytes > 0 && bytes <= 8)
4504 mode = (bytes > 4 ? DImode : SImode);
4505 if (mode == BLKmode)
4509 return gen_reg_or_parallel (mode, orig_mode, regno);
4513 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4514 tree type, int named)
4516 enum machine_mode mode = omode;
4517 HOST_WIDE_INT bytes, words;
4519 if (mode == BLKmode)
4520 bytes = int_size_in_bytes (type);
4522 bytes = GET_MODE_SIZE (mode);
4523 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4525 /* To simplify the code below, represent vector types with a vector mode
4526 even if MMX/SSE are not active. */
4527 if (type && TREE_CODE (type) == VECTOR_TYPE)
4528 mode = type_natural_mode (type);
4530 if (TARGET_64BIT_MS_ABI)
4531 return function_arg_ms_64 (cum, mode, omode, named, bytes);
4532 else if (TARGET_64BIT)
4533 return function_arg_64 (cum, mode, omode, type);
4535 return function_arg_32 (cum, mode, omode, type, bytes, words);
4538 /* A C expression that indicates when an argument must be passed by
4539 reference. If nonzero for an argument, a copy of that argument is
4540 made in memory and a pointer to the argument is passed instead of
4541 the argument itself. The pointer is passed in whatever way is
4542 appropriate for passing a pointer to that type. */
4545 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4546 enum machine_mode mode ATTRIBUTE_UNUSED,
4547 const_tree type, bool named ATTRIBUTE_UNUSED)
4549 /* See Windows x64 Software Convention. */
4550 if (TARGET_64BIT_MS_ABI)
4552 int msize = (int) GET_MODE_SIZE (mode);
4555 /* Arrays are passed by reference. */
4556 if (TREE_CODE (type) == ARRAY_TYPE)
4559 if (AGGREGATE_TYPE_P (type))
4561 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4562 are passed by reference. */
4563 msize = int_size_in_bytes (type);
4567 /* __m128 is passed by reference. */
4569 case 1: case 2: case 4: case 8:
4575 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4581 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4584 contains_aligned_value_p (tree type)
4586 enum machine_mode mode = TYPE_MODE (type);
4587 if (((TARGET_SSE && SSE_REG_MODE_P (mode)) || mode == TDmode)
4588 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4590 if (TYPE_ALIGN (type) < 128)
4593 if (AGGREGATE_TYPE_P (type))
4595 /* Walk the aggregates recursively. */
4596 switch (TREE_CODE (type))
4600 case QUAL_UNION_TYPE:
4604 /* Walk all the structure fields. */
4605 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4607 if (TREE_CODE (field) == FIELD_DECL
4608 && contains_aligned_value_p (TREE_TYPE (field)))
4615 /* Just for use if some languages passes arrays by value. */
4616 if (contains_aligned_value_p (TREE_TYPE (type)))
4627 /* Gives the alignment boundary, in bits, of an argument with the
4628 specified mode and type. */
4631 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4635 align = TYPE_ALIGN (type);
4637 align = GET_MODE_ALIGNMENT (mode);
4638 if (align < PARM_BOUNDARY)
4639 align = PARM_BOUNDARY;
4640 /* In 32bit, only _Decimal128 is aligned to its natural boundary. */
4641 if (!TARGET_64BIT && mode != TDmode)
4643 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4644 make an exception for SSE modes since these require 128bit
4647 The handling here differs from field_alignment. ICC aligns MMX
4648 arguments to 4 byte boundaries, while structure fields are aligned
4649 to 8 byte boundaries. */
4652 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)) && mode != TDmode)
4653 align = PARM_BOUNDARY;
4657 if (!contains_aligned_value_p (type))
4658 align = PARM_BOUNDARY;
4661 if (align > BIGGEST_ALIGNMENT)
4662 align = BIGGEST_ALIGNMENT;
4666 /* Return true if N is a possible register number of function value. */
4669 ix86_function_value_regno_p (int regno)
4676 case FIRST_FLOAT_REG:
4677 if (TARGET_64BIT_MS_ABI)
4679 return TARGET_FLOAT_RETURNS_IN_80387;
4685 if (TARGET_MACHO || TARGET_64BIT)
4693 /* Define how to find the value returned by a function.
4694 VALTYPE is the data type of the value (as a tree).
4695 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4696 otherwise, FUNC is 0. */
4699 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4700 const_tree fntype, const_tree fn)
4704 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4705 we normally prevent this case when mmx is not available. However
4706 some ABIs may require the result to be returned like DImode. */
4707 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4708 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4710 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4711 we prevent this case when sse is not available. However some ABIs
4712 may require the result to be returned like integer TImode. */
4713 else if (mode == TImode
4714 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4715 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4717 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4718 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4719 regno = FIRST_FLOAT_REG;
4721 /* Most things go in %eax. */
4724 /* Override FP return register with %xmm0 for local functions when
4725 SSE math is enabled or for functions with sseregparm attribute. */
4726 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4728 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4729 if ((sse_level >= 1 && mode == SFmode)
4730 || (sse_level == 2 && mode == DFmode))
4731 regno = FIRST_SSE_REG;
4734 return gen_rtx_REG (orig_mode, regno);
4738 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4743 /* Handle libcalls, which don't provide a type node. */
4744 if (valtype == NULL)
4756 return gen_rtx_REG (mode, FIRST_SSE_REG);
4759 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4763 return gen_rtx_REG (mode, AX_REG);
4767 ret = construct_container (mode, orig_mode, valtype, 1,
4768 REGPARM_MAX, SSE_REGPARM_MAX,
4769 x86_64_int_return_registers, 0);
4771 /* For zero sized structures, construct_container returns NULL, but we
4772 need to keep rest of compiler happy by returning meaningful value. */
4774 ret = gen_rtx_REG (orig_mode, AX_REG);
4780 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4782 unsigned int regno = AX_REG;
4786 switch (GET_MODE_SIZE (mode))
4789 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4790 && !COMPLEX_MODE_P (mode))
4791 regno = FIRST_SSE_REG;
4795 if (mode == SFmode || mode == DFmode)
4796 regno = FIRST_SSE_REG;
4802 return gen_rtx_REG (orig_mode, regno);
4806 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4807 enum machine_mode orig_mode, enum machine_mode mode)
4809 const_tree fn, fntype;
4812 if (fntype_or_decl && DECL_P (fntype_or_decl))
4813 fn = fntype_or_decl;
4814 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4816 if (TARGET_64BIT_MS_ABI)
4817 return function_value_ms_64 (orig_mode, mode);
4818 else if (TARGET_64BIT)
4819 return function_value_64 (orig_mode, mode, valtype);
4821 return function_value_32 (orig_mode, mode, fntype, fn);
4825 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4826 bool outgoing ATTRIBUTE_UNUSED)
4828 enum machine_mode mode, orig_mode;
4830 orig_mode = TYPE_MODE (valtype);
4831 mode = type_natural_mode (valtype);
4832 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4836 ix86_libcall_value (enum machine_mode mode)
4838 return ix86_function_value_1 (NULL, NULL, mode, mode);
4841 /* Return true iff type is returned in memory. */
4844 return_in_memory_32 (const_tree type, enum machine_mode mode)
4848 if (mode == BLKmode)
4851 size = int_size_in_bytes (type);
4853 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4856 if (VECTOR_MODE_P (mode) || mode == TImode)
4858 /* User-created vectors small enough to fit in EAX. */
4862 /* MMX/3dNow values are returned in MM0,
4863 except when it doesn't exits. */
4865 return (TARGET_MMX ? 0 : 1);
4867 /* SSE values are returned in XMM0, except when it doesn't exist. */
4869 return (TARGET_SSE ? 0 : 1);
4884 return_in_memory_64 (const_tree type, enum machine_mode mode)
4886 int needed_intregs, needed_sseregs;
4887 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4891 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4893 HOST_WIDE_INT size = int_size_in_bytes (type);
4895 /* __m128 is returned in xmm0. */
4896 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4897 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
4900 /* Otherwise, the size must be exactly in [1248]. */
4901 return (size != 1 && size != 2 && size != 4 && size != 8);
4905 ix86_return_in_memory (const_tree type)
4907 const enum machine_mode mode = type_natural_mode (type);
4909 if (TARGET_64BIT_MS_ABI)
4910 return return_in_memory_ms_64 (type, mode);
4911 else if (TARGET_64BIT)
4912 return return_in_memory_64 (type, mode);
4914 return return_in_memory_32 (type, mode);
4917 /* Return false iff TYPE is returned in memory. This version is used
4918 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4919 but differs notably in that when MMX is available, 8-byte vectors
4920 are returned in memory, rather than in MMX registers. */
4923 ix86_sol10_return_in_memory (const_tree type)
4926 enum machine_mode mode = type_natural_mode (type);
4929 return return_in_memory_64 (type, mode);
4931 if (mode == BLKmode)
4934 size = int_size_in_bytes (type);
4936 if (VECTOR_MODE_P (mode))
4938 /* Return in memory only if MMX registers *are* available. This
4939 seems backwards, but it is consistent with the existing
4946 else if (mode == TImode)
4948 else if (mode == XFmode)
4954 /* When returning SSE vector types, we have a choice of either
4955 (1) being abi incompatible with a -march switch, or
4956 (2) generating an error.
4957 Given no good solution, I think the safest thing is one warning.
4958 The user won't be able to use -Werror, but....
4960 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4961 called in response to actually generating a caller or callee that
4962 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4963 via aggregate_value_p for general type probing from tree-ssa. */
4966 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4968 static bool warnedsse, warnedmmx;
4970 if (!TARGET_64BIT && type)
4972 /* Look at the return type of the function, not the function type. */
4973 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4975 if (!TARGET_SSE && !warnedsse)
4978 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4981 warning (0, "SSE vector return without SSE enabled "
4986 if (!TARGET_MMX && !warnedmmx)
4988 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4991 warning (0, "MMX vector return without MMX enabled "
5001 /* Create the va_list data type. */
5004 ix86_build_builtin_va_list (void)
5006 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
5008 /* For i386 we use plain pointer to argument area. */
5009 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5010 return build_pointer_type (char_type_node);
5012 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5013 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
5015 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
5016 unsigned_type_node);
5017 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
5018 unsigned_type_node);
5019 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
5021 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
5024 va_list_gpr_counter_field = f_gpr;
5025 va_list_fpr_counter_field = f_fpr;
5027 DECL_FIELD_CONTEXT (f_gpr) = record;
5028 DECL_FIELD_CONTEXT (f_fpr) = record;
5029 DECL_FIELD_CONTEXT (f_ovf) = record;
5030 DECL_FIELD_CONTEXT (f_sav) = record;
5032 TREE_CHAIN (record) = type_decl;
5033 TYPE_NAME (record) = type_decl;
5034 TYPE_FIELDS (record) = f_gpr;
5035 TREE_CHAIN (f_gpr) = f_fpr;
5036 TREE_CHAIN (f_fpr) = f_ovf;
5037 TREE_CHAIN (f_ovf) = f_sav;
5039 layout_type (record);
5041 /* The correct type is an array type of one element. */
5042 return build_array_type (record, build_index_type (size_zero_node));
5045 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
5048 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
5058 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
5061 /* Indicate to allocate space on the stack for varargs save area. */
5062 ix86_save_varrargs_registers = 1;
5063 /* We need 16-byte stack alignment to save SSE registers. If user
5064 asked for lower preferred_stack_boundary, lets just hope that he knows
5065 what he is doing and won't varargs SSE values.
5067 We also may end up assuming that only 64bit values are stored in SSE
5068 register let some floating point program work. */
5069 if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
5070 crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;
5072 save_area = frame_pointer_rtx;
5073 set = get_varargs_alias_set ();
5075 for (i = cum->regno;
5077 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
5080 mem = gen_rtx_MEM (Pmode,
5081 plus_constant (save_area, i * UNITS_PER_WORD));
5082 MEM_NOTRAP_P (mem) = 1;
5083 set_mem_alias_set (mem, set);
5084 emit_move_insn (mem, gen_rtx_REG (Pmode,
5085 x86_64_int_parameter_registers[i]));
5088 if (cum->sse_nregs && cfun->va_list_fpr_size)
5090 /* Now emit code to save SSE registers. The AX parameter contains number
5091 of SSE parameter registers used to call this function. We use
5092 sse_prologue_save insn template that produces computed jump across
5093 SSE saves. We need some preparation work to get this working. */
5095 label = gen_label_rtx ();
5096 label_ref = gen_rtx_LABEL_REF (Pmode, label);
5098 /* Compute address to jump to :
5099 label - 5*eax + nnamed_sse_arguments*5 */
5100 tmp_reg = gen_reg_rtx (Pmode);
5101 nsse_reg = gen_reg_rtx (Pmode);
5102 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
5103 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5104 gen_rtx_MULT (Pmode, nsse_reg,
5109 gen_rtx_CONST (DImode,
5110 gen_rtx_PLUS (DImode,
5112 GEN_INT (cum->sse_regno * 4))));
5114 emit_move_insn (nsse_reg, label_ref);
5115 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
5117 /* Compute address of memory block we save into. We always use pointer
5118 pointing 127 bytes after first byte to store - this is needed to keep
5119 instruction size limited by 4 bytes. */
5120 tmp_reg = gen_reg_rtx (Pmode);
5121 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5122 plus_constant (save_area,
5123 8 * REGPARM_MAX + 127)));
5124 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
5125 MEM_NOTRAP_P (mem) = 1;
5126 set_mem_alias_set (mem, set);
5127 set_mem_align (mem, BITS_PER_WORD);
5129 /* And finally do the dirty job! */
5130 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
5131 GEN_INT (cum->sse_regno), label));
5136 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5138 alias_set_type set = get_varargs_alias_set ();
5141 for (i = cum->regno; i < REGPARM_MAX; i++)
5145 mem = gen_rtx_MEM (Pmode,
5146 plus_constant (virtual_incoming_args_rtx,
5147 i * UNITS_PER_WORD));
5148 MEM_NOTRAP_P (mem) = 1;
5149 set_mem_alias_set (mem, set);
5151 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5152 emit_move_insn (mem, reg);
5157 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5158 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5161 CUMULATIVE_ARGS next_cum;
5164 /* This argument doesn't appear to be used anymore. Which is good,
5165 because the old code here didn't suppress rtl generation. */
5166 gcc_assert (!no_rtl);
5171 fntype = TREE_TYPE (current_function_decl);
5173 /* For varargs, we do not want to skip the dummy va_dcl argument.
5174 For stdargs, we do want to skip the last named argument. */
5176 if (stdarg_p (fntype))
5177 function_arg_advance (&next_cum, mode, type, 1);
5179 if (TARGET_64BIT_MS_ABI)
5180 setup_incoming_varargs_ms_64 (&next_cum);
5182 setup_incoming_varargs_64 (&next_cum);
5185 /* Implement va_start. */
5188 ix86_va_start (tree valist, rtx nextarg)
5190 HOST_WIDE_INT words, n_gpr, n_fpr;
5191 tree f_gpr, f_fpr, f_ovf, f_sav;
5192 tree gpr, fpr, ovf, sav, t;
5195 /* Only 64bit target needs something special. */
5196 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5198 std_expand_builtin_va_start (valist, nextarg);
5202 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5203 f_fpr = TREE_CHAIN (f_gpr);
5204 f_ovf = TREE_CHAIN (f_fpr);
5205 f_sav = TREE_CHAIN (f_ovf);
5207 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5208 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5209 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5210 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5211 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5213 /* Count number of gp and fp argument registers used. */
5214 words = crtl->args.info.words;
5215 n_gpr = crtl->args.info.regno;
5216 n_fpr = crtl->args.info.sse_regno;
5218 if (cfun->va_list_gpr_size)
5220 type = TREE_TYPE (gpr);
5221 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5222 build_int_cst (type, n_gpr * 8));
5223 TREE_SIDE_EFFECTS (t) = 1;
5224 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5227 if (cfun->va_list_fpr_size)
5229 type = TREE_TYPE (fpr);
5230 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5231 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5232 TREE_SIDE_EFFECTS (t) = 1;
5233 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5236 /* Find the overflow area. */
5237 type = TREE_TYPE (ovf);
5238 t = make_tree (type, virtual_incoming_args_rtx);
5240 t = build2 (POINTER_PLUS_EXPR, type, t,
5241 size_int (words * UNITS_PER_WORD));
5242 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5243 TREE_SIDE_EFFECTS (t) = 1;
5244 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5246 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5248 /* Find the register save area.
5249 Prologue of the function save it right above stack frame. */
5250 type = TREE_TYPE (sav);
5251 t = make_tree (type, frame_pointer_rtx);
5252 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5253 TREE_SIDE_EFFECTS (t) = 1;
5254 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5258 /* Implement va_arg. */
5261 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5263 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5264 tree f_gpr, f_fpr, f_ovf, f_sav;
5265 tree gpr, fpr, ovf, sav, t;
5267 tree lab_false, lab_over = NULL_TREE;
5272 enum machine_mode nat_mode;
5274 /* Only 64bit target needs something special. */
5275 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5276 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5278 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5279 f_fpr = TREE_CHAIN (f_gpr);
5280 f_ovf = TREE_CHAIN (f_fpr);
5281 f_sav = TREE_CHAIN (f_ovf);
5283 valist = build_va_arg_indirect_ref (valist);
5284 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5285 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5286 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5287 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5289 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5291 type = build_pointer_type (type);
5292 size = int_size_in_bytes (type);
5293 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5295 nat_mode = type_natural_mode (type);
5296 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5297 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5299 /* Pull the value out of the saved registers. */
5301 addr = create_tmp_var (ptr_type_node, "addr");
5302 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5306 int needed_intregs, needed_sseregs;
5308 tree int_addr, sse_addr;
5310 lab_false = create_artificial_label ();
5311 lab_over = create_artificial_label ();
5313 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5315 need_temp = (!REG_P (container)
5316 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5317 || TYPE_ALIGN (type) > 128));
5319 /* In case we are passing structure, verify that it is consecutive block
5320 on the register save area. If not we need to do moves. */
5321 if (!need_temp && !REG_P (container))
5323 /* Verify that all registers are strictly consecutive */
5324 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5328 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5330 rtx slot = XVECEXP (container, 0, i);
5331 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5332 || INTVAL (XEXP (slot, 1)) != i * 16)
5340 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5342 rtx slot = XVECEXP (container, 0, i);
5343 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5344 || INTVAL (XEXP (slot, 1)) != i * 8)
5356 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5357 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5358 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5359 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5362 /* First ensure that we fit completely in registers. */
5365 t = build_int_cst (TREE_TYPE (gpr),
5366 (REGPARM_MAX - needed_intregs + 1) * 8);
5367 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5368 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5369 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5370 gimplify_and_add (t, pre_p);
5374 t = build_int_cst (TREE_TYPE (fpr),
5375 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5377 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5378 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5379 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5380 gimplify_and_add (t, pre_p);
5383 /* Compute index to start of area used for integer regs. */
5386 /* int_addr = gpr + sav; */
5387 t = fold_convert (sizetype, gpr);
5388 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5389 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5390 gimplify_and_add (t, pre_p);
5394 /* sse_addr = fpr + sav; */
5395 t = fold_convert (sizetype, fpr);
5396 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5397 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5398 gimplify_and_add (t, pre_p);
5403 tree temp = create_tmp_var (type, "va_arg_tmp");
5406 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5407 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5408 gimplify_and_add (t, pre_p);
5410 for (i = 0; i < XVECLEN (container, 0); i++)
5412 rtx slot = XVECEXP (container, 0, i);
5413 rtx reg = XEXP (slot, 0);
5414 enum machine_mode mode = GET_MODE (reg);
5415 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5416 tree addr_type = build_pointer_type (piece_type);
5419 tree dest_addr, dest;
5421 if (SSE_REGNO_P (REGNO (reg)))
5423 src_addr = sse_addr;
5424 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5428 src_addr = int_addr;
5429 src_offset = REGNO (reg) * 8;
5431 src_addr = fold_convert (addr_type, src_addr);
5432 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5433 size_int (src_offset));
5434 src = build_va_arg_indirect_ref (src_addr);
5436 dest_addr = fold_convert (addr_type, addr);
5437 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5438 size_int (INTVAL (XEXP (slot, 1))));
5439 dest = build_va_arg_indirect_ref (dest_addr);
5441 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5442 gimplify_and_add (t, pre_p);
5448 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5449 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5450 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5451 gimplify_and_add (t, pre_p);
5455 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5456 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5457 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5458 gimplify_and_add (t, pre_p);
5461 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5462 gimplify_and_add (t, pre_p);
5464 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5465 append_to_statement_list (t, pre_p);
5468 /* ... otherwise out of the overflow area. */
5470 /* Care for on-stack alignment if needed. */
5471 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5472 || integer_zerop (TYPE_SIZE (type)))
5476 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5477 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5478 size_int (align - 1));
5479 t = fold_convert (sizetype, t);
5480 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5482 t = fold_convert (TREE_TYPE (ovf), t);
5484 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5486 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5487 gimplify_and_add (t2, pre_p);
5489 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5490 size_int (rsize * UNITS_PER_WORD));
5491 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5492 gimplify_and_add (t, pre_p);
5496 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5497 append_to_statement_list (t, pre_p);
5500 ptrtype = build_pointer_type (type);
5501 addr = fold_convert (ptrtype, addr);
5504 addr = build_va_arg_indirect_ref (addr);
5505 return build_va_arg_indirect_ref (addr);
5508 /* Return nonzero if OPNUM's MEM should be matched
5509 in movabs* patterns. */
/* Return nonzero if operand OPNUM of INSN is a MEM suitable for a
   movabs* pattern: SUBREG wrappers are stripped, the operand must be
   a MEM, and a volatile MEM is accepted only when volatile_ok.
   NOTE(review): interior lines (braces/declarations) are elided in
   this excerpt.  */
5512 ix86_check_movabs (rtx insn, int opnum)
5516 set = PATTERN (insn);
/* In a PARALLEL the SET of interest is element 0.  */
5517 if (GET_CODE (set) == PARALLEL)
5518 set = XVECEXP (set, 0, 0);
5519 gcc_assert (GET_CODE (set) == SET);
5520 mem = XEXP (set, opnum);
/* Look through any chain of SUBREGs to the underlying MEM.  */
5521 while (GET_CODE (mem) == SUBREG)
5522 mem = SUBREG_REG (mem);
5523 gcc_assert (MEM_P (mem));
5524 return (volatile_ok || !MEM_VOLATILE_P (mem));
5527 /* Initialize the table of extra 80387 mathematical constants. */
/* Populate ext_80387_constants_table with the five constants that the
   x87 can load with a single instruction (log10(2), ln(2), log2(e),
   log2(10), pi), each rounded to XFmode, then flag the table as
   initialized so callers can lazily trigger this once.  */
5530 init_ext_80387_constants (void)
5532 static const char * cst[5] =
5534 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5535 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5536 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5537 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5538 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5542 for (i = 0; i < 5; i++)
5544 real_from_string (&ext_80387_constants_table[i], cst[i]);
5545 /* Ensure each constant is rounded to XFmode precision. */
5546 real_convert (&ext_80387_constants_table[i],
5547 XFmode, &ext_80387_constants_table[i]);
/* Mark the table valid; checked before use by callers.  */
5550 ext_80387_constants_init = 1;
5553 /* Return true if the constant is something that can be loaded with
5554 a special instruction. */
/* Classify CONST_DOUBLE X: return an index identifying the x87
   instruction (fldz/fld1/fldlg2/...) that can load it, or a value for
   the -0.0 / -1.0 "load then fchs" sequences.  Non-x87-float or
   non-CONST_DOUBLE operands are rejected early.  NOTE(review): the
   return-value lines for several branches are elided in this excerpt,
   so exact return codes cannot be confirmed from here.  */
5557 standard_80387_constant_p (rtx x)
5559 enum machine_mode mode = GET_MODE (x);
5563 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
/* 0.0 and 1.0 map to fldz / fld1 respectively.  */
5566 if (x == CONST0_RTX (mode))
5568 if (x == CONST1_RTX (mode))
5571 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5573 /* For XFmode constants, try to find a special 80387 instruction when
5574 optimizing for size or on those CPUs that benefit from them. */
5576 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
/* Lazily build the table of the five extended constants.  */
5580 if (! ext_80387_constants_init)
5581 init_ext_80387_constants ();
5583 for (i = 0; i < 5; i++)
5584 if (real_identical (&r, &ext_80387_constants_table[i]))
5588 /* Load of the constant -0.0 or -1.0 will be split as
5589 fldz;fchs or fld1;fchs sequence. */
5590 if (real_isnegzero (&r))
5592 if (real_identical (&r, &dconstm1))
5598 /* Return the opcode of the special instruction to be used to load
/* Map the classification from standard_80387_constant_p back to the
   assembler mnemonic that loads X.  NOTE(review): the switch arms are
   elided in this excerpt.  */
5602 standard_80387_constant_opcode (rtx x)
5604 switch (standard_80387_constant_p (x))
5628 /* Return the CONST_DOUBLE representing the 80387 constant that is
5629 loaded by the specified special instruction. The argument IDX
5630 matches the return value from standard_80387_constant_p. */
/* Build the CONST_DOUBLE loaded by the special x87 instruction whose
   index IDX matches standard_80387_constant_p's return value, taking
   the value from ext_80387_constants_table (initialized lazily).
   NOTE(review): the IDX -> i mapping lines are elided here.  */
5633 standard_80387_constant_rtx (int idx)
5637 if (! ext_80387_constants_init)
5638 init_ext_80387_constants ();
5654 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5658 /* Return 1 if mode is a valid mode for sse. */
/* Predicate: is MODE one of the vector modes handled by the SSE
   constant machinery?  NOTE(review): the body is elided in this
   excerpt; see standard_sse_constant_p for its only visible caller.  */
5660 standard_sse_mode_p (enum machine_mode mode)
5677 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
/* Classify X as an SSE-loadable constant: all-zeros (xor idiom) or,
   for SSE modes, all-ones (pcmpeqd idiom, 2 when SSE2 is available,
   -1 otherwise).  */
5680 standard_sse_constant_p (rtx x)
5682 enum machine_mode mode = GET_MODE (x);
5684 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5686 if (vector_all_ones_operand (x, mode)
5687 && standard_sse_mode_p (mode))
/* All-ones needs pcmpeqd, an SSE2 instruction; -1 marks "not
   directly loadable" on plain SSE.  */
5688 return TARGET_SSE2 ? 2 : -1;
5693 /* Return the opcode of the special instruction to be used to load
/* Return the assembler template that materializes SSE constant X in
   operand 0 of INSN, choosing xorps/xorpd/pxor for zero depending on
   the insn's mode attribute, and pcmpeqd for all-ones.  */
5697 standard_sse_constant_opcode (rtx insn, rtx x)
5699 switch (standard_sse_constant_p (x))
5702 if (get_attr_mode (insn) == MODE_V4SF)
5703 return "xorps\t%0, %0";
5704 else if (get_attr_mode (insn) == MODE_V2DF)
5705 return "xorpd\t%0, %0";
5707 return "pxor\t%0, %0";
/* All-ones: register compared equal with itself.  */
5709 return "pcmpeqd\t%0, %0";
5714 /* Returns 1 if OP contains a symbol reference */
/* Return 1 if OP contains a SYMBOL_REF or LABEL_REF anywhere in its
   RTL tree, recursing through both 'e' (expression) and 'E' (vector)
   operands as described by the rtx format string.  */
5717 symbolic_reference_mentioned_p (rtx op)
5722 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5725 fmt = GET_RTX_FORMAT (GET_CODE (op));
5726 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
/* 'E' operands are rtvecs: scan each element.  */
5732 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5733 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5737 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5744 /* Return 1 if it is appropriate to emit `ret' instructions in the
5745 body of a function. Do this only if the epilogue is simple, needing a
5746 couple of insns. Prior to reloading, we can't tell how many registers
5747 must be saved, so return 0 then. Return 0 if there is no frame
5748 marker to de-allocate. */
/* Decide whether a bare `ret' may be emitted: only after reload, with
   no frame pointer, small enough popped-args size, and a frame that
   needs no allocation and saves no registers.  */
5751 ix86_can_use_return_insn_p (void)
5753 struct ix86_frame frame;
/* Before reload the register-save set is unknown.  */
5755 if (! reload_completed || frame_pointer_needed)
5758 /* Don't allow more than 32 pop, since that's all we can do
5759 with one instruction. */
5760 if (crtl->args.pops_args
5761 && crtl->args.size >= 32768)
5764 ix86_compute_frame_layout (&frame);
5765 return frame.to_allocate == 0 && frame.nregs == 0;
5768 /* Value should be nonzero if functions must have frame pointers.
5769 Zero means the frame pointer need not be set up (and parms may
5770 be accessed via the stack pointer) in functions that seem suitable. */
/* Return nonzero when the current function must set up %ebp: previous
   frames are accessed, the subtarget demands it, or leaf-frame-pointer
   omission does not apply because the function is not a leaf.  */
5773 ix86_frame_pointer_required (void)
5775 /* If we accessed previous frames, then the generated code expects
5776 to be able to access the saved ebp value in our frame. */
5777 if (cfun->machine->accesses_prev_frame)
5780 /* Several x86 os'es need a frame pointer for other reasons,
5781 usually pertaining to setjmp. */
5782 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5785 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5786 the frame pointer by default. Turn it back on now if we've not
5787 got a leaf function. */
5788 if (TARGET_OMIT_LEAF_FRAME_POINTER
5789 && (!current_function_is_leaf
5790 || ix86_current_function_calls_tls_descriptor))
5799 /* Record that the current function accesses previous call frames. */
/* Record that the current function accesses previous call frames;
   ix86_frame_pointer_required consults this flag to force %ebp.  */
5802 ix86_setup_frame_addresses (void)
5804 cfun->machine->accesses_prev_frame = 1;
/* USE_HIDDEN_LINKONCE: emit pc-thunks as hidden link-once functions
   when the assembler/target supports it (or on Mach-O).  */
5807 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5808 # define USE_HIDDEN_LINKONCE 1
5810 # define USE_HIDDEN_LINKONCE 0
/* Bitmask, indexed by hard register number, of the registers for which
   a get-pc thunk has been requested (set in output_set_got, consumed
   by ix86_file_end).  */
5813 static int pic_labels_used;
5815 /* Fills in the label name that should be used for a pc thunk for
5816 the given register. */
/* Write into NAME the label of the pc thunk for hard register REGNO:
   the __i686.get_pc_thunk.<reg> link-once name when hidden link-once
   thunks are usable, else an internal "LPR" label.  32-bit only.  */
5819 get_pc_thunk_name (char name[32], unsigned int regno)
5821 gcc_assert (!TARGET_64BIT)
5823 if (USE_HIDDEN_LINKONCE)
5824 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5826 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5830 /* This function generates code for -fpic that loads %ebx with
5831 the return address of the caller and then returns. */
/* TARGET_ASM_FILE_END hook: emit one get-pc thunk for every register
   recorded in pic_labels_used — each thunk loads its register from the
   stack slot holding the return address and returns — then emit the
   executable-stack marker if required.  NOTE(review): several
   brace/else lines are elided in this excerpt; the Mach-O vs.
   hidden-link-once vs. plain-text-section branches below are the
   alternative ways of placing the thunk.  */
5834 ix86_file_end (void)
5839 for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk was requested.  */
5843 if (! ((pic_labels_used >> regno) & 1))
5846 get_pc_thunk_name (name, regno);
/* Mach-O: coalesced text section with weak/private-extern label.  */
5851 switch_to_section (darwin_sections[text_coal_section]);
5852 fputs ("\t.weak_definition\t", asm_out_file);
5853 assemble_name (asm_out_file, name);
5854 fputs ("\n\t.private_extern\t", asm_out_file);
5855 assemble_name (asm_out_file, name);
5856 fputs ("\n", asm_out_file);
5857 ASM_OUTPUT_LABEL (asm_out_file, name);
5861 if (USE_HIDDEN_LINKONCE)
/* ELF: build a one-only FUNCTION_DECL so the thunk is merged across
   translation units, and mark it .hidden.  */
5865 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5867 TREE_PUBLIC (decl) = 1;
5868 TREE_STATIC (decl) = 1;
5869 DECL_ONE_ONLY (decl) = 1;
5871 (*targetm.asm_out.unique_section) (decl, 0);
5872 switch_to_section (get_named_section (decl, NULL, 0));
5874 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5875 fputs ("\t.hidden\t", asm_out_file);
5876 assemble_name (asm_out_file, name);
5877 fputc ('\n', asm_out_file);
5878 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5882 switch_to_section (text_section);
5883 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: reg = *(sp), i.e. the caller's return address.  */
5886 xops[0] = gen_rtx_REG (Pmode, regno);
5887 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5889 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
5891 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5892 output_asm_insn ("ret", xops);
5895 if (NEED_INDICATE_EXEC_STACK)
5896 file_end_indicate_exec_stack ();
5899 /* Emit code for the SET_GOT patterns. */
/* Emit the assembly for a SET_GOT pattern: load DEST with the address
   of the GOT.  Three strategies are visible here: the VxWorks RTP
   indirection through GOTT_BASE/GOTT_INDEX, the call/pop (or labeled
   call) sequence when deep branch prediction is off, and the pc-thunk
   call otherwise; finally the GOT symbol offset is added in.
   NOTE(review): branch/brace lines are elided in this excerpt.  */
5902 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5908 if (TARGET_VXWORKS_RTP && flag_pic)
5910 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5911 xops[2] = gen_rtx_MEM (Pmode,
5912 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5913 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5915 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5916 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5917 an unadorned address. */
5918 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5919 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5920 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5924 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction, a call/pop pair is acceptable.  */
5926 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5928 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5933 output_asm_insn ("mov{q}\t{%2, %0|%0, %2}", xops);
5935 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5938 output_asm_insn ("call\t%a2", xops);
5941 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5942 is what will be referenced by the Mach-O PIC subsystem. */
5944 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5947 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5948 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5953 output_asm_insn ("pop{q}\t%0", xops);
5955 output_asm_insn ("pop{l}\t%0", xops);
/* Otherwise call a per-register pc thunk; record the register so
   ix86_file_end emits the thunk body.  */
5961 get_pc_thunk_name (name, REGNO (dest));
5962 pic_labels_used |= 1 << REGNO (dest);
5964 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5965 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5966 output_asm_insn ("call\t%X2", xops);
5967 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5968 is what will be referenced by the Mach-O PIC subsystem. */
5971 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5973 targetm.asm_out.internal_label (asm_out_file, "L",
5974 CODE_LABEL_NUMBER (label));
/* Finally add the GOT symbol (possibly biased by .-label) to DEST.  */
5981 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5984 output_asm_insn ("add{q}\t{%1, %0|%0, %1}", xops);
5986 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5991 output_asm_insn ("add{q}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5993 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5999 /* Generate an "push" pattern for input ARG. */
/* Builds (set (mem (pre_dec sp)) arg) — the canonical push RTL.
   NOTE(review): the function's signature line is elided in this
   excerpt; presumably `static rtx gen_push (rtx arg)' — confirm.  */
6004 return gen_rtx_SET (VOIDmode,
6006 gen_rtx_PRE_DEC (Pmode,
6007 stack_pointer_rtx)),
6011 /* Return >= 0 if there is an unused call-clobbered register available
6012 for the entire function. */
/* Return the number of a call-clobbered register (eax/ecx/edx, i.e.
   regs 0..2) unused for the whole function, to hold the PIC base in a
   leaf function without profiling/TLS-descriptor calls; otherwise
   INVALID_REGNUM.  */
6015 ix86_select_alt_pic_regnum (void)
6017 if (current_function_is_leaf && !crtl->profile
6018 && !ix86_current_function_calls_tls_descriptor)
/* Scan edx, ecx, eax for one that is never live.  */
6021 for (i = 2; i >= 0; --i)
6022 if (!df_regs_ever_live_p (i))
6026 return INVALID_REGNUM;
6029 /* Return 1 if we need to save REGNO. */
/* Return 1 if REGNO must be saved in the prologue.  Handles the PIC
   register (unless an alternate scratch can carry the PIC base), the
   EH return data registers when MAYBE_EH_RETURN, the forced-alignment
   argument pointer, and finally the generic rule: live, not
   call-used, not fixed, and not the frame pointer when one is in
   use.  NOTE(review): some return/brace lines are elided here.  */
6031 ix86_save_reg (unsigned int regno, int maybe_eh_return)
6033 if (pic_offset_table_rtx
6034 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
6035 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6037 || crtl->calls_eh_return
6038 || crtl->uses_const_pool))
/* If an unused call-clobbered register can hold the PIC base, the
   real PIC register need not be saved.  */
6040 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
6045 if (crtl->calls_eh_return && maybe_eh_return)
6050 unsigned test = EH_RETURN_DATA_REGNO (i);
6051 if (test == INVALID_REGNUM)
6058 if (cfun->machine->force_align_arg_pointer
6059 && regno == REGNO (cfun->machine->force_align_arg_pointer))
6062 return (df_regs_ever_live_p (regno)
6063 && !call_used_regs[regno]
6064 && !fixed_regs[regno]
6065 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6068 /* Return number of registers to be saved on the stack. */
/* Count the hard registers that ix86_save_reg says must be saved on
   the stack (including maybe-EH-return registers).  */
6071 ix86_nsaved_regs (void)
6076 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
6077 if (ix86_save_reg (regno, true))
6082 /* Return the offset between two registers, one to be eliminated, and the other
6083 its replacement, at the start of a routine. */
/* INITIAL_ELIMINATION_OFFSET: distance between register FROM (being
   eliminated) and its replacement TO at function entry, read off the
   computed frame layout.  Valid pairs are {arg,frame} pointer ->
   {hard frame, stack} pointer.  */
6086 ix86_initial_elimination_offset (int from, int to)
6088 struct ix86_frame frame;
6089 ix86_compute_frame_layout (&frame);
6091 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6092 return frame.hard_frame_pointer_offset;
6093 else if (from == FRAME_POINTER_REGNUM
6094 && to == HARD_FRAME_POINTER_REGNUM)
6095 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining targets must eliminate to the stack pointer.  */
6098 gcc_assert (to == STACK_POINTER_REGNUM);
6100 if (from == ARG_POINTER_REGNUM)
6101 return frame.stack_pointer_offset;
6103 gcc_assert (from == FRAME_POINTER_REGNUM);
6104 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6108 /* Fill structure ix86_frame about frame of currently computed function. */
/* Fill *FRAME with the stack-frame layout of the current function:
   saved-register count and save strategy (push vs. mov), the offsets
   of the hard frame pointer / frame pointer / stack pointer, padding
   for alignment, varargs and outgoing-argument areas, the allocation
   size for the prologue, and the red-zone adjustment.  NOTE(review):
   several condition/brace lines are elided in this excerpt, including
   the guard around the trailing fprintf debug dump.  */
6111 ix86_compute_frame_layout (struct ix86_frame *frame)
6113 HOST_WIDE_INT total_size;
6114 unsigned int stack_alignment_needed;
6115 HOST_WIDE_INT offset;
6116 unsigned int preferred_alignment;
6117 HOST_WIDE_INT size = get_frame_size ();
6119 frame->nregs = ix86_nsaved_regs ();
6122 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6123 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6125 /* During reload iteration the amount of registers saved can change.
6126 Recompute the value as needed. Do not recompute when amount of registers
6127 didn't change as reload does multiple calls to the function and does not
6128 expect the decision to change within single iteration. */
6130 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
6132 int count = frame->nregs;
6134 cfun->machine->use_fast_prologue_epilogue_nregs = count;
6135 /* The fast prologue uses move instead of push to save registers. This
6136 is significantly longer, but also executes faster as modern hardware
6137 can execute the moves in parallel, but can't do that for push/pop.
6139 Be careful about choosing what prologue to emit: When function takes
6140 many instructions to execute we may use slow version as well as in
6141 case function is known to be outside hot spot (this is known with
6142 feedback only). Weight the size of function by number of registers
6143 to save as it is cheap to use one or two push instructions but very
6144 slow to use many of them. */
6146 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6147 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6148 || (flag_branch_probabilities
6149 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6150 cfun->machine->use_fast_prologue_epilogue = false;
6152 cfun->machine->use_fast_prologue_epilogue
6153 = !expensive_function_p (count);
6155 if (TARGET_PROLOGUE_USING_MOVE
6156 && cfun->machine->use_fast_prologue_epilogue)
6157 frame->save_regs_using_mov = true;
6159 frame->save_regs_using_mov = false;
6162 /* Skip return address and saved base pointer. */
6163 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6165 frame->hard_frame_pointer_offset = offset;
6167 /* Do some sanity checking of stack_alignment_needed and
6168 preferred_alignment, since i386 port is the only using those features
6169 that may break easily. */
6171 gcc_assert (!size || stack_alignment_needed);
6172 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6173 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6174 gcc_assert (stack_alignment_needed
6175 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6177 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6178 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6180 /* Register save area */
6181 offset += frame->nregs * UNITS_PER_WORD;
6184 if (ix86_save_varrargs_registers)
6186 offset += X86_64_VARARGS_SIZE;
6187 frame->va_arg_size = X86_64_VARARGS_SIZE;
6190 frame->va_arg_size = 0;
6192 /* Align start of frame for local function. */
6193 frame->padding1 = ((offset + stack_alignment_needed - 1)
6194 & -stack_alignment_needed) - offset;
6196 offset += frame->padding1;
6198 /* Frame pointer points here. */
6199 frame->frame_pointer_offset = offset;
6203 /* Add outgoing arguments area. Can be skipped if we eliminated
6204 all the function calls as dead code.
6205 Skipping is however impossible when function calls alloca. Alloca
6206 expander assumes that last crtl->outgoing_args_size
6207 of stack frame are unused. */
6208 if (ACCUMULATE_OUTGOING_ARGS
6209 && (!current_function_is_leaf || cfun->calls_alloca
6210 || ix86_current_function_calls_tls_descriptor))
6212 offset += crtl->outgoing_args_size;
6213 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6216 frame->outgoing_arguments_size = 0;
6218 /* Align stack boundary. Only needed if we're calling another function
6220 if (!current_function_is_leaf || cfun->calls_alloca
6221 || ix86_current_function_calls_tls_descriptor)
6222 frame->padding2 = ((offset + preferred_alignment - 1)
6223 & -preferred_alignment) - offset;
6225 frame->padding2 = 0;
6227 offset += frame->padding2;
6229 /* We've reached end of stack frame. */
6230 frame->stack_pointer_offset = offset;
6232 /* Size prologue needs to allocate. */
6233 frame->to_allocate =
6234 (size + frame->padding1 + frame->padding2
6235 + frame->outgoing_arguments_size + frame->va_arg_size);
6237 if ((!frame->to_allocate && frame->nregs <= 1)
6238 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6239 frame->save_regs_using_mov = false;
/* Leaf functions with an unchanging sp may keep their frame entirely
   within the red zone, shrinking the explicit allocation.  */
6241 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6242 && current_function_is_leaf
6243 && !ix86_current_function_calls_tls_descriptor)
6245 frame->red_zone_size = frame->to_allocate;
6246 if (frame->save_regs_using_mov)
6247 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6248 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6249 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6252 frame->red_zone_size = 0;
6253 frame->to_allocate -= frame->red_zone_size;
6254 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (guard condition elided in this
   excerpt).  */
6256 fprintf (stderr, "\n");
6257 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6258 fprintf (stderr, "size: %ld\n", (long)size);
6259 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6260 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6261 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6262 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6263 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6264 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6265 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6266 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6267 (long)frame->hard_frame_pointer_offset);
6268 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6269 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6270 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
6271 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6275 /* Emit code to save registers in the prologue. */
/* Prologue helper: push every register that must be saved, from the
   highest hard register downward, marking each push frame-related for
   the unwind info.  */
6278 ix86_emit_save_regs (void)
6283 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6284 if (ix86_save_reg (regno, true))
6286 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6287 RTX_FRAME_RELATED_P (insn) = 1;
6291 /* Emit code to save registers using MOV insns. First register
6292 is restored from POINTER + OFFSET. */
/* Prologue helper: save the to-be-saved registers with MOV stores at
   consecutive word slots starting at POINTER + OFFSET (the fast
   prologue variant; see ix86_compute_frame_layout).  */
6294 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6299 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6300 if (ix86_save_reg (regno, true))
6302 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6304 gen_rtx_REG (Pmode, regno));
6305 RTX_FRAME_RELATED_P (insn) = 1;
6306 offset += UNITS_PER_WORD;
6310 /* Expand prologue or epilogue stack adjustment.
6311 The pattern exist to put a dependency on all ebp-based memory accesses.
6312 STYLE should be negative if instructions should be marked as frame related,
6313 zero if %r11 register is live and cannot be freely used and positive
/* Emit DEST = SRC + OFFSET for prologue/epilogue stack adjustment.
   Uses the direct pattern when the offset is encodable; otherwise the
   64-bit path materializes OFFSET in %r11 first (safe only when STYLE
   says r11 is free).  STYLE < 0 marks the insns frame-related.
   NOTE(review): the condition lines selecting between these branches
   are elided in this excerpt.  */
6317 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6322 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6323 else if (x86_64_immediate_operand (offset, DImode))
6324 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6328 /* r11 is used by indirect sibcall return as well, set before the
6329 epilogue and used after the epilogue. ATM indirect sibcall
6330 shouldn't be used together with huge frame sizes in one
6331 function because of the frame_size check in sibcall.c. */
6333 r11 = gen_rtx_REG (DImode, R11_REG);
6334 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6336 RTX_FRAME_RELATED_P (insn) = 1;
6337 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6341 RTX_FRAME_RELATED_P (insn) = 1;
6344 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* TARGET_INTERNAL_ARG_POINTER hook.  When stack realignment is
   requested (for main, via -mstackrealign, or the force_align_arg_
   pointer attribute) return a copy of %ecx holding the incoming
   argument pointer; nested functions cannot realign (the static-chain
   register conflicts) and fall back, with a warning/error, to the
   virtual incoming-args pointer.  */
6347 ix86_internal_arg_pointer (void)
6349 bool has_force_align_arg_pointer =
6350 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6351 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6352 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6353 && DECL_NAME (current_function_decl)
6354 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6355 && DECL_FILE_SCOPE_P (current_function_decl))
6356 || ix86_force_align_arg_pointer
6357 || has_force_align_arg_pointer)
6359 /* Nested functions can't realign the stack due to a register
6361 if (DECL_CONTEXT (current_function_decl)
6362 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6364 if (ix86_force_align_arg_pointer)
6365 warning (0, "-mstackrealign ignored for nested functions");
6366 if (has_force_align_arg_pointer)
6367 error ("%s not supported for nested functions",
6368 ix86_force_align_arg_pointer_string);
6369 return virtual_incoming_args_rtx;
/* Stash %ecx as the forced-alignment argument pointer; the prologue
   reads and spills it (see ix86_expand_prologue).  */
6371 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6372 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6375 return virtual_incoming_args_rtx;
6378 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6379 This is called from dwarf2out.c to emit call frame instructions
6380 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
/* TARGET_DWARF_HANDLE_FRAME_UNSPEC hook: translate the UNSPECs this
   port wraps in frame-related insns (register save, CFA definition)
   into the corresponding dwarf2out calls.  */
6382 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6384 rtx unspec = SET_SRC (pattern);
6385 gcc_assert (GET_CODE (unspec) == UNSPEC);
6389 case UNSPEC_REG_SAVE:
6390 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6391 SET_DEST (pattern));
6393 case UNSPEC_DEF_CFA:
6394 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6395 INTVAL (XVECEXP (unspec, 0, 0)));
6402 /* Expand the prologue into a bunch of separate insns. */
/* Expand the function prologue as separate insns: optional stack
   realignment via the forced-alignment argument pointer (with
   hand-built unwind notes), frame-pointer setup, register saves (push
   or mov per the computed layout), stack allocation (direct subtract
   or the stack-probing allocate_stack worker), and PIC register
   setup.  NOTE(review): many brace/condition lines are elided in this
   excerpt.  */
6405 ix86_expand_prologue (void)
6409 struct ix86_frame frame;
6410 HOST_WIDE_INT allocate;
6412 ix86_compute_frame_layout (&frame);
6414 if (cfun->machine->force_align_arg_pointer)
6418 /* Grab the argument pointer. */
6419 x = plus_constant (stack_pointer_rtx, 4);
6420 y = cfun->machine->force_align_arg_pointer;
6421 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6422 RTX_FRAME_RELATED_P (insn) = 1;
6424 /* The unwind info consists of two parts: install the fafp as the cfa,
6425 and record the fafp as the "save register" of the stack pointer.
6426 The later is there in order that the unwinder can see where it
6427 should restore the stack pointer across the and insn. */
6428 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6429 x = gen_rtx_SET (VOIDmode, y, x);
6430 RTX_FRAME_RELATED_P (x) = 1;
6431 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6433 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6434 RTX_FRAME_RELATED_P (y) = 1;
6435 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6436 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6437 REG_NOTES (insn) = x;
6439 /* Align the stack. */
6440 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6443 /* And here we cheat like madmen with the unwind info. We force the
6444 cfa register back to sp+4, which is exactly what it was at the
6445 start of the function. Re-pushing the return address results in
6446 the return at the same spot relative to the cfa, and thus is
6447 correct wrt the unwind info. */
6448 x = cfun->machine->force_align_arg_pointer;
6449 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6450 insn = emit_insn (gen_push (x));
6451 RTX_FRAME_RELATED_P (insn) = 1;
6454 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6455 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6456 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6457 REG_NOTES (insn) = x;
6460 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6461 slower on all targets. Also sdb doesn't like it. */
6463 if (frame_pointer_needed)
6465 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6466 RTX_FRAME_RELATED_P (insn) = 1;
6468 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6469 RTX_FRAME_RELATED_P (insn) = 1;
6472 allocate = frame.to_allocate;
6474 if (!frame.save_regs_using_mov)
6475 ix86_emit_save_regs ();
/* When saving with mov, the save area is part of the allocation.  */
6477 allocate += frame.nregs * UNITS_PER_WORD;
6479 /* When using red zone we may start register saving before allocating
6480 the stack frame saving one cycle of the prologue. However I will
6481 avoid doing this if I am going to have to probe the stack since
6482 at least on x86_64 the stack probe can turn into a call that clobbers
6483 a red zone location */
6484 if (TARGET_RED_ZONE && frame.save_regs_using_mov
6485 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6486 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6487 : stack_pointer_rtx,
6488 -frame.nregs * UNITS_PER_WORD);
/* Small allocations: a direct sp adjustment suffices.  */
6492 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6493 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6494 GEN_INT (-allocate), -1);
6497 /* Only valid for Win32. */
6498 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6502 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6504 if (TARGET_64BIT_MS_ABI)
/* eax may hold the first argument; preserve it around the worker.  */
6507 eax_live = ix86_eax_live_at_start_p ();
6511 emit_insn (gen_push (eax));
6512 allocate -= UNITS_PER_WORD;
6515 emit_move_insn (eax, GEN_INT (allocate));
6518 insn = gen_allocate_stack_worker_64 (eax);
6520 insn = gen_allocate_stack_worker_32 (eax);
6521 insn = emit_insn (insn);
6522 RTX_FRAME_RELATED_P (insn) = 1;
6523 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6524 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6525 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6526 t, REG_NOTES (insn));
/* Restore the saved eax from its slot in the new frame.  */
6530 if (frame_pointer_needed)
6531 t = plus_constant (hard_frame_pointer_rtx,
6534 - frame.nregs * UNITS_PER_WORD);
6536 t = plus_constant (stack_pointer_rtx, allocate);
6537 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6541 if (frame.save_regs_using_mov
6542 && !(TARGET_RED_ZONE
6543 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6545 if (!frame_pointer_needed || !frame.to_allocate)
6546 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6548 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6549 -frame.nregs * UNITS_PER_WORD);
6552 pic_reg_used = false;
6553 if (pic_offset_table_rtx
6554 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
/* Prefer an unused call-clobbered register for the PIC base.  */
6557 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6559 if (alt_pic_reg_used != INVALID_REGNUM)
6560 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6562 pic_reg_used = true;
6569 if (ix86_cmodel == CM_LARGE_PIC)
6571 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6572 rtx label = gen_label_rtx ();
6574 LABEL_PRESERVE_P (label) = 1;
6575 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6576 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6577 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6578 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6579 pic_offset_table_rtx, tmp_reg));
6582 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6585 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6588 /* Prevent function calls from being scheduled before the call to mcount.
6589 In the pic_reg_used case, make sure that the got load isn't deleted. */
6593 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6594 emit_insn (gen_blockage ());
6598 /* Emit code to restore saved registers using MOV insns. First register
6599 is restored from POINTER + OFFSET. */
/* Epilogue helper: reload the saved registers with MOV loads from
   consecutive word slots starting at POINTER + OFFSET; MAYBE_EH_RETURN
   is forwarded to ix86_save_reg to include the EH data registers.  */
6601 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6602 int maybe_eh_return)
6605 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6607 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6608 if (ix86_save_reg (regno, maybe_eh_return))
6610 /* Ensure that adjust_address won't be forced to produce pointer
6611 out of range allowed by x86-64 instruction set. */
6612 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset exceeds a signed 32-bit displacement: rebase via r11.  */
6616 r11 = gen_rtx_REG (DImode, R11_REG);
6617 emit_move_insn (r11, GEN_INT (offset));
6618 emit_insn (gen_adddi3 (r11, r11, pointer));
6619 base_address = gen_rtx_MEM (Pmode, r11);
6622 emit_move_insn (gen_rtx_REG (Pmode, regno),
6623 adjust_address (base_address, Pmode, offset));
6624 offset += UNITS_PER_WORD;
6628 /* Restore function stack, frame, and registers. */
/* NOTE(review): STYLE is threaded through as the eh_return selector —
   style == 2 is passed below as the maybe_eh_return flag; confirm the
   full encoding of STYLE against the callers in i386.md.  */
6631 ix86_expand_epilogue (int style)
6634 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6635 struct ix86_frame frame;
6636 HOST_WIDE_INT offset;
6638 ix86_compute_frame_layout (&frame);
6640 /* Calculate start of saved registers relative to ebp. Special care
6641 must be taken for the normal return case of a function using
6642 eh_return: the eax and edx registers are marked as saved, but not
6643 restored along this path. */
6644 offset = frame.nregs;
6645 if (crtl->calls_eh_return && style != 2)
6647 offset *= -UNITS_PER_WORD;
6649 /* If we're only restoring one register and sp is not valid then
6650 using a move instruction to restore the register since it's
6651 less work than reloading sp and popping the register.
6653 The default code result in stack adjustment using add/lea instruction,
6654 while this code results in LEAVE instruction (or discrete equivalent),
6655 so it is profitable in some other cases as well. Especially when there
6656 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6657 and there is exactly one register to pop. This heuristic may need some
6658 tuning in future. */
6659 if ((!sp_valid && frame.nregs <= 1)
6660 || (TARGET_EPILOGUE_USING_MOVE
6661 && cfun->machine->use_fast_prologue_epilogue
6662 && (frame.nregs > 1 || frame.to_allocate))
6663 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6664 || (frame_pointer_needed && TARGET_USE_LEAVE
6665 && cfun->machine->use_fast_prologue_epilogue
6666 && frame.nregs == 1)
6667 || crtl->calls_eh_return)
6669 /* Restore registers. We can use ebp or esp to address the memory
6670 locations. If both are available, default to ebp, since offsets
6671 are known to be small. Only exception is esp pointing directly to the
6672 end of block of saved registers, where we may simplify addressing
6675 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6676 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6677 frame.to_allocate, style == 2);
6679 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6680 offset, style == 2);
6682 /* eh_return epilogues need %ecx added to the stack pointer. */
6685 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6687 if (frame_pointer_needed)
/* With a frame pointer, fold the stack adjustment into the frame
   pointer restore: sa = fp + word, then reload fp from memory. */
6689 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6690 tmp = plus_constant (tmp, UNITS_PER_WORD);
6691 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6693 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6694 emit_move_insn (hard_frame_pointer_rtx, tmp);
6696 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6701 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6702 tmp = plus_constant (tmp, (frame.to_allocate
6703 + frame.nregs * UNITS_PER_WORD));
6704 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6707 else if (!frame_pointer_needed)
6708 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6709 GEN_INT (frame.to_allocate
6710 + frame.nregs * UNITS_PER_WORD),
6712 /* If not an i386, mov & pop is faster than "leave". */
6713 else if (TARGET_USE_LEAVE || optimize_size
6714 || !cfun->machine->use_fast_prologue_epilogue)
6715 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6718 pro_epilogue_adjust_stack (stack_pointer_rtx,
6719 hard_frame_pointer_rtx,
6722 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6724 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6729 /* First step is to deallocate the stack frame so that we can
6730 pop the registers. */
6733 gcc_assert (frame_pointer_needed);
6734 pro_epilogue_adjust_stack (stack_pointer_rtx,
6735 hard_frame_pointer_rtx,
6736 GEN_INT (offset), style);
6738 else if (frame.to_allocate)
6739 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6740 GEN_INT (frame.to_allocate), style);
/* Pop-based restore path: registers come off the stack in order.  */
6742 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6743 if (ix86_save_reg (regno, false))
6746 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6748 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6750 if (frame_pointer_needed)
6752 /* Leave results in shorter dependency chains on CPUs that are
6753 able to grok it fast. */
6754 if (TARGET_USE_LEAVE)
6755 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6756 else if (TARGET_64BIT)
6757 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6759 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6763 if (cfun->machine->force_align_arg_pointer)
6765 emit_insn (gen_addsi3 (stack_pointer_rtx,
6766 cfun->machine->force_align_arg_pointer,
6770 /* Sibcall epilogues don't want a return instruction. */
6774 if (crtl->args.pops_args && crtl->args.size)
6776 rtx popc = GEN_INT (crtl->args.pops_args);
6778 /* i386 can only pop 64K bytes. If asked to pop more, pop
6779 return address, do explicit add, and jump indirectly to the
6782 if (crtl->args.pops_args >= 65536)
6784 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6786 /* There is no "pascal" calling convention in any 64bit ABI. */
6787 gcc_assert (!TARGET_64BIT);
6789 emit_insn (gen_popsi1 (ecx));
6790 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6791 emit_jump_insn (gen_return_indirect_internal (ecx));
6794 emit_jump_insn (gen_return_pop_internal (popc));
6797 emit_jump_insn (gen_return_internal ());
6800 /* Reset from the function's potential modifications. */
/* Target hook: called after the epilogue is output.  Restores the PIC
   register's hard register number in case prologue code re-pointed it. */
6803 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6804 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6806 if (pic_offset_table_rtx)
6807 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6809 /* Mach-O doesn't support labels at the end of objects, so if
6810 it looks like we might want one, insert a NOP. */
6812 rtx insn = get_last_insn ();
6815 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6816 insn = PREV_INSN (insn);
6820 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6821 fputs ("\tnop\n", file);
6827 /* Extract the parts of an RTL expression that is a valid memory address
6828 for an instruction. Return 0 if the structure of the address is
6829 grossly off. Return -1 if the address contains ASHIFT, so it is not
6830 strictly valid, but still used for computing length of lea instruction. */
/* On success the pieces (base, index, scale, displacement, segment)
   are stored into *OUT.  */
6833 ix86_decompose_address (rtx addr, struct ix86_address *out)
6835 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6836 rtx base_reg, index_reg;
6837 HOST_WIDE_INT scale = 1;
6838 rtx scale_rtx = NULL_RTX;
6840 enum ix86_address_seg seg = SEG_DEFAULT;
6842 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6844 else if (GET_CODE (addr) == PLUS)
/* Flatten a nested PLUS chain into an addends[] worklist, then
   classify each operand.  */
6854 addends[n++] = XEXP (op, 1);
6857 while (GET_CODE (op) == PLUS);
6862 for (i = n; i >= 0; --i)
6865 switch (GET_CODE (op))
6870 index = XEXP (op, 0);
6871 scale_rtx = XEXP (op, 1);
6875 if (XINT (op, 1) == UNSPEC_TP
6876 && TARGET_TLS_DIRECT_SEG_REFS
6877 && seg == SEG_DEFAULT)
/* Thread pointer reference: map to the TLS segment register. */
6878 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6907 else if (GET_CODE (addr) == MULT)
6909 index = XEXP (addr, 0); /* index*scale */
6910 scale_rtx = XEXP (addr, 1);
6912 else if (GET_CODE (addr) == ASHIFT)
6916 /* We're called for lea too, which implements ashift on occasion. */
6917 index = XEXP (addr, 0);
6918 tmp = XEXP (addr, 1);
6919 if (!CONST_INT_P (tmp))
6921 scale = INTVAL (tmp);
6922 if ((unsigned HOST_WIDE_INT) scale > 3)
6928 disp = addr; /* displacement */
6930 /* Extract the integral value of scale. */
6933 if (!CONST_INT_P (scale_rtx))
6935 scale = INTVAL (scale_rtx);
6938 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6939 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6941 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6942 if (base_reg && index_reg && scale == 1
6943 && (index_reg == arg_pointer_rtx
6944 || index_reg == frame_pointer_rtx
6945 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6948 tmp = base, base = index, index = tmp;
6949 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6952 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6953 if ((base_reg == hard_frame_pointer_rtx
6954 || base_reg == frame_pointer_rtx
6955 || base_reg == arg_pointer_rtx) && !disp
6958 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6959 Avoid this by transforming to [%esi+0]. */
6960 if (TARGET_K6 && !optimize_size
6961 && base_reg && !index_reg && !disp
6963 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6966 /* Special case: encode reg+reg instead of reg*2. */
6967 if (!base && index && scale && scale == 2)
6968 base = index, base_reg = index_reg, scale = 1;
6970 /* Special case: scaling cannot be encoded without base or displacement. */
6971 if (!base && !disp && index && scale != 1)
6983 /* Return cost of the memory address x.
6984 For i386, it is better to use a complex address than let gcc copy
6985 the address into a reg and make a new pseudo. But not if the address
6986 requires to two regs - that would mean more pseudos with longer
6989 ix86_address_cost (rtx x)
6991 struct ix86_address parts;
6993 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the register checks below see the inner REG. */
6997 if (parts.base && GET_CODE (parts.base) == SUBREG)
6998 parts.base = SUBREG_REG (parts.base);
6999 if (parts.index && GET_CODE (parts.index) == SUBREG)
7000 parts.index = SUBREG_REG (parts.index);
7002 /* Attempt to minimize number of registers in the address. */
7004 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
7006 && (!REG_P (parts.index)
7007 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
7011 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
7013 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
7014 && parts.base != parts.index)
7017 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
7018 since it's predecode logic can't detect the length of instructions
7019 and it degenerates to vector decoded. Increase cost of such
7020 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
7021 to split such addresses or even refuse such addresses at all.
7023 Following addressing modes are affected:
7028 The first and last case may be avoidable by explicitly coding the zero in
7029 memory address, but I don't have AMD-K6 machine handy to check this
7033 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
7034 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
7035 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
7041 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
7042 this is used for to form addresses to local data when -fPIC is in
7046 darwin_local_data_pic (rtx disp)
/* Recognize (MINUS (label/symbol) (symbol "<pic base>")).  */
7048 if (GET_CODE (disp) == MINUS)
7050 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
7051 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
7052 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
7054 const char *sym_name = XSTR (XEXP (disp, 1), 0);
7055 if (! strcmp (sym_name, "<pic base>"))
7063 /* Determine if a given RTX is a valid constant. We already know this
7064 satisfies CONSTANT_P. */
7067 legitimate_constant_p (rtx x)
7069 switch (GET_CODE (x))
7074 if (GET_CODE (x) == PLUS)
7076 if (!CONST_INT_P (XEXP (x, 1)))
7081 if (TARGET_MACHO && darwin_local_data_pic (x))
7084 /* Only some unspecs are valid as "constants". */
7085 if (GET_CODE (x) == UNSPEC)
7086 switch (XINT (x, 1))
7091 return TARGET_64BIT;
/* TLS unspecs are constants only for the matching access model. */
7094 x = XVECEXP (x, 0, 0);
7095 return (GET_CODE (x) == SYMBOL_REF
7096 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7098 x = XVECEXP (x, 0, 0);
7099 return (GET_CODE (x) == SYMBOL_REF
7100 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
7105 /* We must have drilled down to a symbol. */
7106 if (GET_CODE (x) == LABEL_REF)
7108 if (GET_CODE (x) != SYMBOL_REF)
7113 /* TLS symbols are never valid. */
7114 if (SYMBOL_REF_TLS_MODEL (x))
7117 /* DLLIMPORT symbols are never valid. */
7118 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7119 && SYMBOL_REF_DLLIMPORT_P (x))
7124 if (GET_MODE (x) == TImode
7125 && x != CONST0_RTX (TImode)
7131 if (x == CONST0_RTX (GET_MODE (x)))
7139 /* Otherwise we handle everything else in the move patterns. */
7143 /* Determine if it's legal to put X into the constant pool. This
7144 is not possible for the address of thread-local symbols, which
7145 is checked above. */
7148 ix86_cannot_force_const_mem (rtx x)
7150 /* We can always put integral constants and vectors in memory. */
7151 switch (GET_CODE (x))
/* Anything else is poolable exactly when it is not a legitimate
   immediate constant (e.g. TLS/dllimport symbols).  */
7161 return !legitimate_constant_p (x);
7164 /* Determine if a given RTX is a valid constant address. */
7167 constant_address_p (rtx x)
/* The final argument 1 requests strict register checking. */
7169 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7172 /* Nonzero if the constant value X is a legitimate general operand
7173 when generating PIC code. It is given that flag_pic is on and
7174 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7177 legitimate_pic_operand_p (rtx x)
7181 switch (GET_CODE (x))
7184 inner = XEXP (x, 0);
/* Look through symbol+offset to the underlying expression. */
7185 if (GET_CODE (inner) == PLUS
7186 && CONST_INT_P (XEXP (inner, 1)))
7187 inner = XEXP (inner, 0);
7189 /* Only some unspecs are valid as "constants". */
7190 if (GET_CODE (inner) == UNSPEC)
7191 switch (XINT (inner, 1))
7196 return TARGET_64BIT;
7198 x = XVECEXP (inner, 0, 0);
7199 return (GET_CODE (x) == SYMBOL_REF
7200 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7208 return legitimate_pic_address_disp_p (x);
7215 /* Determine if a given CONST RTX is a valid memory displacement
/* Returns nonzero when DISP can be used directly as the displacement
   part of a PIC memory address.  */
7219 legitimate_pic_address_disp_p (rtx disp)
7223 /* In 64bit mode we can allow direct addresses of symbols and labels
7224 when they are not dynamic symbols. */
7227 rtx op0 = disp, op1;
7229 switch (GET_CODE (disp))
7235 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7237 op0 = XEXP (XEXP (disp, 0), 0);
7238 op1 = XEXP (XEXP (disp, 0), 1);
/* Offsets must stay within +/-16MB so RIP-relative addressing
   with additional offset still reaches the symbol.  */
7239 if (!CONST_INT_P (op1)
7240 || INTVAL (op1) >= 16*1024*1024
7241 || INTVAL (op1) < -16*1024*1024)
7243 if (GET_CODE (op0) == LABEL_REF)
7245 if (GET_CODE (op0) != SYMBOL_REF)
7250 /* TLS references should always be enclosed in UNSPEC. */
7251 if (SYMBOL_REF_TLS_MODEL (op0))
7253 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7254 && ix86_cmodel != CM_LARGE_PIC)
7262 if (GET_CODE (disp) != CONST)
7264 disp = XEXP (disp, 0);
7268 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7269 of GOT tables. We should not need these anyway. */
7270 if (GET_CODE (disp) != UNSPEC
7271 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7272 && XINT (disp, 1) != UNSPEC_GOTOFF
7273 && XINT (disp, 1) != UNSPEC_PLTOFF))
7276 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7277 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7283 if (GET_CODE (disp) == PLUS)
7285 if (!CONST_INT_P (XEXP (disp, 1)))
7287 disp = XEXP (disp, 0);
7291 if (TARGET_MACHO && darwin_local_data_pic (disp))
7294 if (GET_CODE (disp) != UNSPEC)
7297 switch (XINT (disp, 1))
7302 /* We need to check for both symbols and labels because VxWorks loads
7303 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7305 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7306 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7308 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7309 While ABI specify also 32bit relocation but we don't produce it in
7310 small PIC model at all. */
7311 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7312 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7314 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7316 case UNSPEC_GOTTPOFF:
7317 case UNSPEC_GOTNTPOFF:
7318 case UNSPEC_INDNTPOFF:
/* Each TLS unspec is valid only for its corresponding TLS model. */
7321 disp = XVECEXP (disp, 0, 0);
7322 return (GET_CODE (disp) == SYMBOL_REF
7323 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7325 disp = XVECEXP (disp, 0, 0);
7326 return (GET_CODE (disp) == SYMBOL_REF
7327 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7329 disp = XVECEXP (disp, 0, 0);
7330 return (GET_CODE (disp) == SYMBOL_REF
7331 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7337 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7338 memory address for an instruction. The MODE argument is the machine mode
7339 for the MEM expression that wants to use this address.
7341 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7342 convert common non-canonical forms to canonical form so that they will
7346 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7347 rtx addr, int strict)
7349 struct ix86_address parts;
7350 rtx base, index, disp;
7351 HOST_WIDE_INT scale;
/* REASON/REASON_RTX record why an address was rejected (debug aid). */
7352 const char *reason = NULL;
7353 rtx reason_rtx = NULL_RTX;
7355 if (ix86_decompose_address (addr, &parts) <= 0)
7357 reason = "decomposition failed";
7362 index = parts.index;
7364 scale = parts.scale;
7366 /* Validate base register.
7368 Don't allow SUBREG's that span more than a word here. It can lead to spill
7369 failures when the base is one word out of a two word structure, which is
7370 represented internally as a DImode int. */
7379 else if (GET_CODE (base) == SUBREG
7380 && REG_P (SUBREG_REG (base))
7381 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7383 reg = SUBREG_REG (base);
7386 reason = "base is not a register";
7390 if (GET_MODE (base) != Pmode)
7392 reason = "base is not in Pmode";
7396 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7397 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7399 reason = "base is not valid";
7404 /* Validate index register.
7406 Don't allow SUBREG's that span more than a word here -- same as above. */
7415 else if (GET_CODE (index) == SUBREG
7416 && REG_P (SUBREG_REG (index))
7417 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7419 reg = SUBREG_REG (index);
7422 reason = "index is not a register";
7426 if (GET_MODE (index) != Pmode)
7428 reason = "index is not in Pmode";
7432 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7433 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7435 reason = "index is not valid";
7440 /* Validate scale factor. */
7443 reason_rtx = GEN_INT (scale);
7446 reason = "scale without index";
7450 if (scale != 2 && scale != 4 && scale != 8)
7452 reason = "scale is not a valid multiplier";
7457 /* Validate displacement. */
7462 if (GET_CODE (disp) == CONST
7463 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7464 switch (XINT (XEXP (disp, 0), 1))
7466 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7467 used. While ABI specify also 32bit relocations, we don't produce
7468 them at all and use IP relative instead. */
7471 gcc_assert (flag_pic);
7473 goto is_legitimate_pic;
7474 reason = "64bit address unspec";
7477 case UNSPEC_GOTPCREL:
7478 gcc_assert (flag_pic);
7479 goto is_legitimate_pic;
7481 case UNSPEC_GOTTPOFF:
7482 case UNSPEC_GOTNTPOFF:
7483 case UNSPEC_INDNTPOFF:
7489 reason = "invalid address unspec";
7493 else if (SYMBOLIC_CONST (disp)
7497 && MACHOPIC_INDIRECT
7498 && !machopic_operand_p (disp)
7504 if (TARGET_64BIT && (index || base))
7506 /* foo@dtpoff(%rX) is ok. */
7507 if (GET_CODE (disp) != CONST
7508 || GET_CODE (XEXP (disp, 0)) != PLUS
7509 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7510 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7511 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7512 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7514 reason = "non-constant pic memory reference";
7518 else if (! legitimate_pic_address_disp_p (disp))
7520 reason = "displacement is an invalid pic construct";
7524 /* This code used to verify that a symbolic pic displacement
7525 includes the pic_offset_table_rtx register.
7527 While this is good idea, unfortunately these constructs may
7528 be created by "adds using lea" optimization for incorrect
7537 This code is nonsensical, but results in addressing
7538 GOT table with pic_offset_table_rtx base. We can't
7539 just refuse it easily, since it gets matched by
7540 "addsi3" pattern, that later gets split to lea in the
7541 case output register differs from input. While this
7542 can be handled by separate addsi pattern for this case
7543 that never results in lea, this seems to be easier and
7544 correct fix for crash to disable this test. */
7546 else if (GET_CODE (disp) != LABEL_REF
7547 && !CONST_INT_P (disp)
7548 && (GET_CODE (disp) != CONST
7549 || !legitimate_constant_p (disp))
7550 && (GET_CODE (disp) != SYMBOL_REF
7551 || !legitimate_constant_p (disp)))
7553 reason = "displacement is not constant";
7556 else if (TARGET_64BIT
7557 && !x86_64_immediate_operand (disp, VOIDmode))
7559 reason = "displacement is out of range";
7564 /* Everything looks valid. */
7571 /* Return a unique alias set for the GOT. */
/* Lazily allocated on first use; -1 marks "not yet created". */
7573 static alias_set_type
7574 ix86_GOT_alias_set (void)
7576 static alias_set_type set = -1;
7578 set = new_alias_set ();
7582 /* Return a legitimate reference for ORIG (an address) using the
7583 register REG. If REG is 0, a new pseudo is generated.
7585 There are two types of references that must be handled:
7587 1. Global data references must load the address from the GOT, via
7588 the PIC reg. An insn is emitted to do this load, and the reg is
7591 2. Static data references, constant pool addresses, and code labels
7592 compute the address as an offset from the GOT, whose base is in
7593 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7594 differentiate them from global data objects. The returned
7595 address is the PIC reg + an unspec constant.
7597 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7598 reg also appears in the address. */
7601 legitimize_pic_address (rtx orig, rtx reg)
7608 if (TARGET_MACHO && !TARGET_64BIT)
7611 reg = gen_reg_rtx (Pmode);
7612 /* Use the generic Mach-O PIC machinery. */
7613 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7617 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7619 else if (TARGET_64BIT
7620 && ix86_cmodel != CM_SMALL_PIC
7621 && gotoff_operand (addr, Pmode))
7624 /* This symbol may be referenced via a displacement from the PIC
7625 base address (@GOTOFF). */
7627 if (reload_in_progress)
7628 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7629 if (GET_CODE (addr) == CONST)
7630 addr = XEXP (addr, 0);
7631 if (GET_CODE (addr) == PLUS)
7633 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7635 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7638 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7639 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7641 tmpreg = gen_reg_rtx (Pmode);
7644 emit_move_insn (tmpreg, new_rtx);
7648 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7649 tmpreg, 1, OPTAB_DIRECT);
7652 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7654 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7656 /* This symbol may be referenced via a displacement from the PIC
7657 base address (@GOTOFF). */
7659 if (reload_in_progress)
7660 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7661 if (GET_CODE (addr) == CONST)
7662 addr = XEXP (addr, 0);
7663 if (GET_CODE (addr) == PLUS)
7665 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7667 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7670 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7671 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7672 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7676 emit_move_insn (reg, new_rtx);
7680 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7681 /* We can't use @GOTOFF for text labels on VxWorks;
7682 see gotoff_operand. */
7683 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7685 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7687 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
7688 return legitimize_dllimport_symbol (addr, true);
7689 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
7690 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7691 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
7693 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
7694 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
7698 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
/* 64-bit small/medium PIC: load the address RIP-relatively from
   the GOT via @GOTPCREL.  */
7700 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7701 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7702 new_rtx = gen_const_mem (Pmode, new_rtx);
7703 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7706 reg = gen_reg_rtx (Pmode);
7707 /* Use directly gen_movsi, otherwise the address is loaded
7708 into register for CSE. We don't want to CSE this addresses,
7709 instead we CSE addresses from the GOT table, so skip this. */
7710 emit_insn (gen_movsi (reg, new_rtx));
7715 /* This symbol must be referenced via a load from the
7716 Global Offset Table (@GOT). */
7718 if (reload_in_progress)
7719 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7720 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7721 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7723 new_rtx = force_reg (Pmode, new_rtx);
7724 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7725 new_rtx = gen_const_mem (Pmode, new_rtx);
7726 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7729 reg = gen_reg_rtx (Pmode);
7730 emit_move_insn (reg, new_rtx);
7736 if (CONST_INT_P (addr)
7737 && !x86_64_immediate_operand (addr, VOIDmode))
7741 emit_move_insn (reg, addr);
7745 new_rtx = force_reg (Pmode, addr);
7747 else if (GET_CODE (addr) == CONST)
7749 addr = XEXP (addr, 0);
7751 /* We must match stuff we generate before. Assume the only
7752 unspecs that can get here are ours. Not that we could do
7753 anything with them anyway.... */
7754 if (GET_CODE (addr) == UNSPEC
7755 || (GET_CODE (addr) == PLUS
7756 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7758 gcc_assert (GET_CODE (addr) == PLUS);
7760 if (GET_CODE (addr) == PLUS)
7762 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7764 /* Check first to see if this is a constant offset from a @GOTOFF
7765 symbol reference. */
7766 if (gotoff_operand (op0, Pmode)
7767 && CONST_INT_P (op1))
7771 if (reload_in_progress)
7772 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7773 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7775 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7776 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7777 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7781 emit_move_insn (reg, new_rtx);
7787 if (INTVAL (op1) < -16*1024*1024
7788 || INTVAL (op1) >= 16*1024*1024)
7790 if (!x86_64_immediate_operand (op1, Pmode))
7791 op1 = force_reg (Pmode, op1);
7792 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively, then recombine,
   folding a constant second half when possible.  */
7798 base = legitimize_pic_address (XEXP (addr, 0), reg);
7799 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7800 base == reg ? NULL_RTX : reg);
7802 if (CONST_INT_P (new_rtx))
7803 new_rtx = plus_constant (base, INTVAL (new_rtx));
7806 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7808 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7809 new_rtx = XEXP (new_rtx, 1);
7811 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7819 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7822 get_thread_pointer (int to_reg)
/* The thread pointer is represented as an UNSPEC_TP expression. */
7826 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7830 reg = gen_reg_rtx (Pmode);
7831 insn = gen_rtx_SET (VOIDmode, reg, tp);
7832 insn = emit_insn (insn);
7837 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7838 false if we expect this to be used for a memory address and true if
7839 we expect to load the address into a register. */
/* X is a TLS symbol; MODEL selects the TLS access model to expand. */
7842 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7844 rtx dest, base, off, pic, tp;
7849 case TLS_MODEL_GLOBAL_DYNAMIC:
7850 dest = gen_reg_rtx (Pmode);
7851 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7853 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
/* 64-bit classic GD: call __tls_get_addr; result in %rax, wrapped
   in a libcall block so it can be CSE'd.  */
7855 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
7858 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7859 insns = get_insns ();
7862 RTL_CONST_CALL_P (insns) = 1;
7863 emit_libcall_block (insns, dest, rax, x);
7865 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7866 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7868 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7870 if (TARGET_GNU2_TLS)
7872 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7874 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7878 case TLS_MODEL_LOCAL_DYNAMIC:
7879 base = gen_reg_rtx (Pmode);
7880 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7882 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7884 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
7887 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7888 insns = get_insns ();
7891 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7892 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7893 RTL_CONST_CALL_P (insns) = 1;
7894 emit_libcall_block (insns, base, rax, note);
7896 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7897 emit_insn (gen_tls_local_dynamic_base_64 (base));
7899 emit_insn (gen_tls_local_dynamic_base_32 (base));
7901 if (TARGET_GNU2_TLS)
7903 rtx x = ix86_tls_module_base ();
7905 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7906 gen_rtx_MINUS (Pmode, x, tp));
/* LD: address = module base + symbol's @DTPOFF offset.  */
7909 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7910 off = gen_rtx_CONST (Pmode, off);
7912 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7914 if (TARGET_GNU2_TLS)
7916 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7918 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7923 case TLS_MODEL_INITIAL_EXEC:
7927 type = UNSPEC_GOTNTPOFF;
7931 if (reload_in_progress)
7932 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7933 pic = pic_offset_table_rtx;
7934 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7936 else if (!TARGET_ANY_GNU_TLS)
7938 pic = gen_reg_rtx (Pmode);
7939 emit_insn (gen_set_got (pic));
7940 type = UNSPEC_GOTTPOFF;
7945 type = UNSPEC_INDNTPOFF;
/* IE: load the TP offset for X from the GOT.  */
7948 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7949 off = gen_rtx_CONST (Pmode, off);
7951 off = gen_rtx_PLUS (Pmode, pic, off);
7952 off = gen_const_mem (Pmode, off);
7953 set_mem_alias_set (off, ix86_GOT_alias_set ());
7955 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7957 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7958 off = force_reg (Pmode, off);
7959 return gen_rtx_PLUS (Pmode, base, off);
7963 base = get_thread_pointer (true);
7964 dest = gen_reg_rtx (Pmode);
7965 emit_insn (gen_subsi3 (dest, base, off));
7969 case TLS_MODEL_LOCAL_EXEC:
7970 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7971 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7972 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7973 off = gen_rtx_CONST (Pmode, off);
7975 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7977 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7978 return gen_rtx_PLUS (Pmode, base, off);
7982 base = get_thread_pointer (true);
7983 dest = gen_reg_rtx (Pmode);
7984 emit_insn (gen_subsi3 (dest, base, off));
7995 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* DECL -> import-stub mapping, GC-managed so entries die with decls. */
7998 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7999 htab_t dllimport_map;
8002 get_dllimport_decl (tree decl)
8004 struct tree_map *h, in;
8008 size_t namelen, prefixlen;
8014 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
8016 in.hash = htab_hash_pointer (decl);
8017 in.base.from = decl;
8018 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
8019 h = (struct tree_map *) *loc;
/* Cache miss: synthesize an artificial VAR_DECL whose RTL is a load
   through the "__imp_"-prefixed import symbol.  */
8023 *loc = h = GGC_NEW (struct tree_map);
8025 h->base.from = decl;
8026 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
8027 DECL_ARTIFICIAL (to) = 1;
8028 DECL_IGNORED_P (to) = 1;
8029 DECL_EXTERNAL (to) = 1;
8030 TREE_READONLY (to) = 1;
8032 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
8033 name = targetm.strip_name_encoding (name);
8034 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
8035 namelen = strlen (name);
8036 prefixlen = strlen (prefix);
8037 imp_name = (char *) alloca (namelen + prefixlen + 1);
8038 memcpy (imp_name, prefix, prefixlen);
8039 memcpy (imp_name + prefixlen, name, namelen + 1);
8041 name = ggc_alloc_string (imp_name, namelen + prefixlen);
8042 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
8043 SET_SYMBOL_REF_DECL (rtl, to);
8044 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
8046 rtl = gen_const_mem (Pmode, rtl);
8047 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
8049 SET_DECL_RTL (to, rtl);
8050 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
8055 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
8056 true if we require the result be a register. */
8059 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* SYMBOL must carry its decl so the import stub can be looked up. */
8064 gcc_assert (SYMBOL_REF_DECL (symbol));
8065 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
8067 x = DECL_RTL (imp_decl);
8069 x = force_reg (Pmode, x);
8073 /* Try machine-dependent ways of modifying an illegitimate address
8074 to be legitimate. If we find one, return the new, valid address.
8075 This macro is used in only one place: `memory_address' in explow.c.
8077 OLDX is the address as it was before break_out_memory_refs was called.
8078 In some cases it is useful to look at this to decide what needs to be done.
8080 MODE and WIN are passed so that this macro can use
8081 GO_IF_LEGITIMATE_ADDRESS.
8083 It is always safe for this macro to do nothing. It exists to recognize
8084 opportunities to optimize the output.
8086 For the 80386, we handle X+REG by loading X into a register R and
8087 using R+REG. R will go in a general reg and indexing will be used.
8088 However, if REG is a broken-out memory address or multiplication,
8089 nothing needs to be done because REG can certainly go in a general reg.
8091 When -fpic is used, special handling is needed for symbolic references.
8092 See comments by legitimize_pic_address in i386.c for details. */
8095 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
/* TLS symbols (bare, or inside CONST (PLUS sym off)) get their own
   legitimization path; LOG holds the TLS model.  */
8100 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
8102 return legitimize_tls_address (x, (enum tls_model) log, false);
8103 if (GET_CODE (x) == CONST
8104 && GET_CODE (XEXP (x, 0)) == PLUS
8105 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8106 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
8108 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
8109 (enum tls_model) log, false);
8110 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* dllimport references (bare, or sym+const) are rewritten into loads
   through their __imp_ indirection cells.  */
8113 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8115 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8116 return legitimize_dllimport_symbol (x, true);
8117 if (GET_CODE (x) == CONST
8118 && GET_CODE (XEXP (x, 0)) == PLUS
8119 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8120 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8122 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8123 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* Remaining symbolic constants under -fpic go through the generic
   PIC legitimizer.  */
8127 if (flag_pic && SYMBOLIC_CONST (x))
8128 return legitimize_pic_address (x, 0);
8130 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8131 if (GET_CODE (x) == ASHIFT
8132 && CONST_INT_P (XEXP (x, 1))
8133 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
8136 log = INTVAL (XEXP (x, 1));
8137 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8138 GEN_INT (1 << log));
8141 if (GET_CODE (x) == PLUS)
8143 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
8145 if (GET_CODE (XEXP (x, 0)) == ASHIFT
8146 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8147 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
8150 log = INTVAL (XEXP (XEXP (x, 0), 1));
8151 XEXP (x, 0) = gen_rtx_MULT (Pmode,
8152 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8153 GEN_INT (1 << log));
8156 if (GET_CODE (XEXP (x, 1)) == ASHIFT
8157 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8158 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8161 log = INTVAL (XEXP (XEXP (x, 1), 1));
8162 XEXP (x, 1) = gen_rtx_MULT (Pmode,
8163 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8164 GEN_INT (1 << log));
8167 /* Put multiply first if it isn't already. */
8168 if (GET_CODE (XEXP (x, 1)) == MULT)
8170 rtx tmp = XEXP (x, 0);
8171 XEXP (x, 0) = XEXP (x, 1);
8176 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8177 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8178 created by virtual register instantiation, register elimination, and
8179 similar optimizations. */
8180 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8183 x = gen_rtx_PLUS (Pmode,
8184 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8185 XEXP (XEXP (x, 1), 0)),
8186 XEXP (XEXP (x, 1), 1));
8190 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8191 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8192 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8193 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8194 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8195 && CONSTANT_P (XEXP (x, 1)))
8198 rtx other = NULL_RTX;
/* Pick whichever of the two constants is the CONST_INT; the other
   operand is folded via plus_constant below.  */
8200 if (CONST_INT_P (XEXP (x, 1)))
8202 constant = XEXP (x, 1);
8203 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8205 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8207 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8208 other = XEXP (x, 1);
8216 x = gen_rtx_PLUS (Pmode,
8217 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8218 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8219 plus_constant (other, INTVAL (constant)));
/* If a canonicalization above changed X and it is now a legitimate
   address, we are done.  */
8223 if (changed && legitimate_address_p (mode, x, FALSE))
8226 if (GET_CODE (XEXP (x, 0)) == MULT)
8229 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8232 if (GET_CODE (XEXP (x, 1)) == MULT)
8235 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8239 && REG_P (XEXP (x, 1))
8240 && REG_P (XEXP (x, 0)))
8243 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8246 x = legitimize_pic_address (x, 0);
8249 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one PLUS operand through a fresh pseudo so the
   sum becomes reg+reg (or reg+const).  */
8252 if (REG_P (XEXP (x, 0)))
8254 rtx temp = gen_reg_rtx (Pmode);
8255 rtx val = force_operand (XEXP (x, 1), temp);
8257 emit_move_insn (temp, val);
8263 else if (REG_P (XEXP (x, 1)))
8265 rtx temp = gen_reg_rtx (Pmode);
8266 rtx val = force_operand (XEXP (x, 0), temp);
8268 emit_move_insn (temp, val);
8278 /* Print an integer constant expression in assembler syntax. Addition
8279 and subtraction are the only arithmetic that may appear in these
8280 expressions. FILE is the stdio stream to write to, X is the rtx, and
8281 CODE is the operand print code from the output string. */
8284 output_pic_addr_const (FILE *file, rtx x, int code)
/* Recursive dispatch on the RTX code.  NOTE(review): the case labels
   for this switch are elided in this extraction; arm boundaries below
   are inferred — confirm against the full file.  */
8288 switch (GET_CODE (x))
8291 gcc_assert (flag_pic);
8296 if (! TARGET_MACHO || TARGET_64BIT)
8297 output_addr_const (file, x);
8300 const char *name = XSTR (x, 0);
8302 /* Mark the decl as referenced so that cgraph will
8303 output the function. */
8304 if (SYMBOL_REF_DECL (x))
8305 mark_decl_referenced (SYMBOL_REF_DECL (x));
8308 if (MACHOPIC_INDIRECT
8309 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8310 name = machopic_indirection_name (x, /*stub_p=*/true);
8312 assemble_name (file, name);
/* 'P' requests a @PLT suffix for non-local symbols (not on Mach-O
   or the 64-bit MS ABI).  */
8314 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8315 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8316 fputs ("@PLT", file);
8323 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8324 assemble_name (asm_out_file, buf);
8328 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8332 /* This used to output parentheses around the expression,
8333 but that does not work on the 386 (either ATT or BSD assembler). */
8334 output_pic_addr_const (file, XEXP (x, 0), code);
8338 if (GET_MODE (x) == VOIDmode)
8340 /* We can use %d if the number is <32 bits and positive. */
8341 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8342 fprintf (file, "0x%lx%08lx",
8343 (unsigned long) CONST_DOUBLE_HIGH (x),
8344 (unsigned long) CONST_DOUBLE_LOW (x));
8346 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8349 /* We can't handle floating point constants;
8350 PRINT_OPERAND must handle them. */
8351 output_operand_lossage ("floating constant misused");
8355 /* Some assemblers need integer constants to appear first. */
8356 if (CONST_INT_P (XEXP (x, 0)))
8358 output_pic_addr_const (file, XEXP (x, 0), code);
8360 output_pic_addr_const (file, XEXP (x, 1), code);
8364 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8365 output_pic_addr_const (file, XEXP (x, 1), code);
8367 output_pic_addr_const (file, XEXP (x, 0), code);
8373 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8374 output_pic_addr_const (file, XEXP (x, 0), code);
8376 output_pic_addr_const (file, XEXP (x, 1), code);
8378 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: one operand, followed by the relocation suffix selected by
   the unspec number.  */
8382 gcc_assert (XVECLEN (x, 0) == 1);
8383 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8384 switch (XINT (x, 1))
8387 fputs ("@GOT", file);
8390 fputs ("@GOTOFF", file);
8393 fputs ("@PLTOFF", file);
8395 case UNSPEC_GOTPCREL:
8396 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8397 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8399 case UNSPEC_GOTTPOFF:
8400 /* FIXME: This might be @TPOFF in Sun ld too. */
8401 fputs ("@GOTTPOFF", file);
8404 fputs ("@TPOFF", file);
8408 fputs ("@TPOFF", file);
8410 fputs ("@NTPOFF", file);
8413 fputs ("@DTPOFF", file);
8415 case UNSPEC_GOTNTPOFF:
8417 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8418 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8420 fputs ("@GOTNTPOFF", file);
8422 case UNSPEC_INDNTPOFF:
8423 fputs ("@INDNTPOFF", file);
8426 output_operand_lossage ("invalid UNSPEC as operand");
8432 output_operand_lossage ("invalid expression as operand");
8436 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8437 We need to emit DTP-relative relocations. */
8439 static void ATTRIBUTE_UNUSED
8440 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emit the address constant with a @DTPOFF relocation.  NOTE(review):
   the size-dependent directive selection between these lines is elided
   here — ASM_LONG is visible, but confirm the SIZE handling in the
   full file.  */
8442 fputs (ASM_LONG, file);
8443 output_addr_const (file, x);
8444 fputs ("@DTPOFF", file);
8450 fputs (", 0", file);
8457 /* In the name of slightly smaller debug output, and to cater to
8458 general assembler lossage, recognize PIC+GOTOFF and turn it back
8459 into a direct symbol reference.
8461 On Darwin, this is necessary to avoid a crash, because Darwin
8462 has a different PIC label for each routine but the DWARF debugging
8463 information is not associated with any particular routine, so it's
8464 necessary to remove references to the PIC label from RTL stored by
8465 the DWARF output code. */
8468 ix86_delegitimize_address (rtx orig_x)
8471 /* reg_addend is NULL or a multiple of some register. */
8472 rtx reg_addend = NULL_RTX;
8473 /* const_addend is NULL or a const_int. */
8474 rtx const_addend = NULL_RTX;
8475 /* This is the result, or NULL. */
8476 rtx result = NULL_RTX;
/* RIP-relative GOTPCREL form: strip the UNSPEC wrapper and return the
   underlying symbol directly.  */
8483 if (GET_CODE (x) != CONST
8484 || GET_CODE (XEXP (x, 0)) != UNSPEC
8485 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8488 return XVECEXP (XEXP (x, 0), 0, 0);
8491 if (GET_CODE (x) != PLUS
8492 || GET_CODE (XEXP (x, 1)) != CONST)
8495 if (REG_P (XEXP (x, 0))
8496 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8497 /* %ebx + GOT/GOTOFF */
8499 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8501 /* %ebx + %reg * scale + GOT/GOTOFF */
8502 reg_addend = XEXP (x, 0);
/* Peel the PIC register off whichever side of the inner PLUS it is
   on, leaving only the scaled-index part in reg_addend.  */
8503 if (REG_P (XEXP (reg_addend, 0))
8504 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8505 reg_addend = XEXP (reg_addend, 1);
8506 else if (REG_P (XEXP (reg_addend, 1))
8507 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8508 reg_addend = XEXP (reg_addend, 0);
8511 if (!REG_P (reg_addend)
8512 && GET_CODE (reg_addend) != MULT
8513 && GET_CODE (reg_addend) != ASHIFT)
8519 x = XEXP (XEXP (x, 1), 0);
8520 if (GET_CODE (x) == PLUS
8521 && CONST_INT_P (XEXP (x, 1)))
8523 const_addend = XEXP (x, 1);
/* GOT references must come from a MEM; GOTOFF must not.  */
8527 if (GET_CODE (x) == UNSPEC
8528 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8529 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8530 result = XVECEXP (x, 0, 0);
8532 if (TARGET_MACHO && darwin_local_data_pic (x)
8534 result = XEXP (x, 0);
/* Re-attach any constant and register addends peeled off above.  */
8540 result = gen_rtx_PLUS (Pmode, result, const_addend);
8542 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8546 /* If X is a machine specific address (i.e. a symbol or label being
8547 referenced as a displacement from the GOT implemented using an
8548 UNSPEC), then return the base term. Otherwise return X. */
8551 ix86_find_base_term (rtx x)
/* Fast path: unwrap CONST (PLUS (UNSPEC_GOTPCREL ...) const_int) down
   to the symbol/label inside the UNSPEC.  */
8557 if (GET_CODE (x) != CONST)
8560 if (GET_CODE (term) == PLUS
8561 && (CONST_INT_P (XEXP (term, 1))
8562 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8563 term = XEXP (term, 0);
8564 if (GET_CODE (term) != UNSPEC
8565 || XINT (term, 1) != UNSPEC_GOTPCREL
8568 term = XVECEXP (term, 0, 0);
8570 if (GET_CODE (term) != SYMBOL_REF
8571 && GET_CODE (term) != LABEL_REF)
/* Fall back to full delegitimization; only accept a bare symbol or
   label as the base term.  */
8577 term = ix86_delegitimize_address (x);
8579 if (GET_CODE (term) != SYMBOL_REF
8580 && GET_CODE (term) != LABEL_REF)
/* Emit the instruction-suffix spelling of comparison CODE under flags
   mode MODE; REVERSE inverts the condition.  NOTE(review): most case
   labels of this switch are elided in this extraction — the suffix
   strings below belong to arms whose labels must be checked against
   the full file.  */
8587 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
/* FP compares are first reduced to the equivalent integer condition.  */
8592 if (mode == CCFPmode || mode == CCFPUmode)
8594 enum rtx_code second_code, bypass_code;
8595 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8596 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8597 code = ix86_fp_compare_code_to_integer (code);
8601 code = reverse_condition (code);
8652 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8656 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8657 Those same assemblers have the same but opposite lossage on cmov. */
8659 suffix = fp ? "nbe" : "a";
8660 else if (mode == CCCmode)
8683 gcc_assert (mode == CCmode || mode == CCCmode);
8705 gcc_assert (mode == CCmode || mode == CCCmode);
8706 suffix = fp ? "nb" : "ae";
8709 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8716 else if (mode == CCCmode)
8717 suffix = fp ? "nb" : "ae";
8722 suffix = fp ? "u" : "p";
8725 suffix = fp ? "nu" : "np";
8730 fputs (suffix, file);
8733 /* Print the name of register X to FILE based on its machine mode and number.
8734 If CODE is 'w', pretend the mode is HImode.
8735 If CODE is 'b', pretend the mode is QImode.
8736 If CODE is 'k', pretend the mode is SImode.
8737 If CODE is 'q', pretend the mode is DImode.
8738 If CODE is 'h', pretend the reg is the 'high' byte register.
8739 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8742 print_reg (rtx x, int code, FILE *file)
/* Internal/virtual registers must never reach assembly output.  */
8744 gcc_assert (x == pc_rtx
8745 || (REGNO (x) != ARG_POINTER_REGNUM
8746 && REGNO (x) != FRAME_POINTER_REGNUM
8747 && REGNO (x) != FLAGS_REG
8748 && REGNO (x) != FPSR_REG
8749 && REGNO (x) != FPCR_REG));
8751 if (ASSEMBLER_DIALECT == ASM_ATT)
/* pc_rtx prints as "rip", valid only in 64-bit mode.  */
8756 gcc_assert (TARGET_64BIT);
8757 fputs ("rip", file);
/* Translate the override CODE into an effective operand size; with no
   override, use the register's own mode size.  */
8761 if (code == 'w' || MMX_REG_P (x))
8763 else if (code == 'b')
8765 else if (code == 'k')
8767 else if (code == 'q')
8769 else if (code == 'y')
8771 else if (code == 'h')
8774 code = GET_MODE_SIZE (GET_MODE (x));
8776 /* Irritatingly, AMD extended registers use different naming convention
8777 from the normal registers. */
8778 if (REX_INT_REG_P (x))
8780 gcc_assert (TARGET_64BIT);
8784 error ("extended registers have no high halves");
8787 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8790 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8793 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8796 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8799 error ("unsupported operand size for extended register");
/* Non-REX registers: pick the name table matching the size; x87 stack
   top gets the explicit "st(0)" spelling.  */
8807 if (STACK_TOP_P (x))
8809 fputs ("st(0)", file);
8816 if (! ANY_FP_REG_P (x))
8817 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8822 fputs (hi_reg_name[REGNO (x)], file);
8825 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8827 fputs (qi_reg_name[REGNO (x)], file);
8830 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8832 fputs (qi_high_reg_name[REGNO (x)], file);
8839 /* Locate some local-dynamic symbol still in use by this function
8840 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: record the first local-dynamic TLS symbol
   found into cfun->machine->some_ld_name.  */
8844 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8848 if (GET_CODE (x) == SYMBOL_REF
8849 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8851 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return a cached local-dynamic TLS symbol name for this function,
   scanning the insn stream on first use.  */
8859 get_some_local_dynamic_name (void)
8863 if (cfun->machine->some_ld_name)
8864 return cfun->machine->some_ld_name;
/* Walk every insn pattern until the callback above finds one.  */
8866 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8868 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8869 return cfun->machine->some_ld_name;
8875 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8876 C -- print opcode suffix for set/cmov insn.
8877 c -- like C, but print reversed condition
8878 F,f -- likewise, but for floating-point.
8879 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8881 R -- print the prefix for register names.
8882 z -- print the opcode suffix for the size of the current operand.
8883 * -- print a star (in certain assembler syntax)
8884 A -- print an absolute memory reference.
8885 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8886 s -- print a shift double count, followed by the assemblers argument
8888 b -- print the QImode name of the register for the indicated operand.
8889 %b0 would print %al if operands[0] is reg 0.
8890 w -- likewise, print the HImode name of the register.
8891 k -- likewise, print the SImode name of the register.
8892 q -- likewise, print the DImode name of the register.
8893 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8894 y -- print "st(0)" instead of "st" as a register.
8895 D -- print condition for SSE cmp instruction.
8896 P -- if PIC, print an @PLT suffix.
8897 X -- don't print any sort of PIC '@' suffix for a symbol.
8898 & -- print some in-use local-dynamic symbol name.
8899 H -- print a memory address offset by 8; used for sse high-parts
8900 Y -- print condition for SSE5 com* instruction.
8901 + -- print a branch hint as 'cs' or 'ds' prefix
8902 ; -- print a semicolon (after prefixes due to bug in older gas).
/* Main operand printer.  NOTE(review): the dispatch structure (the
   outer switch/case labels on CODE) is elided in this extraction;
   the fragments below are the bodies of individual code handlers.  */
8906 print_operand (FILE *file, rtx x, int code)
8913 if (ASSEMBLER_DIALECT == ASM_ATT)
8918 assemble_name (file, get_some_local_dynamic_name ());
8922 switch (ASSEMBLER_DIALECT)
8929 /* Intel syntax. For absolute addresses, registers should not
8930 be surrounded by braces. */
8934 PRINT_OPERAND (file, x, 0);
8944 PRINT_OPERAND (file, x, 0);
8949 if (ASSEMBLER_DIALECT == ASM_ATT)
8954 if (ASSEMBLER_DIALECT == ASM_ATT)
8959 if (ASSEMBLER_DIALECT == ASM_ATT)
8964 if (ASSEMBLER_DIALECT == ASM_ATT)
8969 if (ASSEMBLER_DIALECT == ASM_ATT)
8974 if (ASSEMBLER_DIALECT == ASM_ATT)
8979 /* 387 opcodes don't get size suffixes if the operands are
8981 if (STACK_REG_P (x))
8984 /* Likewise if using Intel opcodes. */
8985 if (ASSEMBLER_DIALECT == ASM_INTEL)
8988 /* This is the size of op from size of operand. */
8989 switch (GET_MODE_SIZE (GET_MODE (x)))
8998 #ifdef HAVE_GAS_FILDS_FISTS
9008 if (GET_MODE (x) == SFmode)
9023 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
9025 #ifdef GAS_MNEMONICS
/* 's' -- shift-double count, only printed when a count is present.  */
9051 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
9053 PRINT_OPERAND (file, x, 0);
9059 /* Little bit of braindamage here. The SSE compare instructions
9060 does use completely different names for the comparisons that the
9061 fp conditional moves. */
9062 switch (GET_CODE (x))
9077 fputs ("unord", file);
9081 fputs ("neq", file);
9085 fputs ("nlt", file);
9089 fputs ("nle", file);
9092 fputs ("ord", file);
9099 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9100 if (ASSEMBLER_DIALECT == ASM_ATT)
9102 switch (GET_MODE (x))
9104 case HImode: putc ('w', file); break;
9106 case SFmode: putc ('l', file); break;
9108 case DFmode: putc ('q', file); break;
9109 default: gcc_unreachable ();
9116 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
9119 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9120 if (ASSEMBLER_DIALECT == ASM_ATT)
9123 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
9126 /* Like above, but reverse condition */
9128 /* Check to see if argument to %c is really a constant
9129 and not a condition code which needs to be reversed. */
9130 if (!COMPARISON_P (x))
9132 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9135 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9138 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9139 if (ASSEMBLER_DIALECT == ASM_ATT)
9142 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
9146 /* It doesn't actually matter what mode we use here, as we're
9147 only going to use this for printing. */
9148 x = adjust_address_nv (x, DImode, 8);
/* '+' -- branch hints, suppressed when optimizing for size or when
   prediction hints are disabled.  */
9155 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9158 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9161 int pred_val = INTVAL (XEXP (x, 0));
9163 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9164 || pred_val > REG_BR_PROB_BASE * 55 / 100)
9166 int taken = pred_val > REG_BR_PROB_BASE / 2;
9167 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9169 /* Emit hints only in the case default branch prediction
9170 heuristics would fail. */
9171 if (taken != cputaken)
9173 /* We use 3e (DS) prefix for taken branches and
9174 2e (CS) prefix for not taken branches. */
9176 fputs ("ds ; ", file);
9178 fputs ("cs ; ", file);
/* 'Y' -- SSE5 com* comparison names.  */
9186 switch (GET_CODE (x))
9189 fputs ("neq", file);
9196 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9200 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9211 fputs ("unord", file);
9214 fputs ("ord", file);
9217 fputs ("ueq", file);
9220 fputs ("nlt", file);
9223 fputs ("nle", file);
9226 fputs ("ule", file);
9229 fputs ("ult", file);
9232 fputs ("une", file);
9241 fputs (" ; ", file);
9248 output_operand_lossage ("invalid operand code '%c'", code);
9253 print_reg (x, code, file);
9257 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9258 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9259 && GET_MODE (x) != BLKmode)
9262 switch (GET_MODE_SIZE (GET_MODE (x)))
9264 case 1: size = "BYTE"; break;
9265 case 2: size = "WORD"; break;
9266 case 4: size = "DWORD"; break;
9267 case 8: size = "QWORD"; break;
9268 case 12: size = "XWORD"; break;
9270 if (GET_MODE (x) == XFmode)
9279 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9282 else if (code == 'w')
9284 else if (code == 'k')
9288 fputs (" PTR ", file);
9292 /* Avoid (%rip) for call operands. */
9293 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9294 && !CONST_INT_P (x))
9295 output_addr_const (file, x);
9296 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9297 output_operand_lossage ("invalid constraints for operand");
/* SFmode constants are emitted as their 32-bit bit pattern.  */
9302 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9307 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9308 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9310 if (ASSEMBLER_DIALECT == ASM_ATT)
9312 fprintf (file, "0x%08lx", (long unsigned int) l);
9315 /* These float cases don't actually occur as immediate operands. */
9316 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9320 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9321 fprintf (file, "%s", dstr);
9324 else if (GET_CODE (x) == CONST_DOUBLE
9325 && GET_MODE (x) == XFmode)
9329 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9330 fprintf (file, "%s", dstr);
9335 /* We have patterns that allow zero sets of memory, for instance.
9336 In 64-bit mode, we should probably support all 8-byte vectors,
9337 since we can in fact encode that into an immediate. */
9338 if (GET_CODE (x) == CONST_VECTOR)
9340 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediates get '$' in AT&T syntax; symbolic constants get either a
   PIC-aware printer or "OFFSET FLAT:" under Intel syntax.  */
9346 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9348 if (ASSEMBLER_DIALECT == ASM_ATT)
9351 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9352 || GET_CODE (x) == LABEL_REF)
9354 if (ASSEMBLER_DIALECT == ASM_ATT)
9357 fputs ("OFFSET FLAT:", file);
9360 if (CONST_INT_P (x))
9361 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9363 output_pic_addr_const (file, x, code);
9365 output_addr_const (file, x);
9369 /* Print a memory operand whose address is ADDR. */
9372 print_operand_address (FILE *file, rtx addr)
9374 struct ix86_address parts;
9375 rtx base, index, disp;
/* Decompose ADDR into base/index/disp/scale/segment first; printing
   below is driven entirely by the parts.  */
9377 int ok = ix86_decompose_address (addr, &parts);
9382 index = parts.index;
9384 scale = parts.scale;
/* Explicit FS/GS segment override prefix.  */
9392 if (ASSEMBLER_DIALECT == ASM_ATT)
9394 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9400 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9401 if (TARGET_64BIT && !base && !index)
9405 if (GET_CODE (disp) == CONST
9406 && GET_CODE (XEXP (disp, 0)) == PLUS
9407 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9408 symbol = XEXP (XEXP (disp, 0), 0);
9410 if (GET_CODE (symbol) == LABEL_REF
9411 || (GET_CODE (symbol) == SYMBOL_REF
9412 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9415 if (!base && !index)
9417 /* Displacement only requires special attention. */
9419 if (CONST_INT_P (disp))
9421 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9422 fputs ("ds:", file);
9423 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9426 output_pic_addr_const (file, disp, 0);
9428 output_addr_const (file, disp);
/* AT&T syntax: disp(base,index,scale).  */
9432 if (ASSEMBLER_DIALECT == ASM_ATT)
9437 output_pic_addr_const (file, disp, 0);
9438 else if (GET_CODE (disp) == LABEL_REF)
9439 output_asm_label (disp);
9441 output_addr_const (file, disp);
9446 print_reg (base, 0, file);
9450 print_reg (index, 0, file);
9452 fprintf (file, ",%d", scale);
/* Intel syntax: sym[base+index*scale+offset].  */
9458 rtx offset = NULL_RTX;
9462 /* Pull out the offset of a symbol; print any symbol itself. */
9463 if (GET_CODE (disp) == CONST
9464 && GET_CODE (XEXP (disp, 0)) == PLUS
9465 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9467 offset = XEXP (XEXP (disp, 0), 1);
9468 disp = gen_rtx_CONST (VOIDmode,
9469 XEXP (XEXP (disp, 0), 0));
9473 output_pic_addr_const (file, disp, 0);
9474 else if (GET_CODE (disp) == LABEL_REF)
9475 output_asm_label (disp);
9476 else if (CONST_INT_P (disp))
9479 output_addr_const (file, disp);
9485 print_reg (base, 0, file);
9488 if (INTVAL (offset) >= 0)
9490 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9494 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9501 print_reg (index, 0, file);
9503 fprintf (file, "*%d", scale);
/* Target hook: print address constants wrapped in target-specific
   UNSPECs (TLS relocations), returning false for anything else.  */
9511 output_addr_const_extra (FILE *file, rtx x)
9515 if (GET_CODE (x) != UNSPEC)
9518 op = XVECEXP (x, 0, 0);
/* Emit the wrapped operand followed by the relocation suffix chosen
   by the unspec number.  */
9519 switch (XINT (x, 1))
9521 case UNSPEC_GOTTPOFF:
9522 output_addr_const (file, op);
9523 /* FIXME: This might be @TPOFF in Sun ld. */
9524 fputs ("@GOTTPOFF", file);
9527 output_addr_const (file, op);
9528 fputs ("@TPOFF", file);
9531 output_addr_const (file, op);
9533 fputs ("@TPOFF", file);
9535 fputs ("@NTPOFF", file);
9538 output_addr_const (file, op);
9539 fputs ("@DTPOFF", file);
9541 case UNSPEC_GOTNTPOFF:
9542 output_addr_const (file, op);
9544 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9545 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9547 fputs ("@GOTNTPOFF", file);
9549 case UNSPEC_INDNTPOFF:
9550 output_addr_const (file, op);
9551 fputs ("@INDNTPOFF", file);
9561 /* Split one or more DImode RTL references into pairs of SImode
9562 references. The RTL can be REG, offsettable MEM, integer constant, or
9563 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9564 split and "num" is its length. lo_half and hi_half are output arrays
9565 that parallel "operands". */
9568 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9572 rtx op = operands[num];
9574 /* simplify_subreg refuse to split volatile memory addresses,
9575 but we still have to handle it. */
/* MEM case: adjust the address by 0 and 4 bytes for the two halves.  */
9578 lo_half[num] = adjust_address (op, SImode, 0);
9579 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM case: take SImode subregs; VOIDmode constants are treated
   as DImode.  */
9583 lo_half[num] = simplify_gen_subreg (SImode, op,
9584 GET_MODE (op) == VOIDmode
9585 ? DImode : GET_MODE (op), 0);
9586 hi_half[num] = simplify_gen_subreg (SImode, op,
9587 GET_MODE (op) == VOIDmode
9588 ? DImode : GET_MODE (op), 4);
9592 /* Split one or more TImode RTL references into pairs of DImode
9593 references. The RTL can be REG, offsettable MEM, integer constant, or
9594 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9595 split and "num" is its length. lo_half and hi_half are output arrays
9596 that parallel "operands". */
9599 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9603 rtx op = operands[num];
9605 /* simplify_subreg refuse to split volatile memory addresses, but we
9606 still have to handle it. */
/* MEM case: halves live at byte offsets 0 and 8.  */
9609 lo_half[num] = adjust_address (op, DImode, 0);
9610 hi_half[num] = adjust_address (op, DImode, 8);
/* Non-MEM case: DImode subregs of the TImode value.  */
9614 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9615 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9620 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9621 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9622 is the expression of the binary operation. The output may either be
9623 emitted here, or returned to the caller, like all output_* functions.
9625 There is no guarantee that the operands are the same mode, as they
9626 might be within FLOAT or FLOAT_EXTEND expressions. */
9628 #ifndef SYSV386_COMPAT
9629 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9630 wants to fix the assemblers because that causes incompatibility
9631 with gcc. No-one wants to fix gcc because that causes
9632 incompatibility with assemblers... You can use the option of
9633 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9634 #define SYSV386_COMPAT 1
9638 output_387_binary_op (rtx insn, rtx *operands)
9640 static char buf[30];
/* SSE form is used when any operand lives in an SSE register.  */
9643 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9645 #ifdef ENABLE_CHECKING
9646 /* Even if we do not want to check the inputs, this documents input
9647 constraints. Which helps in understanding the following code. */
9648 if (STACK_REG_P (operands[0])
9649 && ((REG_P (operands[1])
9650 && REGNO (operands[0]) == REGNO (operands[1])
9651 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9652 || (REG_P (operands[2])
9653 && REGNO (operands[0]) == REGNO (operands[2])
9654 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9655 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9658 gcc_assert (is_sse);
/* Select the mnemonic stem; integer-mode source operands select the
   fi* (integer) variants.  NOTE(review): the strcpy calls that set
   the stem are elided in this extraction.  */
9661 switch (GET_CODE (operands[3]))
9664 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9665 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9673 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9674 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9682 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9683 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9691 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9692 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE: append ss/sd scalar suffix and the two-operand template.  */
9706 if (GET_MODE (operands[0]) == SFmode)
9707 strcat (buf, "ss\t{%2, %0|%0, %2}");
9709 strcat (buf, "sd\t{%2, %0|%0, %2}");
9714 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
9718 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9720 rtx temp = operands[2];
9721 operands[2] = operands[1];
9725 /* know operands[0] == operands[1]. */
9727 if (MEM_P (operands[2]))
9733 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9735 if (STACK_TOP_P (operands[0]))
9736 /* How is it that we are storing to a dead operand[2]?
9737 Well, presumably operands[1] is dead too. We can't
9738 store the result to st(0) as st(0) gets popped on this
9739 instruction. Instead store to operands[2] (which I
9740 think has to be st(1)). st(1) will be popped later.
9741 gcc <= 2.8.1 didn't have this check and generated
9742 assembly code that the Unixware assembler rejected. */
9743 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9745 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9749 if (STACK_TOP_P (operands[0]))
9750 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9752 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): template choice also depends on
   which operand is memory and on SYSV386_COMPAT reversal rules.  */
9757 if (MEM_P (operands[1]))
9763 if (MEM_P (operands[2]))
9769 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9772 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9773 derived assemblers, confusingly reverse the direction of
9774 the operation for fsub{r} and fdiv{r} when the
9775 destination register is not st(0). The Intel assembler
9776 doesn't have this brain damage. Read !SYSV386_COMPAT to
9777 figure out what the hardware really does. */
9778 if (STACK_TOP_P (operands[0]))
9779 p = "{p\t%0, %2|rp\t%2, %0}";
9781 p = "{rp\t%2, %0|p\t%0, %2}";
9783 if (STACK_TOP_P (operands[0]))
9784 /* As above for fmul/fadd, we can't store to st(0). */
9785 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9787 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9792 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9795 if (STACK_TOP_P (operands[0]))
9796 p = "{rp\t%0, %1|p\t%1, %0}";
9798 p = "{p\t%1, %0|rp\t%0, %1}";
9800 if (STACK_TOP_P (operands[0]))
9801 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9803 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9808 if (STACK_TOP_P (operands[0]))
9810 if (STACK_TOP_P (operands[1]))
9811 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9813 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9816 else if (STACK_TOP_P (operands[1]))
9819 p = "{\t%1, %0|r\t%0, %1}";
9821 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9827 p = "{r\t%2, %0|\t%0, %2}";
9829 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9842 /* Return needed mode for entity in optimize_mode_switching pass. */
/* NOTE(review): the numeric prefixes in this listing are non-contiguous,
   so interior lines appear to have been elided; verify any change
   against the complete file.  */
9845 ix86_mode_needed (int entity, rtx insn)
9847 enum attr_i387_cw mode;
9849 /* The mode UNINITIALIZED is used to store control word after a
9850 function call or ASM pattern. The mode ANY specify that function
9851 has no requirements on the control word and make no changes in the
9852 bits we are interested in. */
/* Calls and asm statements may leave the control word in an unknown
   state, hence UNINITIALIZED.  */
9855 || (NONJUMP_INSN_P (insn)
9856 && (asm_noperands (PATTERN (insn)) >= 0
9857 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9858 return I387_CW_UNINITIALIZED;
9860 if (recog_memoized (insn) < 0)
/* The insn's i387_cw attribute selects which control-word setting
   (trunc/floor/ceil/mask_pm) this insn requires.  */
9863 mode = get_attr_i387_cw (insn);
9868 if (mode == I387_CW_TRUNC)
9873 if (mode == I387_CW_FLOOR)
9878 if (mode == I387_CW_CEIL)
9883 if (mode == I387_CW_MASK_PM)
9894 /* Output code to initialize control word copies used by trunc?f?i and
9895 rounding patterns. CURRENT_MODE is set to current control word,
9896 while NEW_MODE is set to new control word. */
9899 emit_i387_cw_initialization (int mode)
9901 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9904 enum ix86_stack_slot slot;
9906 rtx reg = gen_reg_rtx (HImode);
/* Save the current x87 control word, then edit a copy in REG.  */
9908 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9909 emit_move_insn (reg, copy_rtx (stored_mode))
/* First variant: plain 16-bit and/or to set the rounding-control
   bits (used for 64-bit, partial-reg-stall targets, or -Os).  */
9911 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9916 /* round toward zero (truncate) */
9917 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9918 slot = SLOT_CW_TRUNC;
9922 /* round down toward -oo */
9923 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9924 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9925 slot = SLOT_CW_FLOOR;
9929 /* round up toward +oo */
9930 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9931 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9932 slot = SLOT_CW_CEIL;
9935 case I387_CW_MASK_PM:
9936 /* mask precision exception for nearbyint() */
9937 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9938 slot = SLOT_CW_MASK_PM;
/* Second variant: presumably rewrites the rounding-control field in
   one insert instruction -- confirm against the movsi_insv_1 pattern.  */
9950 /* round toward zero (truncate) */
9951 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9952 slot = SLOT_CW_TRUNC;
9956 /* round down toward -oo */
9957 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9958 slot = SLOT_CW_FLOOR;
9962 /* round up toward +oo */
9963 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9964 slot = SLOT_CW_CEIL;
9967 case I387_CW_MASK_PM:
9968 /* mask precision exception for nearbyint() */
9969 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9970 slot = SLOT_CW_MASK_PM;
/* Spill the adjusted control word to its dedicated stack slot so the
   fldcw in the consuming insn can load it.  */
9978 gcc_assert (slot < MAX_386_STACK_LOCALS);
9980 new_mode = assign_386_stack_local (HImode, slot);
9981 emit_move_insn (new_mode, reg);
9984 /* Output code for INSN to convert a float to a signed int. OPERANDS
9985 are the insn operands. The output may be [HSD]Imode and the input
9986 operand may be [SDX]Fmode. */
9989 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9991 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9992 int dimode_p = GET_MODE (operands[0]) == DImode;
9993 int round_mode = get_attr_i387_cw (insn);
9995 /* Jump through a hoop or two for DImode, since the hardware has no
9996 non-popping instruction. We used to do this a different way, but
9997 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the popping store below leaves the value live.  */
9998 if ((dimode_p || fisttp) && !stack_top_dies)
9999 output_asm_insn ("fld\t%y1", operands);
10001 gcc_assert (STACK_TOP_P (operands[1]));
10002 gcc_assert (MEM_P (operands[0]));
10003 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* fisttp (SSE3) truncates regardless of the control word (per Intel
   SDM), so no fldcw juggling is needed on this path.  */
10006 output_asm_insn ("fisttp%z0\t%0", operands);
/* Classic path: temporarily switch rounding mode around the store,
   using the control-word slots in operands 2 and 3.  */
10009 if (round_mode != I387_CW_ANY)
10010 output_asm_insn ("fldcw\t%3", operands);
10011 if (stack_top_dies || dimode_p)
10012 output_asm_insn ("fistp%z0\t%0", operands);
10014 output_asm_insn ("fist%z0\t%0", operands);
10015 if (round_mode != I387_CW_ANY)
10016 output_asm_insn ("fldcw\t%2", operands);
10022 /* Output code for x87 ffreep insn. The OPNO argument, which may only
10023 have the values zero or one, indicates the ffreep insn's operand
10024 from the OPERANDS array. */
10026 static const char *
10027 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
10029 if (TARGET_USE_FFREEP)
10030 #if HAVE_AS_IX86_FFREEP
10031 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* The assembler lacks the ffreep mnemonic: emit raw opcode bytes
   via a .word directive instead.  */
10034 static char retval[] = ".word\t0xc_df";
10035 int regno = REGNO (operands[opno]);
10037 gcc_assert (FP_REGNO_P (regno));
/* Patch the stack-register number over the '_' placeholder at
   index 9 of the template string.  */
10039 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not profitable: a popping store.  */
10044 return opno ? "fstp\t%y1" : "fstp\t%y0";
10048 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
10049 should be used. UNORDERED_P is true when fucom should be used. */
10052 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
10054 int stack_top_dies;
10055 rtx cmp_op0, cmp_op1;
10056 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
10060 cmp_op0 = operands[0];
10061 cmp_op1 = operands[1];
10065 cmp_op0 = operands[1];
10066 cmp_op1 = operands[2];
/* SSE comparisons ([u]comiss / [u]comisd) set EFLAGS directly.  */
10071 if (GET_MODE (operands[0]) == SFmode)
10073 return "ucomiss\t{%1, %0|%0, %1}";
10075 return "comiss\t{%1, %0|%0, %1}";
10078 return "ucomisd\t{%1, %0|%0, %1}";
10080 return "comisd\t{%1, %0|%0, %1}";
/* x87 path: the first operand must be on top of the stack.  */
10083 gcc_assert (STACK_TOP_P (cmp_op0));
10085 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Comparison against zero can use ftst, popping st(0) when it dies.  */
10087 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
10089 if (stack_top_dies)
10091 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
10092 return output_387_ffreep (operands, 1);
10095 return "ftst\n\tfnstsw\t%0";
10098 if (STACK_REG_P (cmp_op1)
10100 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
10101 && REGNO (cmp_op1) != FIRST_STACK_REG)
10103 /* If both the top of the 387 stack dies, and the other operand
10104 is also a stack register that dies, then this must be a
10105 `fcompp' float compare */
10109 /* There is no double popping fcomi variant. Fortunately,
10110 eflags is immune from the fstp's cc clobbering. */
10112 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
10114 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
10115 return output_387_ffreep (operands, 0);
10120 return "fucompp\n\tfnstsw\t%0";
10122 return "fcompp\n\tfnstsw\t%0";
10127 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
10129 static const char * const alt[16] =
10131 "fcom%z2\t%y2\n\tfnstsw\t%0",
10132 "fcomp%z2\t%y2\n\tfnstsw\t%0",
10133 "fucom%z2\t%y2\n\tfnstsw\t%0",
10134 "fucomp%z2\t%y2\n\tfnstsw\t%0",
10136 "ficom%z2\t%y2\n\tfnstsw\t%0",
10137 "ficomp%z2\t%y2\n\tfnstsw\t%0",
10141 "fcomi\t{%y1, %0|%0, %y1}",
10142 "fcomip\t{%y1, %0|%0, %y1}",
10143 "fucomi\t{%y1, %0|%0, %y1}",
10144 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index described by the comment above the table.  */
10155 mask = eflags_p << 3;
10156 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10157 mask |= unordered_p << 1;
10158 mask |= stack_top_dies;
10160 gcc_assert (mask < 16);
/* Emit one element of a jump-table address vector: an assembler
   directive referencing local label VALUE.  */
10169 ix86_output_addr_vec_elt (FILE *file, int value)
10171 const char *directive = ASM_LONG;
/* Absolute vectors need .quad on 64-bit; otherwise we must be 32-bit.  */
10175 directive = ASM_QUAD;
10177 gcc_assert (!TARGET_64BIT);
10180 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of an address-difference (PIC) jump table:
   the distance from label REL to label VALUE.  */
10184 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10186 const char *directive = ASM_LONG;
10189 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10190 directive = ASM_QUAD;
10192 gcc_assert (!TARGET_64BIT);
10194 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10195 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10196 fprintf (file, "%s%s%d-%s%d\n",
10197 directive, LPREFIX, value, LPREFIX, rel);
10198 else if (HAVE_AS_GOTOFF_IN_DATA)
10199 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10201 else if (TARGET_MACHO)
10203 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10204 machopic_output_function_base_name (file);
10205 fprintf(file, "\n");
/* Fallback: express the entry relative to the GOT symbol.  */
10209 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10210 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10213 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10217 ix86_expand_clear (rtx dest)
10221 /* We play register width games, which are only valid after reload. */
10222 gcc_assert (reload_completed);
10224 /* Avoid HImode and its attendant prefix byte. */
10225 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10226 dest = gen_rtx_REG (SImode, REGNO (dest));
10227 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10229 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10230 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* The xor form clobbers EFLAGS, so wrap the set in a PARALLEL with
   an explicit flags clobber.  */
10232 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10233 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10239 /* X is an unchanging MEM. If it is a constant pool reference, return
10240 the constant pool rtx, else NULL. */
10243 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT decoration from the address before testing whether
   it refers to a constant-pool symbol.  */
10245 x = ix86_delegitimize_address (XEXP (x, 0));
10247 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10248 return get_pool_constant (x);
/* Expand a scalar move of MODE from OPERANDS[1] to OPERANDS[0],
   legitimizing TLS, dllimport and PIC symbol references as needed.
   NOTE(review): this listing has elided lines (non-contiguous numeric
   prefixes); verify against the complete file.  */
10254 ix86_expand_move (enum machine_mode mode, rtx operands[])
10257 enum tls_model model;
/* Bare SYMBOL_REF source: handle TLS and dllimport symbols.  */
10262 if (GET_CODE (op1) == SYMBOL_REF)
10264 model = SYMBOL_REF_TLS_MODEL (op1);
10267 op1 = legitimize_tls_address (op1, model, true);
10268 op1 = force_operand (op1, op0);
10272 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10273 && SYMBOL_REF_DLLIMPORT_P (op1))
10274 op1 = legitimize_dllimport_symbol (op1, false);
/* (const (plus symbol addend)) source: legitimize the symbol part,
   then add the offset back in.  */
10276 else if (GET_CODE (op1) == CONST
10277 && GET_CODE (XEXP (op1, 0)) == PLUS
10278 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10280 rtx addend = XEXP (XEXP (op1, 0), 1);
10281 rtx symbol = XEXP (XEXP (op1, 0), 0);
10284 model = SYMBOL_REF_TLS_MODEL (symbol);
10286 tmp = legitimize_tls_address (symbol, model, true);
10287 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10288 && SYMBOL_REF_DLLIMPORT_P (symbol))
10289 tmp = legitimize_dllimport_symbol (symbol, true);
10293 tmp = force_operand (tmp, NULL);
10294 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10295 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic addresses must go through the PIC machinery.  */
10301 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10303 if (TARGET_MACHO && !TARGET_64BIT)
10308 rtx temp = ((reload_in_progress
10309 || ((op0 && REG_P (op0))
10311 ? op0 : gen_reg_rtx (Pmode));
10312 op1 = machopic_indirect_data_reference (op1, temp);
10313 op1 = machopic_legitimize_pic_address (op1, mode,
10314 temp == op1 ? 0 : temp);
10316 else if (MACHOPIC_INDIRECT)
10317 op1 = machopic_indirect_data_reference (op1, 0);
10325 op1 = force_reg (Pmode, op1);
10326 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10328 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10329 op1 = legitimize_pic_address (op1, reg);
10338 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10339 || !push_operand (op0, mode))
10341 op1 = force_reg (mode, op1);
10343 if (push_operand (op0, mode)
10344 && ! general_no_elim_operand (op1, mode))
10345 op1 = copy_to_mode_reg (mode, op1);
10347 /* Force large constants in 64bit compilation into register
10348 to get them CSEed. */
10349 if (can_create_pseudo_p ()
10350 && (mode == DImode) && TARGET_64BIT
10351 && immediate_operand (op1, mode)
10352 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10353 && !register_operand (op0, mode)
10355 op1 = copy_to_mode_reg (mode, op1);
10357 if (can_create_pseudo_p ()
10358 && FLOAT_MODE_P (mode)
10359 && GET_CODE (op1) == CONST_DOUBLE)
10361 /* If we are loading a floating point constant to a register,
10362 force the value to memory now, since we'll get better code
10363 out the back end. */
10365 op1 = validize_mem (force_const_mem (mode, op1));
10366 if (!register_operand (op0, mode))
10368 rtx temp = gen_reg_rtx (mode);
10369 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10370 emit_move_insn (op0, temp);
/* Finally emit the (possibly rewritten) move.  */
10376 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move, forcing awkward constants to memory and
   routing misaligned 32-bit TImode memory accesses through
   ix86_expand_vector_move_misalign.  */
10380 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10382 rtx op0 = operands[0], op1 = operands[1];
10383 unsigned int align = GET_MODE_ALIGNMENT (mode);
10385 /* Force constants other than zero into memory. We do not know how
10386 the instructions used to build constants modify the upper 64 bits
10387 of the register, once we have that information we may be able
10388 to handle some of them more efficiently. */
10389 if (can_create_pseudo_p ()
10390 && register_operand (op0, mode)
10391 && (CONSTANT_P (op1)
10392 || (GET_CODE (op1) == SUBREG
10393 && CONSTANT_P (SUBREG_REG (op1))))
10394 && standard_sse_constant_p (op1) <= 0)
10395 op1 = validize_mem (force_const_mem (mode, op1));
10397 /* TDmode values are passed as TImode on the stack. TImode values
10398 are moved via xmm registers, and moving them to stack can result in
10399 unaligned memory access. Use ix86_expand_vector_move_misalign()
10400 if memory operand is not aligned correctly. */
10401 if (can_create_pseudo_p ()
10402 && (mode == TImode) && !TARGET_64BIT
10403 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10404 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10408 /* ix86_expand_vector_move_misalign() does not like constants ... */
10409 if (CONSTANT_P (op1)
10410 || (GET_CODE (op1) == SUBREG
10411 && CONSTANT_P (SUBREG_REG (op1))))
10412 op1 = validize_mem (force_const_mem (mode, op1));
10414 /* ... nor both arguments in memory. */
10415 if (!register_operand (op0, mode)
10416 && !register_operand (op1, mode))
10417 op1 = force_reg (mode, op1);
10419 tmp[0] = op0; tmp[1] = op1;
10420 ix86_expand_vector_move_misalign (mode, tmp);
10424 /* Make operand1 a register if it isn't already. */
10425 if (can_create_pseudo_p ()
10426 && !register_operand (op0, mode)
10427 && !register_operand (op1, mode))
10429 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
/* Plain case: emit the move directly.  */
10433 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10436 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10437 straight to ix86_expand_vector_move. */
10438 /* Code generation for scalar reg-reg moves of single and double precision data:
10439 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10443 if (x86_sse_partial_reg_dependency == true)
10448 Code generation for scalar loads of double precision data:
10449 if (x86_sse_split_regs == true)
10450 movlpd mem, reg (gas syntax)
10454 Code generation for unaligned packed loads of single precision data
10455 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10456 if (x86_sse_unaligned_move_optimal)
10459 if (x86_sse_partial_reg_dependency == true)
10471 Code generation for unaligned packed loads of double precision data
10472 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10473 if (x86_sse_unaligned_move_optimal)
10476 if (x86_sse_split_regs == true)
10489 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10498 /* If we're optimizing for size, movups is the smallest. */
10501 op0 = gen_lowpart (V4SFmode, op0);
10502 op1 = gen_lowpart (V4SFmode, op1);
10503 emit_insn (gen_sse_movups (op0, op1));
/* --- Unaligned load path (op1 in memory) --- */
10507 /* ??? If we have typed data, then it would appear that using
10508 movdqu is the only way to get unaligned data loaded with
10510 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10512 op0 = gen_lowpart (V16QImode, op0);
10513 op1 = gen_lowpart (V16QImode, op1);
10514 emit_insn (gen_sse2_movdqu (op0, op1));
10518 if (TARGET_SSE2 && mode == V2DFmode)
10522 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10524 op0 = gen_lowpart (V2DFmode, op0);
10525 op1 = gen_lowpart (V2DFmode, op1);
10526 emit_insn (gen_sse2_movupd (op0, op1));
10530 /* When SSE registers are split into halves, we can avoid
10531 writing to the top half twice. */
10532 if (TARGET_SSE_SPLIT_REGS)
10534 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10539 /* ??? Not sure about the best option for the Intel chips.
10540 The following would seem to satisfy; the register is
10541 entirely cleared, breaking the dependency chain. We
10542 then store to the upper half, with a dependency depth
10543 of one. A rumor has it that Intel recommends two movsd
10544 followed by an unpacklpd, but this is unconfirmed. And
10545 given that the dependency depth of the unpacklpd would
10546 still be one, I'm not sure why this would be better. */
10547 zero = CONST0_RTX (V2DFmode);
/* Load the two DF halves separately (movlpd/movhpd equivalents).  */
10550 m = adjust_address (op1, DFmode, 0);
10551 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10552 m = adjust_address (op1, DFmode, 8);
10553 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10557 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10559 op0 = gen_lowpart (V4SFmode, op0);
10560 op1 = gen_lowpart (V4SFmode, op1);
10561 emit_insn (gen_sse_movups (op0, op1));
10565 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10566 emit_move_insn (op0, CONST0_RTX (mode));
10568 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
/* Load the two 8-byte halves with movlps/movhps equivalents.  */
10570 if (mode != V4SFmode)
10571 op0 = gen_lowpart (V4SFmode, op0);
10572 m = adjust_address (op1, V2SFmode, 0);
10573 emit_insn (gen_sse_loadlps (op0, op0, m));
10574 m = adjust_address (op1, V2SFmode, 8);
10575 emit_insn (gen_sse_loadhps (op0, op0, m));
/* --- Unaligned store path (op0 in memory) --- */
10578 else if (MEM_P (op0))
10580 /* If we're optimizing for size, movups is the smallest. */
10583 op0 = gen_lowpart (V4SFmode, op0);
10584 op1 = gen_lowpart (V4SFmode, op1);
10585 emit_insn (gen_sse_movups (op0, op1));
10589 /* ??? Similar to above, only less clear because of quote
10590 typeless stores unquote. */
10591 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10592 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10594 op0 = gen_lowpart (V16QImode, op0);
10595 op1 = gen_lowpart (V16QImode, op1);
10596 emit_insn (gen_sse2_movdqu (op0, op1));
10600 if (TARGET_SSE2 && mode == V2DFmode)
10602 m = adjust_address (op0, DFmode, 0);
10603 emit_insn (gen_sse2_storelpd (m, op1));
10604 m = adjust_address (op0, DFmode, 8);
10605 emit_insn (gen_sse2_storehpd (m, op1));
/* Store the two 8-byte halves separately.  */
10609 if (mode != V4SFmode)
10610 op1 = gen_lowpart (V4SFmode, op1);
10611 m = adjust_address (op0, V2SFmode, 0);
10612 emit_insn (gen_sse_storelps (m, op1));
10613 m = adjust_address (op0, V2SFmode, 8);
10614 emit_insn (gen_sse_storehps (m, op1));
/* Neither operand in memory: callers should not get here.  */
10618 gcc_unreachable ();
10621 /* Expand a push in MODE. This is some mode for which we do not support
10622 proper push instructions, at least from the registers that we expect
10623 the value to live in. */
10626 ix86_expand_push (enum machine_mode mode, rtx x)
/* Bump the stack pointer down by the mode's size ...  */
10630 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10631 GEN_INT (-GET_MODE_SIZE (mode)),
10632 stack_pointer_rtx, 1, OPTAB_DIRECT);
10633 if (tmp != stack_pointer_rtx)
10634 emit_move_insn (stack_pointer_rtx, tmp);
/* ... then store X into the newly reserved slot.  */
10636 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10637 emit_move_insn (tmp, x);
10640 /* Helper function of ix86_fixup_binary_operands to canonicalize
10641 operand order. Returns true if the operands should be swapped. */
10644 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10647 rtx dst = operands[0];
10648 rtx src1 = operands[1];
10649 rtx src2 = operands[2];
10651 /* If the operation is not commutative, we can't do anything. */
10652 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10655 /* Highest priority is that src1 should match dst. */
10656 if (rtx_equal_p (dst, src1))
10658 if (rtx_equal_p (dst, src2))
10661 /* Next highest priority is that immediate constants come second. */
10662 if (immediate_operand (src2, mode))
10664 if (immediate_operand (src1, mode))
10667 /* Lowest priority is that memory references should come second. */
10677 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10678 destination to use for the operation. If different from the true
10679 destination in operands[0], a copy operation will be required. */
10682 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10685 rtx dst = operands[0];
10686 rtx src1 = operands[1];
10687 rtx src2 = operands[2];
10689 /* Canonicalize operand order. */
10690 if (ix86_swap_binary_operands_p (code, mode, operands))
10694 /* It is invalid to swap operands of different modes. */
10695 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
10702 /* Both source operands cannot be in memory. */
10703 if (MEM_P (src1) && MEM_P (src2))
10705 /* Optimization: Only read from memory once. */
10706 if (rtx_equal_p (src1, src2))
10708 src2 = force_reg (mode, src2);
10712 src2 = force_reg (mode, src2);
10715 /* If the destination is memory, and we do not have matching source
10716 operands, do things in registers. */
10717 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10718 dst = gen_reg_rtx (mode);
10720 /* Source 1 cannot be a constant. */
10721 if (CONSTANT_P (src1))
10722 src1 = force_reg (mode, src1);
10724 /* Source 1 cannot be a non-matching memory. */
10725 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10726 src1 = force_reg (mode, src1);
/* Write the possibly-rewritten sources back for the caller.  */
10728 operands[1] = src1;
10729 operands[2] = src2;
10733 /* Similarly, but assume that the destination has already been
10734 set up properly. */
10737 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10738 enum machine_mode mode, rtx operands[])
10740 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
/* The fixup must not have required a scratch destination here.  */
10741 gcc_assert (dst == operands[0]);
10744 /* Attempt to expand a binary operator. Make the expansion closer to the
10745 actual machine, then just general_operand, which will allow 3 separate
10746 memory references (one output, two input) in a single insn. */
10749 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10752 rtx src1, src2, dst, op, clob;
10754 dst = ix86_fixup_binary_operands (code, mode, operands);
10755 src1 = operands[1];
10756 src2 = operands[2];
10758 /* Emit the instruction. */
10760 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10761 if (reload_in_progress)
10763 /* Reload doesn't know about the flags register, and doesn't know that
10764 it doesn't want to clobber it. We can only do this with PLUS. */
10765 gcc_assert (code == PLUS);
/* Normal case: integer arithmetic clobbers EFLAGS, so pair the SET
   with an explicit flags clobber in a PARALLEL.  */
10770 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10771 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10774 /* Fix up the destination if needed. */
10775 if (dst != operands[0])
10776 emit_move_insn (operands[0], dst);
10779 /* Return TRUE or FALSE depending on whether the binary operator meets the
10780 appropriate constraints. */
10783 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10786 rtx dst = operands[0];
10787 rtx src1 = operands[1];
10788 rtx src2 = operands[2];
10790 /* Both source operands cannot be in memory. */
10791 if (MEM_P (src1) && MEM_P (src2))
10794 /* Canonicalize operand order for commutative operators. */
10795 if (ix86_swap_binary_operands_p (code, mode, operands))
10802 /* If the destination is memory, we must have a matching source operand. */
10803 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10806 /* Source 1 cannot be a constant. */
10807 if (CONSTANT_P (src1))
10810 /* Source 1 cannot be a non-matching memory. */
10811 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10817 /* Attempt to expand a unary operator. Make the expansion closer to the
10818 actual machine, then just general_operand, which will allow 2 separate
10819 memory references (one output, one input) in a single insn. */
10822 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10825 int matching_memory;
10826 rtx src, dst, op, clob;
10831 /* If the destination is memory, and we do not have matching source
10832 operands, do things in registers. */
10833 matching_memory = 0;
10836 if (rtx_equal_p (dst, src))
10837 matching_memory = 1;
10839 dst = gen_reg_rtx (mode);
10842 /* When source operand is memory, destination must match. */
10843 if (MEM_P (src) && !matching_memory)
10844 src = force_reg (mode, src);
10846 /* Emit the instruction. */
10848 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10849 if (reload_in_progress || code == NOT)
10851 /* Reload doesn't know about the flags register, and doesn't know that
10852 it doesn't want to clobber it. */
10853 gcc_assert (code == NOT);
/* Only the non-NOT path reaches here and adds the flags clobber
   (see the condition above, which emits NOT without one).  */
10858 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10859 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10862 /* Fix up the destination if needed. */
10863 if (dst != operands[0])
10864 emit_move_insn (operands[0], dst);
10867 /* Return TRUE or FALSE depending on whether the unary operator meets the
10868 appropriate constraints. */
10871 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10872 enum machine_mode mode ATTRIBUTE_UNUSED,
10873 rtx operands[2] ATTRIBUTE_UNUSED)
10875 /* If one of operands is memory, source and destination must match. */
10876 if ((MEM_P (operands[0])
10877 || MEM_P (operands[1]))
10878 && ! rtx_equal_p (operands[0], operands[1]))
10883 /* Post-reload splitter for converting an SF or DFmode value in an
10884 SSE register into an unsigned SImode. */
10887 ix86_split_convert_uns_si_sse (rtx operands[])
10889 enum machine_mode vecmode;
10890 rtx value, large, zero_or_two31, input, two31, x;
10892 large = operands[1];
10893 zero_or_two31 = operands[2];
10894 input = operands[3];
10895 two31 = operands[4];
10896 vecmode = GET_MODE (large);
10897 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10899 /* Load up the value into the low element. We must ensure that the other
10900 elements are valid floats -- zero is the easiest such value. */
10903 if (vecmode == V4SFmode)
10904 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10906 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10910 input = gen_rtx_REG (vecmode, REGNO (input));
10911 emit_move_insn (value, CONST0_RTX (vecmode));
10912 if (vecmode == V4SFmode)
10913 emit_insn (gen_sse_movss (value, value, input));
10915 emit_insn (gen_sse2_movsd (value, value, input));
/* Presumably: LARGE becomes the mask (value >= 2**31) and
   ZERO_OR_TWO31 becomes 2**31 under the mask, else 0 -- TODO confirm
   against the complete splitter.  */
10918 emit_move_insn (large, two31);
10919 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10921 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10922 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10924 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10925 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
/* Subtract the bias so the value fits the signed conversion.  */
10927 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10928 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Shift the mask up to bit 31, truncate-convert to int, and XOR the
   high bit back in for the biased lanes.  */
10930 large = gen_rtx_REG (V4SImode, REGNO (large));
10931 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10933 x = gen_rtx_REG (V4SImode, REGNO (value));
10934 if (vecmode == V4SFmode)
10935 emit_insn (gen_sse2_cvttps2dq (x, value));
10937 emit_insn (gen_sse2_cvttpd2dq (x, value));
10940 emit_insn (gen_xorv4si3 (value, value, large));
10943 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10944 Expects the 64-bit DImode to be supplied in a pair of integral
10945 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10946 -mfpmath=sse, !optimize_size only. */
10949 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10951 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10952 rtx int_xmm, fp_xmm;
10953 rtx biases, exponents;
/* Get the 64-bit integer into the low half of an xmm register,
   choosing a tuning-appropriate path.  */
10956 int_xmm = gen_reg_rtx (V4SImode);
10957 if (TARGET_INTER_UNIT_MOVES)
10958 emit_insn (gen_movdi_to_sse (int_xmm, input));
10959 else if (TARGET_SSE_SPLIT_REGS)
10961 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10962 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10966 x = gen_reg_rtx (V2DImode);
10967 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10968 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words to splice above the two 32-bit halves.  */
10971 x = gen_rtx_CONST_VECTOR (V4SImode,
10972 gen_rtvec (4, GEN_INT (0x43300000UL),
10973 GEN_INT (0x45300000UL),
10974 const0_rtx, const0_rtx));
10975 exponents = validize_mem (force_const_mem (V4SImode, x));
10977 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10978 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10980 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10981 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10982 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10983 (0x1.0p84 + double(fp_value_hi_xmm)).
10984 Note these exponents differ by 32. */
10986 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10988 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10989 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10990 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10991 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10992 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10993 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10994 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10995 biases = validize_mem (force_const_mem (V2DFmode, biases));
10996 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10998 /* Add the upper and lower DFmode values together. */
11000 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
/* Without SSE3 haddpd: copy, bring the high lane down, and add.  */
11003 x = copy_to_mode_reg (V2DFmode, fp_xmm);
11004 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
11005 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
/* Extract the scalar DFmode result into TARGET.  */
11008 ix86_expand_vector_extract (false, target, fp_xmm, 0);
11011 /* Not used, but eases macroization of patterns. */
11013 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
11014 rtx input ATTRIBUTE_UNUSED)
/* Stub only: XFmode has no SSE conversion, so this must be dead.  */
11016 gcc_unreachable ();
11019 /* Convert an unsigned SImode value into a DFmode. Only currently used
11020 for SSE, but applicable anywhere. */
11023 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
11025 REAL_VALUE_TYPE TWO31r;
/* Bias the input by -2**31 (wrapping) so it fits a signed convert ...  */
11028 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
11029 NULL, 1, OPTAB_DIRECT);
11031 fp = gen_reg_rtx (DFmode);
11032 emit_insn (gen_floatsidf2 (fp, x));
/* ... then add 2**31 back; DFmode holds both exactly.  */
11034 real_ldexp (&TWO31r, &dconst1, 31);
11035 x = const_double_from_real_value (TWO31r, DFmode);
11037 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
11039 emit_move_insn (target, x);
11042 /* Convert a signed DImode value into a DFmode. Only used for SSE in
11043 32-bit mode; otherwise we have a direct convert instruction. */
11046 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
11048 REAL_VALUE_TYPE TWO32r;
11049 rtx fp_lo, fp_hi, x;
11051 fp_lo = gen_reg_rtx (DFmode);
11052 fp_hi = gen_reg_rtx (DFmode);
/* result = (double) hi_word * 2**32 + (double) (unsigned) lo_word.  */
11054 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
11056 real_ldexp (&TWO32r, &dconst1, 32);
11057 x = const_double_from_real_value (TWO32r, DFmode);
11058 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* The low word is converted as unsigned via the SImode helper.  */
11060 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
11062 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
11065 emit_move_insn (target, x);
11068 /* Convert an unsigned SImode value into a SFmode, using only SSE.
11069 For x86_32, -mfpmath=sse, !optimize_size only. */
11071 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
11073 REAL_VALUE_TYPE ONE16r;
11074 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split the 32-bit input into 16-bit halves (each exactly
   representable in SFmode), then compute hi * 2**16 + lo.  */
11076 real_ldexp (&ONE16r, &dconst1, 16);
11077 x = const_double_from_real_value (ONE16r, SFmode);
11078 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
11079 NULL, 0, OPTAB_DIRECT);
11080 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
11081 NULL, 0, OPTAB_DIRECT);
11082 fp_hi = gen_reg_rtx (SFmode);
11083 fp_lo = gen_reg_rtx (SFmode);
11084 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
11085 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
11086 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
11088 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
/* expand_simple_binop may not have used TARGET; copy if needed.  */
11090 if (!rtx_equal_p (target, fp_hi))
11091 emit_move_insn (target, fp_hi);
11094 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
11095 then replicate the value for all elements of the vector
11099 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
/* Integer lane cases: the value is replicated across all lanes.  */
11106 v = gen_rtvec (4, value, value, value, value);
11107 return gen_rtx_CONST_VECTOR (V4SImode, v);
11111 v = gen_rtvec (2, value, value);
11112 return gen_rtx_CONST_VECTOR (V2DImode, v);
/* Float lane cases: replicate when VECT, otherwise the value goes in
   lane 0 with the remaining lanes zero.  */
11116 v = gen_rtvec (4, value, value, value, value);
11118 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
11119 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11120 return gen_rtx_CONST_VECTOR (V4SFmode, v);
11124 v = gen_rtvec (2, value, value);
11126 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
11127 return gen_rtx_CONST_VECTOR (V2DFmode, v);
11130 gcc_unreachable ();
11134 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
11135 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
11136 for an SSE register. If VECT is true, then replicate the mask for
11137 all elements of the vector register. If INVERT is true, then create
11138 a mask excluding the sign bit. */
11141 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
11143 enum machine_mode vec_mode, imode;
11144 HOST_WIDE_INT hi, lo;
/* NOTE(review): the switch selecting per-mode SHIFT/IMODE values is
   elided in this excerpt; only the case bodies remain visible.  */
11149 /* Find the sign bit, sign extended to 2*HWI. */
/* 32-bit elements: bit 31.  "hi = lo < 0" sign-extends the mask into
   the high HOST_WIDE_INT half when HOST_WIDE_INT is 32 bits.  */
11155 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
11156 lo = 0x80000000, hi = lo < 0;
/* 64-bit elements: the sign bit may live in either HWI half depending
   on HOST_BITS_PER_WIDE_INT.  */
11162 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
11163 if (HOST_BITS_PER_WIDE_INT >= 64)
11164 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
11166 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* Scalar (non-vector) case: no vector mode, requires 64-bit HWI.  */
11172 vec_mode = VOIDmode;
11173 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
11174 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11178 gcc_unreachable ();
/* INVERT: complement to get "all bits but the sign bit".  */
11182 lo = ~lo, hi = ~hi;
11184 /* Force this value into the low part of a fp vector constant. */
11185 mask = immed_double_const (lo, hi, imode);
11186 mask = gen_lowpart (mode, mask);
/* Scalar request: just return the mask in a register of MODE.  */
11188 if (vec_mode == VOIDmode)
11189 return force_reg (mode, mask);
/* Vector request: broadcast (or place in element 0) and load.  */
11191 v = ix86_build_const_vector (mode, vect, mask);
11192 return force_reg (vec_mode, v);
11195 /* Generate code for floating point ABS or NEG. */
11198 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11201 rtx mask, set, use, clob, dst, src;
11202 bool use_sse = false;
11203 bool vector_mode = VECTOR_MODE_P (mode);
11204 enum machine_mode elt_mode = mode;
/* For vector modes operate on the element mode when building the mask.  */
11208 elt_mode = GET_MODE_INNER (mode);
11211 else if (mode == TFmode)
11213 else if (TARGET_SSE_MATH)
11214 use_sse = SSE_FLOAT_MODE_P (mode);
11216 /* NEG and ABS performed with SSE use bitwise mask operations.
11217 Create the appropriate mask now. */
/* NEG flips the sign bit (XOR with sign mask); ABS clears it (AND with
   the inverted mask), hence INVERT == (code == ABS) below.  */
11219 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11228 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11229 set = gen_rtx_SET (VOIDmode, dst, set);
/* Non-SSE (x87) path: emit the plain ABS/NEG rtx...  */
11234 set = gen_rtx_fmt_e (code, mode, src);
11235 set = gen_rtx_SET (VOIDmode, dst, set);
/* ...wrapped in a PARALLEL with a USE of the mask and a flags clobber.
   (NOTE(review): surrounding control flow is elided in this excerpt --
   confirm which branch each emit belongs to against the full source.)  */
11238 use = gen_rtx_USE (VOIDmode, mask);
11239 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11240 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11241 gen_rtvec (3, set, use, clob)));
11248 /* Expand a copysign operation. Special case operand 0 being a constant. */
11251 ix86_expand_copysign (rtx operands[])
11253 enum machine_mode mode;
11254 rtx dest, op0, op1, mask, nmask;
11256 dest = operands[0];
11260 mode = GET_MODE (dest);
/* Constant magnitude: strip its sign up front, then use the _const
   pattern which needs only the single sign-bit mask.  */
11262 if (GET_CODE (op0) == CONST_DOUBLE)
11264 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* copysign ignores the sign of op0, so canonicalize to |op0|.  */
11266 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11267 op0 = simplify_unary_operation (ABS, mode, op0, mode);
/* SFmode/DFmode live in SSE vector registers; widen the constant into
   element 0 of the corresponding vector mode.  */
11269 if (mode == SFmode || mode == DFmode)
11271 enum machine_mode vmode;
11273 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11275 if (op0 == CONST0_RTX (mode))
11276 op0 = CONST0_RTX (vmode);
11281 if (mode == SFmode)
11282 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11283 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11285 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11287 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
/* TFmode (and others): keep scalar, but nonzero constants need a reg.  */
11290 else if (op0 != CONST0_RTX (mode))
11291 op0 = force_reg (mode, op0);
11293 mask = ix86_build_signbit_mask (mode, 0, 0);
11295 if (mode == SFmode)
11296 copysign_insn = gen_copysignsf3_const;
11297 else if (mode == DFmode)
11298 copysign_insn = gen_copysigndf3_const;
11300 copysign_insn = gen_copysigntf3_const;
11302 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: the _var pattern needs both the sign mask and
   its complement (nmask) plus a scratch (NULL_RTX here, allocated by
   the pattern).  */
11306 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11308 nmask = ix86_build_signbit_mask (mode, 0, 1);
11309 mask = ix86_build_signbit_mask (mode, 0, 0);
11311 if (mode == SFmode)
11312 copysign_insn = gen_copysignsf3_var;
11313 else if (mode == DFmode)
11314 copysign_insn = gen_copysigndf3_var;
11316 copysign_insn = gen_copysigntf3_var;
11318 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11322 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11323 be a constant, and so has already been expanded into a vector constant. */
11326 ix86_split_copysign_const (rtx operands[])
11328 enum machine_mode mode, vmode;
11329 rtx dest, op0, op1, mask, x;
11331 dest = operands[0];
11334 mask = operands[3];
11336 mode = GET_MODE (dest);
11337 vmode = GET_MODE (mask);
/* dest = (op1 & sign-mask) | |const-op0|; all ops done in the vector
   mode of the mask.  (NOTE(review): the op0/op1 = operands[1]/[2]
   assignments are elided in this excerpt.)  */
11339 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11340 x = gen_rtx_AND (vmode, dest, mask);
11341 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* ORing in a zero magnitude would be a no-op; skip it.  */
11343 if (op0 != CONST0_RTX (vmode))
11345 x = gen_rtx_IOR (vmode, dest, op0);
11346 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11350 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11351 so we have to do two masks. */
11354 ix86_split_copysign_var (rtx operands[])
11356 enum machine_mode mode, vmode;
11357 rtx dest, scratch, op0, op1, mask, nmask, x;
11359 dest = operands[0];
11360 scratch = operands[1];
11363 nmask = operands[4];
11364 mask = operands[5];
/* (NOTE(review): op0/op1 = operands[2]/[3] assignments are elided.)  */
11366 mode = GET_MODE (dest);
11367 vmode = GET_MODE (mask);
/* copysign (x, x) == x; masking would also clobber DEST mid-sequence.  */
11369 if (rtx_equal_p (op0, op1))
11371 /* Shouldn't happen often (it's useless, obviously), but when it does
11372 we'd generate incorrect code if we continue below. */
11373 emit_move_insn (dest, op0);
/* The remaining code computes dest = (op1 & mask) | (op0 & nmask),
   choosing operand roles by which hard regs the RA tied together.  */
11377 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11379 gcc_assert (REGNO (op1) == REGNO (scratch));
11381 x = gen_rtx_AND (vmode, scratch, mask);
11382 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest currently holds MASK; (~mask) & op0 extracts the magnitude.  */
11385 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11386 x = gen_rtx_NOT (vmode, dest);
11387 x = gen_rtx_AND (vmode, x, op0);
11388 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11392 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11394 x = gen_rtx_AND (vmode, scratch, mask);
11396 else /* alternative 2,4 */
11398 gcc_assert (REGNO (mask) == REGNO (scratch));
11399 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11400 x = gen_rtx_AND (vmode, scratch, op1);
11402 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11404 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11406 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11407 x = gen_rtx_AND (vmode, dest, nmask);
11409 else /* alternative 3,4 */
11411 gcc_assert (REGNO (nmask) == REGNO (dest));
11413 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11414 x = gen_rtx_AND (vmode, dest, op0);
11416 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine sign bits (in SCRATCH) with magnitude (in DEST).  */
11419 x = gen_rtx_IOR (vmode, dest, scratch);
11420 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11423 /* Return TRUE or FALSE depending on whether the first SET in INSN
11424 has source and destination with matching CC modes, and that the
11425 CC mode is at least as constrained as REQ_MODE. */
11428 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11431 enum machine_mode set_mode;
11433 set = PATTERN (insn);
11434 if (GET_CODE (set) == PARALLEL)
11435 set = XVECEXP (set, 0, 0);
11436 gcc_assert (GET_CODE (set) == SET);
11437 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11439 set_mode = GET_MODE (SET_DEST (set));
/* NOTE(review): the switch over SET_MODE and several case labels are
   elided in this excerpt; each visible test decides whether REQ_MODE is
   acceptable for the destination's actual CC mode.  */
11443 if (req_mode != CCNOmode
11444 && (req_mode != CCmode
11445 || XEXP (SET_SRC (set), 1) != const0_rtx))
11449 if (req_mode == CCGCmode)
11453 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11457 if (req_mode == CCZmode)
11464 gcc_unreachable ();
/* Finally the COMPARE itself must agree with the destination mode.  */
11467 return (GET_MODE (SET_SRC (set)) == set_mode);
11470 /* Generate insn patterns to do an integer compare of OPERANDS. */
11473 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11475 enum machine_mode cmpmode;
/* Pick the least-constrained CC mode valid for this comparison.  */
11478 cmpmode = SELECT_CC_MODE (code, op0, op1);
11479 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11481 /* This is very simple, but making the interface the same as in the
11482 FP case makes the rest of the code easier. */
11483 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11484 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11486 /* Return the test that should be put into the flags user, i.e.
11487 the bcc, scc, or cmov instruction. */
11488 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11491 /* Figure out whether to use ordered or unordered fp comparisons.
11492 Return the appropriate mode to use. */
11495 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11497 /* ??? In order to make all comparisons reversible, we do all comparisons
11498 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11499 all forms trapping and nontrapping comparisons, we can make inequality
11500 comparisons trapping again, since it results in better code when using
11501 FCOM based compares. */
11502 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Return the CC mode needed to represent comparison CODE of OP0/OP1;
   the less constrained the mode, the more freedom later passes have.  */
11506 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11508 enum machine_mode mode = GET_MODE (op0);
11510 if (SCALAR_FLOAT_MODE_P (mode))
11512 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11513 return ix86_fp_compare_mode (code);
/* Integer comparisons: pick the mode by which flags CODE reads.
   (NOTE(review): the switch head and the return statements inside the
   cases are elided in this excerpt.)  */
11518 /* Only zero flag is needed. */
11519 case EQ: /* ZF=0 */
11520 case NE: /* ZF!=0 */
11522 /* Codes needing carry flag. */
11523 case GEU: /* CF=0 */
11524 case LTU: /* CF=1 */
11525 /* Detect overflow checks. They need just the carry flag. */
11526 if (GET_CODE (op0) == PLUS
11527 && rtx_equal_p (op1, XEXP (op0, 0)))
11531 case GTU: /* CF=0 & ZF=0 */
11532 case LEU: /* CF=1 | ZF=1 */
11533 /* Detect overflow checks. They need just the carry flag. */
11534 if (GET_CODE (op0) == MINUS
11535 && rtx_equal_p (op1, XEXP (op0, 0)))
11539 /* Codes possibly doable only with sign flag when
11540 comparing against zero. */
11541 case GE: /* SF=OF or SF=0 */
11542 case LT: /* SF<>OF or SF=1 */
11543 if (op1 == const0_rtx)
11546 /* For other cases Carry flag is not required. */
11548 /* Codes doable only with sign flag when comparing
11549 against zero, but we miss jump instruction for it
11550 so we need to use relational tests against overflow
11551 that thus needs to be zero. */
11552 case GT: /* ZF=0 & SF=OF */
11553 case LE: /* ZF=1 | SF<>OF */
11554 if (op1 == const0_rtx)
11558 /* strcmp pattern do (use flags) and combine may ask us for proper
11563 gcc_unreachable ();
11567 /* Return the fixed registers used for condition codes. */
/* NOTE(review): the body (lines setting *p1/*p2 and the return) is not
   visible in this excerpt -- confirm against the full source.  */
11570 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11577 /* If two condition code modes are compatible, return a condition code
11578 mode which is compatible with both. Otherwise, return
/* ...VOIDmode (per the target-hook contract for CC-mode merging).  */
11581 static enum machine_mode
11582 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Non-CC modes can never be merged.  */
11587 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC differ only in overflow-flag validity; they merge.
   (NOTE(review): the merged-mode return and further cases are elided.)  */
11590 if ((m1 == CCGCmode && m2 == CCGOCmode)
11591 || (m1 == CCGOCmode && m2 == CCGCmode))
11597 gcc_unreachable ();
11627 /* These are only compatible with themselves, which we already
11633 /* Split comparison code CODE into comparisons we can do using branch
11634 instructions. BYPASS_CODE is comparison code for branch that will
11635 branch around FIRST_CODE and SECOND_CODE. If some of branches
11636 is not required, set value to UNKNOWN.
11637 We never require more than two branches. */
11640 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11641 enum rtx_code *first_code,
11642 enum rtx_code *second_code)
11644 *first_code = code;
11645 *bypass_code = UNKNOWN;
11646 *second_code = UNKNOWN;
11648 /* The fcomi comparison sets flags as follows:
/* Codes directly expressible after fcomi: single branch, no fixup.  */
11658 case GT: /* GTU - CF=0 & ZF=0 */
11659 case GE: /* GEU - CF=0 */
11660 case ORDERED: /* PF=0 */
11661 case UNORDERED: /* PF=1 */
11662 case UNEQ: /* EQ - ZF=1 */
11663 case UNLT: /* LTU - CF=1 */
11664 case UNLE: /* LEU - CF=1 | ZF=1 */
11665 case LTGT: /* EQ - ZF=0 */
/* Ordered codes that misbehave on NaN: branch around via UNORDERED.  */
11667 case LT: /* LTU - CF=1 - fails on unordered */
11668 *first_code = UNLT;
11669 *bypass_code = UNORDERED;
11671 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11672 *first_code = UNLE;
11673 *bypass_code = UNORDERED;
11675 case EQ: /* EQ - ZF=1 - fails on unordered */
11676 *first_code = UNEQ;
11677 *bypass_code = UNORDERED;
/* Codes needing a second branch that also takes on unordered.  */
11679 case NE: /* NE - ZF=0 - fails on unordered */
11680 *first_code = LTGT;
11681 *second_code = UNORDERED;
11683 case UNGE: /* GEU - CF=0 - fails on unordered */
11685 *second_code = UNORDERED;
11687 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11689 *second_code = UNORDERED;
11692 gcc_unreachable ();
/* Without IEEE conformance NaNs need not be honored: one branch does.  */
11694 if (!TARGET_IEEE_FP)
11696 *second_code = UNKNOWN;
11697 *bypass_code = UNKNOWN;
11701 /* Return cost of comparison done fcom + arithmetics operations on AX.
11702 All following functions do use number of instructions as a cost metrics.
11703 In future this should be tweaked to compute bytes for optimize_size and
11704 take into account performance of various instructions on various CPUs. */
11706 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* Without IEEE FP, NaN handling is skipped, so the sequence is shorter.
   (NOTE(review): the per-code switch and returns are elided here.)  */
11708 if (!TARGET_IEEE_FP)
11710 /* The cost of code output by ix86_expand_fp_compare. */
11734 gcc_unreachable ();
11738 /* Return cost of comparison done using fcomi operation.
11739 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11741 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11743 enum rtx_code bypass_code, first_code, second_code;
11744 /* Return arbitrarily high cost when instruction is not supported - this
11745 prevents gcc from using it. */
/* Base cost 2 (fcomi + branch), +1 if a second/bypass branch is needed.  */
11748 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11749 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11752 /* Return cost of comparison done using sahf operation.
11753 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11755 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11757 enum rtx_code bypass_code, first_code, second_code;
11758 /* Return arbitrarily high cost when instruction is not preferred - this
11759 avoids gcc from using it. */
11760 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
/* Base cost 3 (fnstsw + sahf + branch), +1 for an extra branch.  */
11762 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11763 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11766 /* Compute cost of the comparison done using any method.
11767 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11769 ix86_fp_comparison_cost (enum rtx_code code)
11771 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11774 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11775 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the three strategies.  (NOTE(review): the
   assignments of sahf/fcomi costs into MIN and the return are elided.)  */
11777 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11778 if (min > sahf_cost)
11780 if (min > fcomi_cost)
11785 /* Return true if we should use an FCOMI instruction for this
/* ...fp comparison, i.e. fcomi is the cheapest strategy for either the
   comparison or its operand-swapped form.  */
11789 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11791 enum rtx_code swapped_code = swap_condition (code);
11793 return ((ix86_fp_comparison_cost (code)
11794 == ix86_fp_comparison_fcomi_cost (code))
11795 || (ix86_fp_comparison_cost (swapped_code)
11796 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11799 /* Swap, force into registers, or otherwise massage the two operands
11800 to a fp comparison. The operands are updated in place; the new
11801 comparison code is returned. */
11803 static enum rtx_code
11804 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11806 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11807 rtx op0 = *pop0, op1 = *pop1;
11808 enum machine_mode op_mode = GET_MODE (op0);
11809 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11811 /* All of the unordered compare instructions only work on registers.
11812 The same is true of the fcomi compare instructions. The XFmode
11813 compare instructions require registers except when comparing
11814 against zero or when converting operand 1 from fixed point to
/* ...floating point (the comment's tail is elided in this excerpt).  */
11818 && (fpcmp_mode == CCFPUmode
11819 || (op_mode == XFmode
11820 && ! (standard_80387_constant_p (op0) == 1
11821 || standard_80387_constant_p (op1) == 1)
11822 && GET_CODE (op1) != FLOAT)
11823 || ix86_use_fcomi_compare (code)))
11825 op0 = force_reg (op_mode, op0);
11826 op1 = force_reg (op_mode, op1);
11830 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11831 things around if they appear profitable, otherwise force op0
11832 into a register. */
/* Swap so that op0 is not the (non-loadable) constant.  */
11834 if (standard_80387_constant_p (op0) == 0
11836 && ! (standard_80387_constant_p (op1) == 0
11840 tmp = op0, op0 = op1, op1 = tmp;
11841 code = swap_condition (code);
11845 op0 = force_reg (op_mode, op0);
11847 if (CONSTANT_P (op1))
/* standard_80387_constant_p > 0 means loadable via fld1/fldz etc.;
   0 means it must be spilled to the constant pool.  */
11849 int tmp = standard_80387_constant_p (op1);
11851 op1 = validize_mem (force_const_mem (op_mode, op1));
11855 op1 = force_reg (op_mode, op1);
11858 op1 = force_reg (op_mode, op1);
11862 /* Try to rearrange the comparison to make it cheaper. */
11863 if (ix86_fp_comparison_cost (code)
11864 > ix86_fp_comparison_cost (swap_condition (code))
11865 && (REG_P (op1) || can_create_pseudo_p ()))
11868 tmp = op0, op0 = op1, op1 = tmp;
11869 code = swap_condition (code);
11871 op0 = force_reg (op_mode, op0);
11879 /* Convert comparison codes we use to represent FP comparison to integer
11880 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the mapping switch (body of this function) is not
   visible in this excerpt.  */
11884 ix86_fp_compare_code_to_integer (enum rtx_code code)
11913 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11916 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11917 rtx *second_test, rtx *bypass_test)
11919 enum machine_mode fpcmp_mode, intcmp_mode;
11921 int cost = ix86_fp_comparison_cost (code);
11922 enum rtx_code bypass_code, first_code, second_code;
11924 fpcmp_mode = ix86_fp_compare_mode (code);
11925 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
/* Callers may pass NULL for the out-params; clear them when present.  */
11928 *second_test = NULL_RTX;
11930 *bypass_test = NULL_RTX;
11932 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11934 /* Do fcomi/sahf based test when profitable. */
11935 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11936 && (bypass_code == UNKNOWN || bypass_test)
11937 && (second_code == UNKNOWN || second_test))
11939 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11940 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path needs an HImode scratch for the fnstsw result.  */
11946 gcc_assert (TARGET_SAHF);
11949 scratch = gen_reg_rtx (HImode);
11950 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11952 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11955 /* The FP codes work out to act like unsigned. */
11956 intcmp_mode = fpcmp_mode;
/* Hand any extra branches back to the caller as flag tests.  */
11958 if (bypass_code != UNKNOWN)
11959 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11960 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11962 if (second_code != UNKNOWN)
11963 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11964 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11969 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11970 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11971 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11973 scratch = gen_reg_rtx (HImode);
11974 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11976 /* In the unordered case, we have to check C2 for NaN's, which
11977 doesn't happen to work out to anything nice combination-wise.
11978 So do some bit twiddling on the value we've got in AH to come
11979 up with an appropriate set of condition codes. */
/* 0x45 masks C0|C2|C3 in AH; 0x01 is C0, 0x40 is C3, 0x04 is C2.
   (NOTE(review): the switch over CODE and the code assignments after
   each emit are elided in this excerpt.)  */
11981 intcmp_mode = CCNOmode;
11986 if (code == GT || !TARGET_IEEE_FP)
11988 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11993 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11994 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11995 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11996 intcmp_mode = CCmode;
12002 if (code == LT && TARGET_IEEE_FP)
12004 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12005 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
12006 intcmp_mode = CCmode;
12011 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
12017 if (code == GE || !TARGET_IEEE_FP)
12019 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
12024 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12025 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12032 if (code == LE && TARGET_IEEE_FP)
12034 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12035 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12036 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12037 intcmp_mode = CCmode;
12042 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12048 if (code == EQ && TARGET_IEEE_FP)
12050 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12051 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12052 intcmp_mode = CCmode;
12057 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12064 if (code == NE && TARGET_IEEE_FP)
12066 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12067 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12073 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12079 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12083 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12088 gcc_unreachable ();
12092 /* Return the test that should be put into the flags user, i.e.
12093 the bcc, scc, or cmov instruction. */
12094 return gen_rtx_fmt_ee (code, VOIDmode,
12095 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the comparison described by the global ix86_compare_op0/op1
   (or an already-emitted flags setter) and return the flags test rtx.  */
12100 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
12103 op0 = ix86_compare_op0;
12104 op1 = ix86_compare_op1;
12107 *second_test = NULL_RTX;
12109 *bypass_test = NULL_RTX;
/* Flags already set: just build the test and consume the marker.  */
12111 if (ix86_compare_emitted)
12113 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
12114 ix86_compare_emitted = NULL_RTX;
12116 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
12118 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
12119 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12120 second_test, bypass_test);
12123 ret = ix86_expand_int_compare (code, op0, op1);
12128 /* Return true if the CODE will result in nontrivial jump sequence. */
12130 ix86_fp_jump_nontrivial_p (enum rtx_code code)
12132 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial == more than the single FIRST_CODE branch is required.  */
12135 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12136 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional branch to LABEL for comparison CODE of the global
   ix86_compare_op0/op1, dispatching on the operands' machine mode.  */
12140 ix86_expand_branch (enum rtx_code code, rtx label)
12144 /* If we have emitted a compare insn, go straight to simple.
12145 ix86_expand_compare won't emit anything if ix86_compare_emitted
/* ...is set (the rest of this comment is elided in this excerpt).  */
12147 if (ix86_compare_emitted)
12150 switch (GET_MODE (ix86_compare_op0))
/* Simple case (integer modes small enough for one compare): emit the
   compare and a single conditional jump.  */
12156 tmp = ix86_expand_compare (code, NULL, NULL);
12157 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12158 gen_rtx_LABEL_REF (VOIDmode, label),
12160 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating-point case.  */
12169 enum rtx_code bypass_code, first_code, second_code;
12171 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12172 &ix86_compare_op1);
12174 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12176 /* Check whether we will use the natural sequence with one jump. If
12177 so, we can expand jump early. Otherwise delay expansion by
12178 creating compound insn to not confuse optimizers. */
12179 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12181 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12182 gen_rtx_LABEL_REF (VOIDmode, label),
12183 pc_rtx, NULL_RTX, NULL_RTX);
/* Multi-branch case: emit one compound jump insn carrying clobbers for
   FPSR, the flags, and (for the sahf path) an HImode scratch; it is
   split after reload.  */
12187 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12188 ix86_compare_op0, ix86_compare_op1);
12189 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12190 gen_rtx_LABEL_REF (VOIDmode, label),
12192 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12194 use_fcomi = ix86_use_fcomi_compare (code);
12195 vec = rtvec_alloc (3 + !use_fcomi);
12196 RTVEC_ELT (vec, 0) = tmp;
12198 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12200 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12203 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12205 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12214 /* Expand DImode branch into multiple compare+branch. */
12216 rtx lo[2], hi[2], label2;
12217 enum rtx_code code1, code2, code3;
12218 enum machine_mode submode;
/* Canonicalize: constant operand goes second.  */
12220 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12222 tmp = ix86_compare_op0;
12223 ix86_compare_op0 = ix86_compare_op1;
12224 ix86_compare_op1 = tmp;
12225 code = swap_condition (code);
12227 if (GET_MODE (ix86_compare_op0) == DImode)
12229 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12230 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
/* TImode on 64-bit splits into two DImode halves.  */
12235 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12236 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12240 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12241 avoid two branches. This costs one extra insn, so disable when
12242 optimizing for size. */
12244 if ((code == EQ || code == NE)
12246 || hi[1] == const0_rtx || lo[1] == const0_rtx))
/* XOR against zero halves is dropped entirely.  */
12251 if (hi[1] != const0_rtx)
12252 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12253 NULL_RTX, 0, OPTAB_WIDEN);
12256 if (lo[1] != const0_rtx)
12257 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12258 NULL_RTX, 0, OPTAB_WIDEN);
12260 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12261 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the word-sized (tmp ==/!= 0) comparison.  */
12263 ix86_compare_op0 = tmp;
12264 ix86_compare_op1 = const0_rtx;
12265 ix86_expand_branch (code, label);
12269 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12270 op1 is a constant and the low word is zero, then we can just
12271 examine the high word. Similarly for low word -1 and
12272 less-or-equal-than or greater-than. */
12274 if (CONST_INT_P (hi[1]))
12277 case LT: case LTU: case GE: case GEU:
12278 if (lo[1] == const0_rtx)
12280 ix86_compare_op0 = hi[0];
12281 ix86_compare_op1 = hi[1];
12282 ix86_expand_branch (code, label);
12286 case LE: case LEU: case GT: case GTU:
12287 if (lo[1] == constm1_rtx)
12289 ix86_compare_op0 = hi[0];
12290 ix86_compare_op1 = hi[1];
12291 ix86_expand_branch (code, label);
12299 /* Otherwise, we need two or three jumps. */
12301 label2 = gen_label_rtx ();
12304 code2 = swap_condition (code);
12305 code3 = unsigned_condition (code);
12309 case LT: case GT: case LTU: case GTU:
12312 case LE: code1 = LT; code2 = GT; break;
12313 case GE: code1 = GT; code2 = LT; break;
12314 case LEU: code1 = LTU; code2 = GTU; break;
12315 case GEU: code1 = GTU; code2 = LTU; break;
12317 case EQ: code1 = UNKNOWN; code2 = NE; break;
12318 case NE: code2 = UNKNOWN; break;
12321 gcc_unreachable ();
12326 * if (hi(a) < hi(b)) goto true;
12327 * if (hi(a) > hi(b)) goto false;
12328 * if (lo(a) < lo(b)) goto true;
/* (the fourth line of this schema is elided in this excerpt)  */
12332 ix86_compare_op0 = hi[0];
12333 ix86_compare_op1 = hi[1];
12335 if (code1 != UNKNOWN)
12336 ix86_expand_branch (code1, label);
12337 if (code2 != UNKNOWN)
12338 ix86_expand_branch (code2, label2);
/* Low-word comparison is always unsigned (code3).  */
12340 ix86_compare_op0 = lo[0];
12341 ix86_compare_op1 = lo[1];
12342 ix86_expand_branch (code3, label);
12344 if (code2 != UNKNOWN)
12345 emit_label (label2);
12350 gcc_unreachable ();
12354 /* Split branch based on floating point condition. */
12356 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12357 rtx target1, rtx target2, rtx tmp, rtx pushed)
12359 rtx second, bypass;
12360 rtx label = NULL_RTX;
12362 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so that TARGET2 is the fallthrough (pc_rtx), reversing
   the condition if needed.  */
12365 if (target2 != pc_rtx)
12368 code = reverse_condition_maybe_unordered (code);
12373 condition = ix86_expand_fp_compare (code, op1, op2,
12374 tmp, &second, &bypass);
12376 /* Remove pushed operand from stack. */
12378 ix86_free_from_memory (GET_MODE (pushed));
12380 if (split_branch_probability >= 0)
12382 /* Distribute the probabilities across the jumps.
12383 Assume the BYPASS and SECOND to be always test
/* ...for unordered (rest of this comment elided in this excerpt).  */
12385 probability = split_branch_probability;
12387 /* Value of 1 is low enough to make no need for probability
12388 to be updated. Later we may run some experiments and see
12389 if unordered values are more frequent in practice. */
12391 bypass_probability = 1;
12393 second_probability = 1;
/* BYPASS branch jumps over the main test (to a local label).  */
12395 if (bypass != NULL_RTX)
12397 label = gen_label_rtx ();
12398 i = emit_jump_insn (gen_rtx_SET
12400 gen_rtx_IF_THEN_ELSE (VOIDmode,
12402 gen_rtx_LABEL_REF (VOIDmode,
12405 if (bypass_probability >= 0)
12407 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12408 GEN_INT (bypass_probability),
/* Main conditional jump.  */
12411 i = emit_jump_insn (gen_rtx_SET
12413 gen_rtx_IF_THEN_ELSE (VOIDmode,
12414 condition, target1, target2)));
12415 if (probability >= 0)
12417 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12418 GEN_INT (probability),
/* SECOND branch also goes to TARGET1 (taken on unordered).  */
12420 if (second != NULL_RTX)
12422 i = emit_jump_insn (gen_rtx_SET
12424 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12426 if (second_probability >= 0)
12428 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12429 GEN_INT (second_probability),
12432 if (label != NULL_RTX)
12433 emit_label (label);
/* Expand a setcc of CODE into QImode DEST.  Returns 1 on success, 0 to
   make the expander FAIL (double-word modes take another path).  */
12437 ix86_expand_setcc (enum rtx_code code, rtx dest)
12439 rtx ret, tmp, tmpreg, equiv;
12440 rtx second_test, bypass_test;
12442 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12443 return 0; /* FAIL */
12445 gcc_assert (GET_MODE (dest) == QImode);
12447 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12448 PUT_MODE (ret, QImode);
12453 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* FP compares may need a second flag test; combine the two QImode
   results with AND (bypass) or OR (second).  */
12454 if (bypass_test || second_test)
12456 rtx test = second_test;
12458 rtx tmp2 = gen_reg_rtx (QImode);
/* At most one of bypass/second is set; bypass is used inverted.  */
12461 gcc_assert (!second_test);
12462 test = bypass_test;
12464 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12466 PUT_MODE (test, QImode);
12467 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12470 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12472 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12475 /* Attach a REG_EQUAL note describing the comparison result. */
12476 if (ix86_compare_op0 && ix86_compare_op1)
12478 equiv = simplify_gen_relational (code, QImode,
12479 GET_MODE (ix86_compare_op0),
12480 ix86_compare_op0, ix86_compare_op1);
12481 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12484 return 1; /* DONE */
12487 /* Expand comparison setting or clearing carry flag. Return true when
12488 successful and set pop for the operation. */
12490 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12492 enum machine_mode mode =
12493 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12495 /* Do not handle DImode compares that go through special path. */
12496 if (mode == (TARGET_64BIT ? TImode : DImode))
12499 if (SCALAR_FLOAT_MODE_P (mode))
12501 rtx second_test = NULL, bypass_test = NULL;
12502 rtx compare_op, compare_seq;
12504 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12506 /* Shortcut: following common codes never translate
12507 into carry flag compares. */
12508 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12509 || code == ORDERED || code == UNORDERED)
12512 /* These comparisons require zero flag; swap operands so they won't. */
12513 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12514 && !TARGET_IEEE_FP)
12519 code = swap_condition (code);
12522 /* Try to expand the comparison and verify that we end up with
12523 carry flag based comparison. This fails to be true only when
12524 we decide to expand comparison using arithmetic that is not
12525 too common scenario. */
12527 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12528 &second_test, &bypass_test);
12529 compare_seq = get_insns ();
/* Extra tests mean more than one branch -- not a pure carry compare.  */
12532 if (second_test || bypass_test)
12535 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12536 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12537 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12539 code = GET_CODE (compare_op);
12541 if (code != LTU && code != GEU)
12544 emit_insn (compare_seq);
12549 if (!INTEGRAL_MODE_P (mode))
/* Integer path: rewrite CODE/OP1 so the test becomes LTU or GEU.
   (NOTE(review): the switch head and several case labels are elided.)  */
12558 /* Convert a==0 into (unsigned)a<1. */
12561 if (op1 != const0_rtx)
12564 code = (code == EQ ? LTU : GEU);
12567 /* Convert a>b into b<a or a>=b-1. */
12570 if (CONST_INT_P (op1))
12572 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12573 /* Bail out on overflow. We still can swap operands but that
12574 would force loading of the constant into register. */
12575 if (op1 == const0_rtx
12576 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12578 code = (code == GTU ? GEU : LTU);
12585 code = (code == GTU ? LTU : GEU);
12589 /* Convert a>=0 into (unsigned)a<0x80000000. */
12592 if (mode == DImode || op1 != const0_rtx)
12594 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12595 code = (code == LT ? GEU : LTU);
12599 if (mode == DImode || op1 != constm1_rtx)
12601 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12602 code = (code == LE ? GEU : LTU);
12608 /* Swapping operands may cause constant to appear as first operand. */
12609 if (!nonimmediate_operand (op0, VOIDmode))
12611 if (!can_create_pseudo_p ())
12613 op0 = force_reg (mode, op0);
12615 ix86_compare_op0 = op0;
12616 ix86_compare_op1 = op1;
12617 *pop = ix86_expand_compare (code, NULL, NULL);
12618 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move (movcc).  operands[0] = dest,
   operands[1] = comparison rtx, operands[2]/operands[3] = the two arms.
   Returns 1 when the expansion is emitted ("DONE") and 0 on failure
   ("FAIL", letting generic code handle it).
   NOTE(review): many lines are elided from this view (internal numbering
   jumps); comments mark only what the visible statements establish.  */
12623 ix86_expand_int_movcc (rtx operands[])
12625 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12626 rtx compare_seq, compare_op;
12627 rtx second_test, bypass_test;
12628 enum machine_mode mode = GET_MODE (operands[0]);
12629 bool sign_bit_compare_p = false;;
12632 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12633 compare_seq = get_insns ();
12636 compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 / x>-1 / x<=-1 test only the sign bit.  */
12638 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12639 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12640 sign_bit_compare_p = true;
12642 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12643 HImode insns, we'd be swallowed in word prefix ops. */
/* Case 1: both arms are integer constants — try branchless sbb/setcc
   arithmetic before falling back to cmove.  */
12645 if ((mode != HImode || TARGET_FAST_PREFIX)
12646 && (mode != (TARGET_64BIT ? TImode : DImode))
12647 && CONST_INT_P (operands[2])
12648 && CONST_INT_P (operands[3]))
12650 rtx out = operands[0];
12651 HOST_WIDE_INT ct = INTVAL (operands[2]);
12652 HOST_WIDE_INT cf = INTVAL (operands[3]);
12653 HOST_WIDE_INT diff;
12656 /* Sign bit compares are better done using shifts than we do by using
12658 if (sign_bit_compare_p
12659 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12660 ix86_compare_op1, &compare_op))
12662 /* Detect overlap between destination and compare sources. */
12665 if (!sign_bit_compare_p)
12667 bool fpcmp = false;
12669 compare_code = GET_CODE (compare_op);
12671 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12672 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12675 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12678 /* To simplify rest of code, restrict to the GEU case. */
12679 if (compare_code == LTU)
12681 HOST_WIDE_INT tmp = ct;
12684 compare_code = reverse_condition (compare_code);
12685 code = reverse_condition (code);
/* FP compares must use the maybe-unordered reversal.  */
12690 PUT_CODE (compare_op,
12691 reverse_condition_maybe_unordered
12692 (GET_CODE (compare_op)));
12694 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
/* Use a scratch if dest overlaps a compare input, so the sbb does
   not clobber an operand still needed.  */
12698 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12699 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12700 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb reg,reg pattern).  */
12702 if (mode == DImode)
12703 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12705 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12709 if (code == GT || code == GE)
12710 code = reverse_condition (code);
12713 HOST_WIDE_INT tmp = ct;
/* emit_store_flag with last arg -1 produces a 0/-1 mask.  */
12718 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12719 ix86_compare_op1, VOIDmode, 0, -1);
/* Combine the 0/-1 mask with the constants via add/or/not/and.  */
12732 tmp = expand_simple_binop (mode, PLUS,
12734 copy_rtx (tmp), 1, OPTAB_DIRECT);
12745 tmp = expand_simple_binop (mode, IOR,
12747 copy_rtx (tmp), 1, OPTAB_DIRECT);
12749 else if (diff == -1 && ct)
12759 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12761 tmp = expand_simple_binop (mode, PLUS,
12762 copy_rtx (tmp), GEN_INT (cf),
12763 copy_rtx (tmp), 1, OPTAB_DIRECT);
12771 * andl cf - ct, dest
12781 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12784 tmp = expand_simple_binop (mode, AND,
12786 gen_int_mode (cf - ct, mode),
12787 copy_rtx (tmp), 1, OPTAB_DIRECT);
12789 tmp = expand_simple_binop (mode, PLUS,
12790 copy_rtx (tmp), GEN_INT (ct),
12791 copy_rtx (tmp), 1, OPTAB_DIRECT);
12794 if (!rtx_equal_p (tmp, out))
12795 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12797 return 1; /* DONE */
/* The carry-flag expansion did not apply; possibly swap ct/cf and
   reverse the condition instead.  */
12802 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12805 tmp = ct, ct = cf, cf = tmp;
12808 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12810 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12812 /* We may be reversing unordered compare to normal compare, that
12813 is not valid in general (we may convert non-trapping condition
12814 to trapping one), however on i386 we currently emit all
12815 comparisons unordered. */
12816 compare_code = reverse_condition_maybe_unordered (compare_code);
12817 code = reverse_condition_maybe_unordered (code);
12821 compare_code = reverse_condition (compare_code);
12822 code = reverse_condition (code);
/* Recognize sign-bit compares against int constants (0 / -1).  */
12826 compare_code = UNKNOWN;
12827 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12828 && CONST_INT_P (ix86_compare_op1))
12830 if (ix86_compare_op1 == const0_rtx
12831 && (code == LT || code == GE))
12832 compare_code = code;
12833 else if (ix86_compare_op1 == constm1_rtx)
12837 else if (code == GT)
12842 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12843 if (compare_code != UNKNOWN
12844 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12845 && (cf == -1 || ct == -1))
12847 /* If lea code below could be used, only optimize
12848 if it results in a 2 insn sequence. */
12850 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12851 || diff == 3 || diff == 5 || diff == 9)
12852 || (compare_code == LT && ct == -1)
12853 || (compare_code == GE && cf == -1))
12856 * notl op1 (if necessary)
12864 code = reverse_condition (code);
12867 out = emit_store_flag (out, code, ix86_compare_op0,
12868 ix86_compare_op1, VOIDmode, 0, -1);
12870 out = expand_simple_binop (mode, IOR,
12872 out, 1, OPTAB_DIRECT);
12873 if (out != operands[0])
12874 emit_move_insn (operands[0], out);
12876 return 1; /* DONE */
/* Case: ct-cf is one of the scales/offsets an lea can encode — use
   setcc followed by lea arithmetic.  */
12881 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12882 || diff == 3 || diff == 5 || diff == 9)
12883 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12885 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12891 * lea cf(dest*(ct-cf)),dest
12895 * This also catches the degenerate setcc-only case.
12901 out = emit_store_flag (out, code, ix86_compare_op0,
12902 ix86_compare_op1, VOIDmode, 0, 1);
12905 /* On x86_64 the lea instruction operates on Pmode, so we need
12906 to get arithmetics done in proper mode to match. */
12908 tmp = copy_rtx (out);
12912 out1 = copy_rtx (out);
12913 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12917 tmp = gen_rtx_PLUS (mode, tmp, out1);
12923 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12926 if (!rtx_equal_p (tmp, out))
12929 out = force_operand (tmp, copy_rtx (out));
12931 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12933 if (!rtx_equal_p (out, operands[0]))
12934 emit_move_insn (operands[0], copy_rtx (out));
12936 return 1; /* DONE */
12940 * General case: Jumpful:
12941 * xorl dest,dest cmpl op1, op2
12942 * cmpl op1, op2 movl ct, dest
12943 * setcc dest jcc 1f
12944 * decl dest movl cf, dest
12945 * andl (cf-ct),dest 1:
12948 * Size 20. Size 14.
12950 * This is reasonably steep, but branch mispredict costs are
12951 * high on modern cpus, so consider failing only if optimizing
/* Case: no cmove available (or QImode partial-reg stall) — use the
   branchless setcc/dec/and sequence when branches are costly.  */
12955 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12956 && BRANCH_COST >= 2)
12960 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12965 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12967 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12969 /* We may be reversing unordered compare to normal compare,
12970 that is not valid in general (we may convert non-trapping
12971 condition to trapping one), however on i386 we currently
12972 emit all comparisons unordered. */
12973 code = reverse_condition_maybe_unordered (code);
12977 code = reverse_condition (code);
12978 if (compare_code != UNKNOWN)
12979 compare_code = reverse_condition (compare_code);
12983 if (compare_code != UNKNOWN)
12985 /* notl op1 (if needed)
12990 For x < 0 (resp. x <= -1) there will be no notl,
12991 so if possible swap the constants to get rid of the
12993 True/false will be -1/0 while code below (store flag
12994 followed by decrement) is 0/-1, so the constants need
12995 to be exchanged once more. */
12997 if (compare_code == GE || !cf)
12999 code = reverse_condition (code);
13004 HOST_WIDE_INT tmp = cf;
13009 out = emit_store_flag (out, code, ix86_compare_op0,
13010 ix86_compare_op1, VOIDmode, 0, -1);
13014 out = emit_store_flag (out, code, ix86_compare_op0,
13015 ix86_compare_op1, VOIDmode, 0, 1);
/* out = (out - 1) & (cf - ct) + ct computes the select.  */
13017 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
13018 copy_rtx (out), 1, OPTAB_DIRECT);
13021 out = expand_simple_binop (mode, AND, copy_rtx (out),
13022 gen_int_mode (cf - ct, mode),
13023 copy_rtx (out), 1, OPTAB_DIRECT);
13025 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
13026 copy_rtx (out), 1, OPTAB_DIRECT);
13027 if (!rtx_equal_p (out, operands[0]))
13028 emit_move_insn (operands[0], copy_rtx (out));
13030 return 1; /* DONE */
/* Case 2: one arm constant, one variable, no cmove — load the special
   constant (0/-1) and mask the variable in with and/or.  */
13034 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13036 /* Try a few things more with specific constants and a variable. */
13039 rtx var, orig_out, out, tmp;
13041 if (BRANCH_COST <= 2)
13042 return 0; /* FAIL */
13044 /* If one of the two operands is an interesting constant, load a
13045 constant with the above and mask it in with a logical operation. */
13047 if (CONST_INT_P (operands[2]))
13050 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
13051 operands[3] = constm1_rtx, op = and_optab;
13052 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
13053 operands[3] = const0_rtx, op = ior_optab;
13055 return 0; /* FAIL */
13057 else if (CONST_INT_P (operands[3]))
13060 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
13061 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the second condition below tests operands[3] twice
   (INTVAL(operands[3]) == -1 && operands[3] != const0_rtx); the
   parallel branch above tests operands[2] vs operands[3].  Possibly
   intentional (a CONST_INT of -1 is never const0_rtx) — verify
   against upstream GCC history before changing.  */
13062 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
13063 operands[2] = const0_rtx, op = ior_optab;
13065 return 0; /* FAIL */
13068 return 0; /* FAIL */
13070 orig_out = operands[0];
13071 tmp = gen_reg_rtx (mode);
13074 /* Recurse to get the constant loaded. */
13075 if (ix86_expand_int_movcc (operands) == 0)
13076 return 0; /* FAIL */
13078 /* Mask in the interesting variable. */
13079 out = expand_binop (mode, op, var, tmp, orig_out, 0,
13081 if (!rtx_equal_p (out, orig_out))
13082 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
13084 return 1; /* DONE */
13088 * For comparison with above,
/* Case 3: general cmove expansion.  Force operands into registers as
   required and emit up to three conditional-move SETs (main compare,
   bypass test, second test).  */
13098 if (! nonimmediate_operand (operands[2], mode))
13099 operands[2] = force_reg (mode, operands[2]);
13100 if (! nonimmediate_operand (operands[3], mode))
13101 operands[3] = force_reg (mode, operands[3]);
13103 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13105 rtx tmp = gen_reg_rtx (mode);
13106 emit_move_insn (tmp, operands[3]);
13109 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13111 rtx tmp = gen_reg_rtx (mode);
13112 emit_move_insn (tmp, operands[2]);
13116 if (! register_operand (operands[2], VOIDmode)
13118 || ! register_operand (operands[3], VOIDmode)))
13119 operands[2] = force_reg (mode, operands[2]);
13122 && ! register_operand (operands[3], VOIDmode))
13123 operands[3] = force_reg (mode, operands[3]);
13125 emit_insn (compare_seq);
13126 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13127 gen_rtx_IF_THEN_ELSE (mode,
13128 compare_op, operands[2],
13131 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13132 gen_rtx_IF_THEN_ELSE (mode,
13134 copy_rtx (operands[3]),
13135 copy_rtx (operands[0]))));
13137 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13138 gen_rtx_IF_THEN_ELSE (mode,
13140 copy_rtx (operands[2]),
13141 copy_rtx (operands[0]))));
13143 return 1; /* DONE */
13146 /* Swap, force into registers, or otherwise massage the two operands
13147 to an sse comparison with a mask result. Thus we differ a bit from
13148 ix86_prepare_fp_compare_args which expects to produce a flags result.
13150 The DEST operand exists to help determine whether to commute commutative
13151 operators. The POP0/POP1 operands are updated in place. The new
13152 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): the switch arms of this function are largely elided in
   this view; comments describe only the visible cases.  */
13154 static enum rtx_code
13155 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13156 rtx *pop0, rtx *pop1)
13164 /* We have no LTGT as an operator. We could implement it with
13165 NE & ORDERED, but this requires an extra temporary. It's
13166 not clear that it's worth it. */
13173 /* These are supported directly. */
13180 /* For commutative operators, try to canonicalize the destination
13181 operand to be first in the comparison - this helps reload to
13182 avoid extra moves. */
13183 if (!dest || !rtx_equal_p (dest, *pop1))
13191 /* These are not supported directly. Swap the comparison operands
13192 to transform into something that is supported. */
13196 code = swap_condition (code);
13200 gcc_unreachable ();
13206 /* Detect conditional moves that exactly match min/max operational
13207 semantics. Note that this is IEEE safe, as long as we don't
13208 interchange the operands.
13210 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13211 and TRUE if the operation is successful and instructions are emitted. */
/* NOTE(review): lines are elided here (internal numbering jumps);
   only the visible logic is annotated.  */
13214 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13215 rtx cmp_op1, rtx if_true, rtx if_false)
13217 enum machine_mode mode;
13223 else if (code == UNGE)
13226 if_true = if_false;
/* The move matches min/max only if the compared values are exactly
   the two arms (in either order).  */
13232 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13234 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13239 mode = GET_MODE (dest);
13241 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13242 but MODE may be a vector mode and thus not appropriate. */
13243 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-safe form: wrap in an UNSPEC so the optimizers cannot
   commute the operands.  */
13245 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13248 if_true = force_reg (mode, if_true);
13249 v = gen_rtvec (2, if_true, if_false);
13250 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math form: plain SMIN/SMAX rtx.  */
13254 code = is_min ? SMIN : SMAX;
13255 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13258 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13262 /* Expand an sse vector comparison. Return the register with the result. */
13265 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13266 rtx op_true, rtx op_false)
13268 enum machine_mode mode = GET_MODE (dest);
/* Operand 0 must be a register; operand 1 may be register or memory.  */
13271 cmp_op0 = force_reg (mode, cmp_op0);
13272 if (!nonimmediate_operand (cmp_op1, mode))
13273 cmp_op1 = force_reg (mode, cmp_op1);
/* Use a fresh register when DEST overlaps either select arm, so the
   mask does not clobber a value still needed by the caller.  */
13276 || reg_overlap_mentioned_p (dest, op_true)
13277 || reg_overlap_mentioned_p (dest, op_false))
13278 dest = gen_reg_rtx (mode);
/* Emit dest = (cmp_op0 <code> cmp_op1) as a mask-producing compare.  */
13280 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13281 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13286 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13287 operations. This is used for both scalar and vector conditional moves. */
13290 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13292 enum machine_mode mode = GET_MODE (dest);
/* Preferred form: a single conditional-move pattern (guarded by a
   condition elided from this view).  */
13297 rtx pcmov = gen_rtx_SET (mode, dest,
13298 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* dest = cmp & op_true suffices when the false arm is zero.  */
13303 else if (op_false == CONST0_RTX (mode))
13305 op_true = force_reg (mode, op_true);
13306 x = gen_rtx_AND (mode, cmp, op_true);
13307 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* dest = ~cmp & op_false when the true arm is zero.  */
13309 else if (op_true == CONST0_RTX (mode))
13311 op_false = force_reg (mode, op_false);
13312 x = gen_rtx_NOT (mode, cmp);
13313 x = gen_rtx_AND (mode, x, op_false);
13314 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* General case: dest = (op_true & cmp) | (op_false & ~cmp).  */
13318 op_true = force_reg (mode, op_true);
13319 op_false = force_reg (mode, op_false);
13321 t2 = gen_reg_rtx (mode);
13323 t3 = gen_reg_rtx (mode);
13327 x = gen_rtx_AND (mode, op_true, cmp);
13328 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13330 x = gen_rtx_NOT (mode, cmp);
13331 x = gen_rtx_AND (mode, x, op_false);
13332 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13334 x = gen_rtx_IOR (mode, t3, t2);
13335 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13339 /* Expand a floating-point conditional move. Return true if successful. */
13342 ix86_expand_fp_movcc (rtx operands[])
13344 enum machine_mode mode = GET_MODE (operands[0]);
13345 enum rtx_code code = GET_CODE (operands[1]);
13346 rtx tmp, compare_op, second_test, bypass_test;
/* SSE path: no cmove exists for SSE registers, so expand via compare
   masks and logical ops (min/max shortcut when applicable).  */
13348 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13350 enum machine_mode cmode;
13352 /* Since we've no cmove for sse registers, don't force bad register
13353 allocation just to gain access to it. Deny movcc when the
13354 comparison mode doesn't match the move mode. */
13355 cmode = GET_MODE (ix86_compare_op0);
13356 if (cmode == VOIDmode)
13357 cmode = GET_MODE (ix86_compare_op1);
13361 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13363 &ix86_compare_op1);
13364 if (code == UNKNOWN)
13367 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13368 ix86_compare_op1, operands[2],
13372 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13373 ix86_compare_op1, operands[2], operands[3]);
13374 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13378 /* The floating point conditional move instructions don't directly
13379 support conditions resulting from a signed integer comparison. */
13381 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13383 /* The floating point conditional move instructions don't directly
13384 support signed integer comparisons. */
/* If fcmov cannot use the condition, materialize it as a QImode
   setcc result and compare that against zero instead.  */
13386 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13388 gcc_assert (!second_test && !bypass_test);
13389 tmp = gen_reg_rtx (QImode);
13390 ix86_expand_setcc (code, tmp);
13392 ix86_compare_op0 = tmp;
13393 ix86_compare_op1 = const0_rtx;
13394 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy an arm into a scratch when dest overlaps it and an extra
   test will still need the original value.  */
13396 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13398 tmp = gen_reg_rtx (mode);
13399 emit_move_insn (tmp, operands[3]);
13402 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13404 tmp = gen_reg_rtx (mode);
13405 emit_move_insn (tmp, operands[2]);
/* Emit the main fcmov plus optional bypass/second conditional moves.  */
13409 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13410 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13411 operands[2], operands[3])));
13413 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13414 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13415 operands[3], operands[0])));
13417 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13418 gen_rtx_IF_THEN_ELSE (mode, second_test,
13419 operands[2], operands[0])));
13424 /* Expand a floating-point vector conditional move; a vcond operation
13425 rather than a movcc operation. */
13428 ix86_expand_fp_vcond (rtx operands[])
13430 enum rtx_code code = GET_CODE (operands[3]);
/* Canonicalize comparison operands 4/5 for an SSE mask compare.  */
13433 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13434 &operands[4], &operands[5]);
13435 if (code == UNKNOWN)
/* min/max shortcut, then the generic compare + mask-select path.  */
13438 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13439 operands[5], operands[1], operands[2]))
13442 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13443 operands[1], operands[2]);
13444 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13448 /* Expand a signed/unsigned integral vector conditional move. */
/* NOTE(review): several guard lines are elided in this view (internal
   numbering jumps); comments reflect only the visible statements.  */
13451 ix86_expand_int_vcond (rtx operands[])
13453 enum machine_mode mode = GET_MODE (operands[0]);
13454 enum rtx_code code = GET_CODE (operands[3]);
13455 bool negate = false;
13458 cop0 = operands[4];
13459 cop1 = operands[5];
13461 /* Canonicalize the comparison to EQ, GT, GTU. */
13472 code = reverse_condition (code);
13478 code = reverse_condition (code);
13484 code = swap_condition (code);
13485 x = cop0, cop0 = cop1, cop1 = x;
13489 gcc_unreachable ();
13492 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13493 if (mode == V2DImode)
13498 /* SSE4.1 supports EQ. */
13499 if (!TARGET_SSE4_1)
13505 /* SSE4.2 supports GT/GTU. */
13506 if (!TARGET_SSE4_2)
13511 gcc_unreachable ();
13515 /* Unsigned parallel compare is not supported by the hardware. Play some
13516 tricks to turn this into a signed comparison against 0. */
13519 cop0 = force_reg (mode, cop0);
13528 /* Perform a parallel modulo subtraction. */
13529 t1 = gen_reg_rtx (mode);
13530 emit_insn ((mode == V4SImode
13532 : gen_subv2di3) (t1, cop0, cop1));
13534 /* Extract the original sign bit of op0. */
13535 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13537 t2 = gen_reg_rtx (mode);
13538 emit_insn ((mode == V4SImode
13540 : gen_andv2di3) (t2, cop0, mask));
13542 /* XOR it back into the result of the subtraction. This results
13543 in the sign bit set iff we saw unsigned underflow. */
13544 x = gen_reg_rtx (mode);
13545 emit_insn ((mode == V4SImode
13547 : gen_xorv2di3) (x, t1, t2));
13555 /* Perform a parallel unsigned saturating subtraction. */
13556 x = gen_reg_rtx (mode);
13557 emit_insn (gen_rtx_SET (VOIDmode, x,
13558 gen_rtx_US_MINUS (mode, cop0, cop1)));
13565 gcc_unreachable ();
13569 cop1 = CONST0_RTX (mode);
/* Compare, then mask-select the arms; NEGATE swaps which arm is
   picked by indexing operands[1+negate]/operands[2-negate].  */
13572 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13573 operands[1+negate], operands[2-negate]);
13575 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13576 operands[2-negate]);
13580 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13581 true if we should do zero extension, else sign extension. HIGH_P is
13582 true if we want the N/2 high elements, else the low elements. */
13585 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13587 enum machine_mode imode = GET_MODE (operands[1]);
13588 rtx (*unpack)(rtx, rtx, rtx);
/* Select the interleave pattern by element width and half.  */
13595 unpack = gen_vec_interleave_highv16qi;
13597 unpack = gen_vec_interleave_lowv16qi;
13601 unpack = gen_vec_interleave_highv8hi;
13603 unpack = gen_vec_interleave_lowv8hi;
13607 unpack = gen_vec_interleave_highv4si;
13609 unpack = gen_vec_interleave_lowv4si;
13612 gcc_unreachable ();
13615 dest = gen_lowpart (imode, operands[0]);
/* SE is the vector interleaved with the source: zero for unsigned
   extension, or a 0/-1 sign mask computed by comparing 0 > op1.  */
13618 se = force_reg (imode, CONST0_RTX (imode));
13620 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13621 operands[1], pc_rtx, pc_rtx);
13623 emit_insn (unpack (dest, operands[1], se));
13626 /* This function performs the same task as ix86_expand_sse_unpack,
13627 but with SSE4.1 instructions. */
13630 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13632 enum machine_mode imode = GET_MODE (operands[1]);
13633 rtx (*unpack)(rtx, rtx);
/* Pick the SSE4.1 pmovzx/pmovsx-style extend pattern per width.  */
13640 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13642 unpack = gen_sse4_1_extendv8qiv8hi2;
13646 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13648 unpack = gen_sse4_1_extendv4hiv4si2;
13652 unpack = gen_sse4_1_zero_extendv2siv2di2;
13654 unpack = gen_sse4_1_extendv2siv2di2;
13657 gcc_unreachable ();
13660 dest = operands[0];
13663 /* Shift higher 8 bytes to lower 8 bytes. */
/* The extend patterns read the low half, so for HIGH_P move the high
   64 bits down first with a TImode logical right shift.  */
13664 src = gen_reg_rtx (imode);
13665 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13666 gen_lowpart (TImode, operands[1]),
13672 emit_insn (unpack (dest, src));
13675 /* This function performs the same task as ix86_expand_sse_unpack,
13676 but with amdfam15 instructions. */
/* PPERM control-byte encodings: high bits select the per-byte action,
   PPERM_SRC1/PPERM_SRC2 select which source operand supplies the byte.  */
13678 #define PPERM_SRC 0x00 /* copy source */
13679 #define PPERM_INVERT 0x20 /* invert source */
13680 #define PPERM_REVERSE 0x40 /* bit reverse source */
13681 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13682 #define PPERM_ZERO 0x80 /* all 0's */
13683 #define PPERM_ONES 0xa0 /* all 1's */
13684 #define PPERM_SIGN 0xc0 /* propagate sign bit */
13685 #define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
13687 #define PPERM_SRC1 0x00 /* use first source byte */
13688 #define PPERM_SRC2 0x10 /* use second source byte */
/* Build a 16-byte PPERM control vector that widens each element of
   operands[1], filling the upper bytes with zero (unsigned) or a
   propagated sign (signed), then emit the matching SSE5 pperm insn.  */
13691 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13693 enum machine_mode imode = GET_MODE (operands[1]);
13694 int pperm_bytes[16];
/* H biases byte indices to pick the high half of the source.  */
13696 int h = (high_p) ? 8 : 0;
13699 rtvec v = rtvec_alloc (16);
13702 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each source byte followed by a zero/sign byte.  */
13707 vs = rtvec_alloc (8);
13708 h2 = (high_p) ? 8 : 0;
13709 for (i = 0; i < 8; i++)
13711 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13712 pperm_bytes[2*i+1] = ((unsigned_p)
13714 : PPERM_SIGN | PPERM_SRC2 | i | h);
13717 for (i = 0; i < 16; i++)
13718 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13720 for (i = 0; i < 8; i++)
13721 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13723 p = gen_rtx_PARALLEL (VOIDmode, vs);
13724 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13726 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13728 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes then two zero/sign bytes.  */
13732 vs = rtvec_alloc (4);
13733 h2 = (high_p) ? 4 : 0;
13734 for (i = 0; i < 4; i++)
13736 sign_extend = ((unsigned_p)
13738 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13739 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13740 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13741 pperm_bytes[4*i+2] = sign_extend;
13742 pperm_bytes[4*i+3] = sign_extend;
13745 for (i = 0; i < 16; i++)
13746 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13748 for (i = 0; i < 4; i++)
13749 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13751 p = gen_rtx_PARALLEL (VOIDmode, vs);
13752 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13754 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13756 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes then four zero/sign bytes.  */
13760 vs = rtvec_alloc (2);
13761 h2 = (high_p) ? 2 : 0;
13762 for (i = 0; i < 2; i++)
13764 sign_extend = ((unsigned_p)
13766 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13767 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13768 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13769 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13770 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13771 pperm_bytes[8*i+4] = sign_extend;
13772 pperm_bytes[8*i+5] = sign_extend;
13773 pperm_bytes[8*i+6] = sign_extend;
13774 pperm_bytes[8*i+7] = sign_extend;
13777 for (i = 0; i < 16; i++)
13778 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13780 for (i = 0; i < 2; i++)
13781 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13783 p = gen_rtx_PARALLEL (VOIDmode, vs);
13784 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13786 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13788 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13792 gcc_unreachable ();
13798 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13799 next narrower integer vector type */
/* Builds a PPERM control vector that selects the low bytes of each
   element: first 8 result bytes from operand 1 (PPERM_SRC1), last 8
   from operand 2 (PPERM_SRC2), then emits the pperm pack insn.  */
13801 ix86_expand_sse5_pack (rtx operands[3])
13803 enum machine_mode imode = GET_MODE (operands[0]);
13804 int pperm_bytes[16];
13806 rtvec v = rtvec_alloc (16);
13808 rtx op0 = operands[0];
13809 rtx op1 = operands[1];
13810 rtx op2 = operands[2];
/* V8HI inputs -> V16QI result: keep every other byte.  */
13815 for (i = 0; i < 8; i++)
13817 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13818 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13821 for (i = 0; i < 16; i++)
13822 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13824 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13825 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V4SI inputs -> V8HI result: keep the low 2 bytes of each word.  */
13829 for (i = 0; i < 4; i++)
13831 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13832 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13833 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13834 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13837 for (i = 0; i < 16; i++)
13838 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13840 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13841 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V2DI inputs -> V4SI result: keep the low 4 bytes of each quad.  */
13845 for (i = 0; i < 2; i++)
13847 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13848 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13849 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13850 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13851 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13852 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13853 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13854 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13857 for (i = 0; i < 16; i++)
13858 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13860 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13861 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13865 gcc_unreachable ();
13871 /* Expand conditional increment or decrement using adb/sbb instructions.
13872 The default case using setcc followed by the conditional move can be
13873 done by generic code. */
/* Returns 1 ("DONE") when an adc/sbb sequence was emitted; only
   increments/decrements by +-1 (operands[3]) are handled here.  */
13875 ix86_expand_int_addcc (rtx operands[])
13877 enum rtx_code code = GET_CODE (operands[1]);
13879 rtx val = const0_rtx;
13880 bool fpcmp = false;
13881 enum machine_mode mode = GET_MODE (operands[0]);
13883 if (operands[3] != const1_rtx
13884 && operands[3] != constm1_rtx)
/* The comparison must reduce to a carry-flag test (LTU/GEU).  */
13886 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13887 ix86_compare_op1, &compare_op))
13889 code = GET_CODE (compare_op);
13891 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13892 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13895 code = ix86_fp_compare_code_to_integer (code);
/* Flip the condition in place; FP conditions need the
   maybe-unordered reversal.  */
13902 PUT_CODE (compare_op,
13903 reverse_condition_maybe_unordered
13904 (GET_CODE (compare_op)));
13906 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13908 PUT_MODE (compare_op, mode);
13910 /* Construct either adc or sbb insn. */
13911 if ((code == LTU) == (operands[3] == constm1_rtx))
13913 switch (GET_MODE (operands[0]))
13916 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13919 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13922 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13925 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13928 gcc_unreachable ();
13933 switch (GET_MODE (operands[0]))
13936 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13939 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13942 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13945 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13948 gcc_unreachable ();
13951 return 1; /* DONE */
13955 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13956 works for floating pointer parameters and nonoffsetable memories.
13957 For pushes, it returns just stack offsets; the values will be saved
13958 in the right order. Maximally three parts are generated. */
13961 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13966 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13968 size = (GET_MODE_SIZE (mode) + 4) / 8;
13970 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13971 gcc_assert (size >= 2 && size <= 3);
13973 /* Optimize constant pool reference to immediates. This is used by fp
13974 moves, that force all constants to memory to allow combining. */
13975 if (MEM_P (operand) && MEM_READONLY_P (operand))
13977 rtx tmp = maybe_get_pool_constant (operand);
13982 if (MEM_P (operand) && !offsettable_memref_p (operand))
13984 /* The only non-offsetable memories we handle are pushes. */
13985 int ok = push_operand (operand, VOIDmode);
13989 operand = copy_rtx (operand);
13990 PUT_MODE (operand, Pmode);
13991 parts[0] = parts[1] = parts[2] = operand;
13995 if (GET_CODE (operand) == CONST_VECTOR)
13997 enum machine_mode imode = int_mode_for_mode (mode);
13998 /* Caution: if we looked through a constant pool memory above,
13999 the operand may actually have a different mode now. That's
14000 ok, since we want to pun this all the way back to an integer. */
14001 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
14002 gcc_assert (operand != NULL);
14008 if (mode == DImode)
14009 split_di (&operand, 1, &parts[0], &parts[1]);
14012 if (REG_P (operand))
14014 gcc_assert (reload_completed);
14015 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
14016 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
14018 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
14020 else if (offsettable_memref_p (operand))
14022 operand = adjust_address (operand, SImode, 0);
14023 parts[0] = operand;
14024 parts[1] = adjust_address (operand, SImode, 4);
14026 parts[2] = adjust_address (operand, SImode, 8);
14028 else if (GET_CODE (operand) == CONST_DOUBLE)
14033 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14037 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
14038 parts[2] = gen_int_mode (l[2], SImode);
14041 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14044 gcc_unreachable ();
14046 parts[1] = gen_int_mode (l[1], SImode);
14047 parts[0] = gen_int_mode (l[0], SImode);
14050 gcc_unreachable ();
14055 if (mode == TImode)
14056 split_ti (&operand, 1, &parts[0], &parts[1]);
14057 if (mode == XFmode || mode == TFmode)
14059 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
14060 if (REG_P (operand))
14062 gcc_assert (reload_completed);
14063 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
14064 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
14066 else if (offsettable_memref_p (operand))
14068 operand = adjust_address (operand, DImode, 0);
14069 parts[0] = operand;
14070 parts[1] = adjust_address (operand, upper_mode, 8);
14072 else if (GET_CODE (operand) == CONST_DOUBLE)
14077 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14078 real_to_target (l, &r, mode);
14080 /* Do not use shift by 32 to avoid warning on 32bit systems. */
14081 if (HOST_BITS_PER_WIDE_INT >= 64)
14084 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
14085 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
14088 parts[0] = immed_double_const (l[0], l[1], DImode);
14090 if (upper_mode == SImode)
14091 parts[1] = gen_int_mode (l[2], SImode);
14092 else if (HOST_BITS_PER_WIDE_INT >= 64)
14095 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
14096 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
14099 parts[1] = immed_double_const (l[2], l[3], DImode);
14102 gcc_unreachable ();
14109 /* Emit insns to perform a move or push of DI, DF, and XF values.
14110    Return false when normal moves are needed; true when all required
14111    insns have been emitted.  Operands 2-4 contain the input values
14112    in the correct order; operands 5-7 contain the output values.  */
/* NOTE(review): this excerpt elides some original lines (braces, the
   declarations of part[]/nparts/push, several conditions); comments
   describe only the visible code.  */
14115 ix86_split_long_move (rtx operands[])
14120 int collisions = 0;
14121 enum machine_mode mode = GET_MODE (operands[0]);
14123 /* The DFmode expanders may ask us to move double.
14124    For 64bit target this is single move.  By hiding the fact
14125    here we simplify i386.md splitters.  */
14126 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
14128 /* Optimize constant pool reference to immediates.  This is used by
14129    fp moves, that force all constants to memory to allow combining.  */
14131 if (MEM_P (operands[1])
14132 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
14133 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
14134 operands[1] = get_pool_constant (XEXP (operands[1], 0));
14135 if (push_operand (operands[0], VOIDmode))
14137 operands[0] = copy_rtx (operands[0]);
14138 PUT_MODE (operands[0], Pmode);
/* 64-bit fast path: do the whole 8-byte move as one DImode move.  */
14141 operands[0] = gen_lowpart (DImode, operands[0]);
14142 operands[1] = gen_lowpart (DImode, operands[1]);
14143 emit_move_insn (operands[0], operands[1]);
14147 /* The only non-offsettable memory we handle is push.  */
14148 if (push_operand (operands[0], VOIDmode))
14151 gcc_assert (!MEM_P (operands[0])
14152 || offsettable_memref_p (operands[0]));
/* Split both operands into 2-3 word-sized parts.  */
14154 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
14155 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
14157 /* When emitting push, take care for source operands on the stack.  */
14158 if (push && MEM_P (operands[1])
14159 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
/* Each push moves the stack pointer; rebase the remaining source
   parts on the address of the part pushed just before them.  */
14162 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
14163 XEXP (part[1][2], 0));
14164 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
14165 XEXP (part[1][1], 0));
14168 /* We need to do copy in the right order in case an address register
14169    of the source overlaps the destination.  */
14170 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
14172 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
14174 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14177 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
14180 /* Collision in the middle part can be handled by reordering.  */
14181 if (collisions == 1 && nparts == 3
14182 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14185 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14186 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14189 /* If there are more collisions, we can't handle it by reordering.
14190    Do an lea to the last part and use only one colliding move.  */
14191 else if (collisions > 1)
14197 base = part[0][nparts - 1];
14199 /* Handle the case when the last part isn't valid for lea.
14200    Happens in 64-bit mode storing the 12-byte XFmode.  */
14201 if (GET_MODE (base) != Pmode)
14202 base = gen_rtx_REG (Pmode, REGNO (base));
/* base = &source; then re-express all source parts relative to it.  */
14204 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14205 part[1][0] = replace_equiv_address (part[1][0], base);
14206 part[1][1] = replace_equiv_address (part[1][1],
14207 plus_constant (base, UNITS_PER_WORD));
14209 part[1][2] = replace_equiv_address (part[1][2],
14210 plus_constant (base, 8));
/* Push path: a 12-byte XFmode push on a 16-byte long-double ABI needs
   4 extra bytes of stack adjustment before the highest part.  */
14220 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14221 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14222 emit_move_insn (part[0][2], part[1][2]);
14227 /* In 64bit mode we don't have 32bit push available.  In case this is
14228    register, it is OK - we will just use larger counterpart.  We also
14229    retype memory - these come from attempt to avoid REX prefix on
14230    moving of second half of TFmode value.  */
14231 if (GET_MODE (part[1][1]) == SImode)
14233 switch (GET_CODE (part[1][1]))
14236 part[1][1] = adjust_address (part[1][1], DImode, 0);
14240 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14244 gcc_unreachable ();
14247 if (GET_MODE (part[1][0]) == SImode)
14248 part[1][0] = part[1][1];
14251 emit_move_insn (part[0][1], part[1][1]);
14252 emit_move_insn (part[0][0], part[1][0]);
14256 /* Choose correct order to not overwrite the source before it is copied.  */
14257 if ((REG_P (part[0][0])
14258 && REG_P (part[1][1])
14259 && (REGNO (part[0][0]) == REGNO (part[1][1])
14261 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14263 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed order: high part first so the low destination word does
   not clobber a source register still needed.  */
14267 operands[2] = part[0][2];
14268 operands[3] = part[0][1];
14269 operands[4] = part[0][0];
14270 operands[5] = part[1][2];
14271 operands[6] = part[1][1];
14272 operands[7] = part[1][0];
14276 operands[2] = part[0][1];
14277 operands[3] = part[0][0];
14278 operands[5] = part[1][1];
14279 operands[6] = part[1][0];
/* Natural order: low part first.  */
14286 operands[2] = part[0][0];
14287 operands[3] = part[0][1];
14288 operands[4] = part[0][2];
14289 operands[5] = part[1][0];
14290 operands[6] = part[1][1];
14291 operands[7] = part[1][2];
14295 operands[2] = part[0][0];
14296 operands[3] = part[0][1];
14297 operands[5] = part[1][0];
14298 operands[6] = part[1][1];
14302 /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
14305 if (CONST_INT_P (operands[5])
14306 && operands[5] != const0_rtx
14307 && REG_P (operands[2]))
/* Reuse the register just loaded with the constant instead of
   re-materializing the same immediate.  */
14309 if (CONST_INT_P (operands[6])
14310 && INTVAL (operands[6]) == INTVAL (operands[5]))
14311 operands[6] = operands[2];
14314 && CONST_INT_P (operands[7])
14315 && INTVAL (operands[7]) == INTVAL (operands[5]))
14316 operands[7] = operands[2];
14320 && CONST_INT_P (operands[6])
14321 && operands[6] != const0_rtx
14322 && REG_P (operands[3])
14323 && CONST_INT_P (operands[7])
14324 && INTVAL (operands[7]) == INTVAL (operands[6]))
14325 operands[7] = operands[3];
14328 emit_move_insn (operands[2], operands[5]);
14329 emit_move_insn (operands[3], operands[6]);
14331 emit_move_insn (operands[4], operands[7]);
14336 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14337    left shift by a constant, either using a single shift or
14338    a sequence of add instructions.  */
14341 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* Shift by one is a single self-add (operand += operand).  */
14345 emit_insn ((mode == DImode
14347 : gen_adddi3) (operand, operand, operand));
/* When not optimizing for size and COUNT adds are no more expensive
   than one constant shift, emit COUNT self-adds instead.  */
14349 else if (!optimize_size
14350 && count * ix86_cost->add <= ix86_cost->shift_const
14353 for (i=0; i<count; i++)
14355 emit_insn ((mode == DImode
14357 : gen_adddi3) (operand, operand, operand));
/* Otherwise a single shift-by-immediate.  */
14361 emit_insn ((mode == DImode
14363 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word (DImode on 32-bit, TImode on 64-bit) left shift
   of operands[1] by operands[2] into single-word operations on the
   low/high halves, using SCRATCH when available for the variable case.
   NOTE(review): some original lines (braces, else arms, the gen_*si3
   halves of several conditional expressions) are elided in this
   excerpt.  */
14367 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14369 rtx low[2], high[2];
14371 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: resolve everything at expand time.  */
14373 if (CONST_INT_P (operands[2]))
14375 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14376 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14378 if (count >= single_width)
/* Shifting by a full word or more: high = low << (count - width),
   low = 0.  */
14380 emit_move_insn (high[0], low[1]);
14381 emit_move_insn (low[0], const0_rtx);
14383 if (count > single_width)
14384 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Sub-word shift: shld the spill-over bits into the high half, then
   shift the low half.  */
14388 if (!rtx_equal_p (operands[0], operands[1]))
14389 emit_move_insn (operands[0], operands[1]);
14390 emit_insn ((mode == DImode
14392 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14393 ix86_expand_ashl_const (low[0], count, mode);
14398 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14400 if (operands[1] == const1_rtx)
14402 /* Assuming we've chosen a QImode capable registers, then 1 << N
14403    can be done with two 32/64-bit shifts, no branches, no cmoves.  */
14404 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14406 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Clear both halves, test bit log2(width) of the count, then use
   sete/setne to place the single 1 bit in the correct half.  */
14408 ix86_expand_clear (low[0]);
14409 ix86_expand_clear (high[0]);
14410 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14412 d = gen_lowpart (QImode, low[0]);
14413 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14414 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14415 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14417 d = gen_lowpart (QImode, high[0]);
14418 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14419 s = gen_rtx_NE (QImode, flags, const0_rtx);
14420 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14423 /* Otherwise, we can get the same results by manually performing
14424    a bit extract operation on bit 5/6, and then performing the two
14425    shifts.  The two methods of getting 0/1 into low/high are exactly
14426    the same size.  Avoiding the shift in the bit extract case helps
14427    pentium4 a bit; no one else seems to care much either way.  */
14432 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14433 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14435 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14436 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) / bit 6 (TImode) of the count into high[0],
   then low[0] = !high[0].  */
14438 emit_insn ((mode == DImode
14440 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14441 emit_insn ((mode == DImode
14443 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14444 emit_move_insn (low[0], high[0]);
14445 emit_insn ((mode == DImode
14447 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
/* Shift the 0/1 pair by the (masked) variable count.  */
14450 emit_insn ((mode == DImode
14452 : gen_ashldi3) (low[0], low[0], operands[2]));
14453 emit_insn ((mode == DImode
14455 : gen_ashldi3) (high[0], high[0], operands[2]));
14459 if (operands[1] == constm1_rtx)
14461 /* For -1 << N, we can avoid the shld instruction, because we
14462    know that we're shifting 0...31/63 ones into a -1.  */
14463 emit_move_insn (low[0], constm1_rtx);
14465 emit_move_insn (high[0], low[0]);
14467 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld + shl, then fix up when the count
   is >= single_width (adjust via cmov when scratch is available,
   otherwise via the branching x86_shift_adj_2 pattern).  */
14471 if (!rtx_equal_p (operands[0], operands[1]))
14472 emit_move_insn (operands[0], operands[1]);
14474 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14475 emit_insn ((mode == DImode
14477 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14480 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14482 if (TARGET_CMOVE && scratch)
14484 ix86_expand_clear (scratch);
14485 emit_insn ((mode == DImode
14486 ? gen_x86_shift_adj_1
14487 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14490 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift of operands[1] by
   operands[2] into single-word operations, mirroring ix86_split_ashl.
   NOTE(review): some original lines (braces, gen_*si3 halves of
   conditional expressions) are elided in this excerpt.  */
14494 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14496 rtx low[2], high[2];
14498 const int single_width = mode == DImode ? 32 : 64;
14500 if (CONST_INT_P (operands[2]))
14502 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14503 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximum count: both halves become the sign bit replicated.  */
14505 if (count == single_width * 2 - 1)
14507 emit_move_insn (high[0], high[1]);
14508 emit_insn ((mode == DImode
14510 : gen_ashrdi3) (high[0], high[0],
14511 GEN_INT (single_width - 1)));
14512 emit_move_insn (low[0], high[0]);
/* Count >= one word: low = high >> (count - width), high = sign fill.  */
14515 else if (count >= single_width)
14517 emit_move_insn (low[0], high[1]);
14518 emit_move_insn (high[0], low[0]);
14519 emit_insn ((mode == DImode
14521 : gen_ashrdi3) (high[0], high[0],
14522 GEN_INT (single_width - 1)));
14523 if (count > single_width)
14524 emit_insn ((mode == DImode
14526 : gen_ashrdi3) (low[0], low[0],
14527 GEN_INT (count - single_width)));
/* Sub-word constant count: shrd into the low half, sar the high half.  */
14531 if (!rtx_equal_p (operands[0], operands[1]))
14532 emit_move_insn (operands[0], operands[1]);
14533 emit_insn ((mode == DImode
14535 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14536 emit_insn ((mode == DImode
14538 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then adjust for counts >= width.  */
14543 if (!rtx_equal_p (operands[0], operands[1]))
14544 emit_move_insn (operands[0], operands[1]);
14546 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14548 emit_insn ((mode == DImode
14550 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14551 emit_insn ((mode == DImode
14553 : gen_ashrdi3) (high[0], high[0], operands[2]));
14555 if (TARGET_CMOVE && scratch)
/* scratch = sign extension of high word, used by the cmov fixup.  */
14557 emit_move_insn (scratch, high[0]);
14558 emit_insn ((mode == DImode
14560 : gen_ashrdi3) (scratch, scratch,
14561 GEN_INT (single_width - 1)));
14562 emit_insn ((mode == DImode
14563 ? gen_x86_shift_adj_1
14564 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14568 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift of operands[1] by
   operands[2] into single-word operations; like ix86_split_ashr but
   zero-filling instead of sign-filling.  NOTE(review): some original
   lines are elided in this excerpt.  */
14573 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14575 rtx low[2], high[2];
14577 const int single_width = mode == DImode ? 32 : 64;
14579 if (CONST_INT_P (operands[2]))
14581 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14582 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= one word: low = high >> (count - width), high = 0.  */
14584 if (count >= single_width)
14586 emit_move_insn (low[0], high[1]);
14587 ix86_expand_clear (high[0]);
14589 if (count > single_width)
14590 emit_insn ((mode == DImode
14592 : gen_lshrdi3) (low[0], low[0],
14593 GEN_INT (count - single_width)));
/* Sub-word constant count: shrd into the low half, shr the high half.  */
14597 if (!rtx_equal_p (operands[0], operands[1]))
14598 emit_move_insn (operands[0], operands[1]);
14599 emit_insn ((mode == DImode
14601 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14602 emit_insn ((mode == DImode
14604 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + shr, then fix up counts >= width.  */
14609 if (!rtx_equal_p (operands[0], operands[1]))
14610 emit_move_insn (operands[0], operands[1]);
14612 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14614 emit_insn ((mode == DImode
14616 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14617 emit_insn ((mode == DImode
14619 : gen_lshrdi3) (high[0], high[0], operands[2]));
14621 /* Heh.  By reversing the arguments, we can reuse this pattern.  */
14622 if (TARGET_CMOVE && scratch)
14624 ix86_expand_clear (scratch);
14625 emit_insn ((mode == DImode
14626 ? gen_x86_shift_adj_1
14627 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14631 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14635 /* Predict just emitted jump instruction to be taken with probability PROB.
      PROB is expressed on the REG_BR_PROB_BASE scale; the note is attached
      to the last emitted insn, which must be a jump.  */
14637 predict_jump (int prob)
14639 rtx insn = get_last_insn ();
14640 gcc_assert (JUMP_P (insn));
14642 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14647 /* Helper function for the string operations below.  Emit a runtime test
14648    of whether VARIABLE has the low bits selected by VALUE clear (i.e. is
14649    aligned); if so, jump to the returned label.  EPILOGUE selects the
14650    predicted branch probability (50% vs 90% taken).  */
14650 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14652 rtx label = gen_label_rtx ();
14653 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* tmpcount = variable & value, in the width of VARIABLE.  */
14654 if (GET_MODE (variable) == DImode)
14655 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14657 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
/* Jump to LABEL when the masked bits are all zero.  */
14658 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14661 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14663 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14667 /* Adjust COUNTER by the VALUE, i.e. emit COUNTREG -= VALUE in the
      width of COUNTREG.  */
14669 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14671 if (GET_MODE (countreg) == DImode)
14672 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14674 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14677 /* Zero extend possibly SImode EXP to Pmode register.  VOIDmode
      constants are simply forced into a Pmode register; values already
      in Pmode are copied to a fresh register.  */
14679 ix86_zero_extend_to_Pmode (rtx exp)
14682 if (GET_MODE (exp) == VOIDmode)
14683 return force_reg (Pmode, exp);
14684 if (GET_MODE (exp) == Pmode)
14685 return copy_to_mode_reg (Pmode, exp);
/* Remaining case visible here: SImode widened to DImode Pmode.  */
14686 r = gen_reg_rtx (Pmode);
14687 emit_insn (gen_zero_extendsidi2 (r, exp));
14691 /* Divide COUNTREG by SCALE.  SCALE must be a power of two; constants
      are folded at expand time, registers are shifted right by
      log2(SCALE).  */
14693 scale_counter (rtx countreg, int scale)
14696 rtx piece_size_mask;
14700 if (CONST_INT_P (countreg))
14701 return GEN_INT (INTVAL (countreg) / scale);
14702 gcc_assert (REG_P (countreg));
14704 piece_size_mask = GEN_INT (scale - 1);
14705 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14706 GEN_INT (exact_log2 (scale)),
14707 NULL, 1, OPTAB_DIRECT);
14711 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over
14712    DImode for constant loop counts.  */
14714 static enum machine_mode
14715 counter_mode (rtx count_exp)
14717 if (GET_MODE (count_exp) != VOIDmode)
14718 return GET_MODE (count_exp);
14719 if (GET_CODE (count_exp) != CONST_INT)
/* Constant count: DImode only when it does not fit in 32 bits.  */
14721 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14726 /* When SRCPTR is non-NULL, output simple loop to move memory
14727    pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14728    overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
14729    equivalent loop to set memory by VALUE (supposed to be in MODE).
14731    The size is rounded down to whole number of chunk size moved at once.
14732    SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */
/* NOTE(review): braces and a few declarations are elided in this
   excerpt; comments describe the visible code only.  */
14736 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14737 rtx destptr, rtx srcptr, rtx value,
14738 rtx count, enum machine_mode mode, int unroll,
14741 rtx out_label, top_label, iter, tmp;
14742 enum machine_mode iter_mode = counter_mode (count);
14743 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14744 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14750 top_label = gen_label_rtx ();
14751 out_label = gen_label_rtx ();
14752 iter = gen_reg_rtx (iter_mode);
/* size = count rounded down to a multiple of the unrolled chunk.  */
14754 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14755 NULL, 1, OPTAB_DIRECT);
14756 /* Those two should combine.  */
14757 if (piece_size == const1_rtx)
14759 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14761 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14763 emit_move_insn (iter, const0_rtx);
14765 emit_label (top_label);
/* Address the current chunk: destptr + iter (and srcptr + iter).  */
14767 tmp = convert_modes (Pmode, iter_mode, iter, true);
14768 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14769 destmem = change_address (destmem, mode, x_addr);
14773 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14774 srcmem = change_address (srcmem, mode, y_addr);
14776 /* When unrolling for chips that reorder memory reads and writes,
14777    we can save registers by using single temporary.
14778    Also using 4 temporaries is overkill in 32bit mode.  */
14779 if (!TARGET_64BIT && 0)
14781 for (i = 0; i < unroll; i++)
14786 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14788 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14790 emit_move_insn (destmem, srcmem);
/* Default copy path: load all UNROLL chunks into temporaries first,
   then store them, to decouple loads from stores.  */
14796 gcc_assert (unroll <= 4);
14797 for (i = 0; i < unroll; i++)
14799 tmpreg[i] = gen_reg_rtx (mode);
14803 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14805 emit_move_insn (tmpreg[i], srcmem);
14807 for (i = 0; i < unroll; i++)
14812 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14814 emit_move_insn (destmem, tmpreg[i]);
/* Set path (srcptr == NULL): store VALUE into each chunk.  */
14819 for (i = 0; i < unroll; i++)
14823 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14824 emit_move_insn (destmem, value);
/* iter += piece_size; loop while iter < size.  */
14827 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14828 true, OPTAB_LIB_WIDEN);
14830 emit_move_insn (iter, tmp);
14832 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Branch prediction: derive back-edge probability from the expected
   trip count when known, else assume 80% taken.  */
14834 if (expected_size != -1)
14836 expected_size /= GET_MODE_SIZE (mode) * unroll;
14837 if (expected_size == 0)
14839 else if (expected_size > REG_BR_PROB_BASE)
14840 predict_jump (REG_BR_PROB_BASE - 1);
14842 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14845 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the caller's pointers past the copied/set region.  */
14846 iter = ix86_zero_extend_to_Pmode (iter);
14847 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14848 true, OPTAB_LIB_WIDEN);
14849 if (tmp != destptr)
14850 emit_move_insn (destptr, tmp);
14853 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14854 true, OPTAB_LIB_WIDEN);
14856 emit_move_insn (srcptr, tmp);
14858 emit_label (out_label);
14861 /* Output "rep; mov" instruction.
14862    Arguments have same meaning as for previous function.  */
14864 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14865 rtx destptr, rtx srcptr,
14867 enum machine_mode mode)
14873 /* If the size is known, it is shorter to use rep movs.  */
14874 if (mode == QImode && CONST_INT_P (count)
14875 && !(INTVAL (count) & 3))
/* Make sure the MEMs are BLKmode and based directly on the pointer
   registers, as required by the rep_mov pattern.  */
14878 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14879 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14880 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14881 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
/* Counter counts MODE-sized elements, not bytes.  */
14882 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* destexp/srcexp describe the final pointer values:
   ptr + countreg * element-size.  */
14883 if (mode != QImode)
14885 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14886 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14887 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14888 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14889 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14890 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14894 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14895 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14897 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14901 /* Output "rep; stos" instruction.
14902    Arguments have same meaning as for previous function.  */
14904 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14906 enum machine_mode mode)
/* The rep_stos pattern wants a BLKmode MEM based on DESTPTR.  */
14911 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14912 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0)
14913 value = force_reg (mode, gen_lowpart (mode, value));
/* Counter counts MODE-sized stores; destexp is the final pointer.  */
14914 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14915 if (mode != QImode)
14917 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14918 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14919 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14922 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14923 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single MODE-wide string move from SRCMEM+OFFSET to
   DESTMEM+OFFSET via the strmov pattern, auto-incrementing both
   DESTPTR and SRCPTR.  */
14927 emit_strmov (rtx destmem, rtx srcmem,
14928 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14930 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14931 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14932 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14935 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
/* NOTE(review): braces and some condition lines are elided in this
   excerpt; comments describe the visible code only.  */
14937 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14938 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: emit straight-line moves for each set bit of the
   residual byte count (16/8/4/2/1).  */
14941 if (CONST_INT_P (count))
14943 HOST_WIDE_INT countval = INTVAL (count);
14946 if ((countval & 0x10) && max_size > 16)
14950 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14951 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14954 gcc_unreachable ();
14957 if ((countval & 0x08) && max_size > 8)
14960 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14963 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14964 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14968 if ((countval & 0x04) && max_size > 4)
14970 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14973 if ((countval & 0x02) && max_size > 2)
14975 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14978 if ((countval & 0x01) && max_size > 1)
14980 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable residue: fall back to a byte loop over
   count & (max_size - 1).  */
14987 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14988 count, 1, OPTAB_DIRECT);
14989 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14990 count, QImode, 1, 4);
14994 /* When there are stringops, we can cheaply increase dest and src pointers.
14995    Otherwise we save code size by maintaining offset (zero is readily
14996    available from preceding rep operation) and using x86 addressing modes.  */
14998 if (TARGET_SINGLE_STRINGOP)
/* Stringop path: test each residual bit of COUNT and emit one
   auto-incrementing strmov per size (4, 2, 1 bytes).  */
15002 rtx label = ix86_expand_aligntest (count, 4, true);
15003 src = change_address (srcmem, SImode, srcptr);
15004 dest = change_address (destmem, SImode, destptr);
15005 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15006 emit_label (label);
15007 LABEL_NUSES (label) = 1;
15011 rtx label = ix86_expand_aligntest (count, 2, true);
15012 src = change_address (srcmem, HImode, srcptr);
15013 dest = change_address (destmem, HImode, destptr);
15014 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15015 emit_label (label);
15016 LABEL_NUSES (label) = 1;
15020 rtx label = ix86_expand_aligntest (count, 1, true);
15021 src = change_address (srcmem, QImode, srcptr);
15022 dest = change_address (destmem, QImode, destptr);
15023 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15024 emit_label (label);
15025 LABEL_NUSES (label) = 1;
/* Non-stringop path: keep a running OFFSET register and address the
   residual pieces as ptr + offset.  */
15030 rtx offset = force_reg (Pmode, const0_rtx);
15035 rtx label = ix86_expand_aligntest (count, 4, true);
15036 src = change_address (srcmem, SImode, srcptr);
15037 dest = change_address (destmem, SImode, destptr);
15038 emit_move_insn (dest, src);
15039 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
15040 true, OPTAB_LIB_WIDEN);
15042 emit_move_insn (offset, tmp);
15043 emit_label (label);
15044 LABEL_NUSES (label) = 1;
15048 rtx label = ix86_expand_aligntest (count, 2, true);
15049 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15050 src = change_address (srcmem, HImode, tmp);
15051 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15052 dest = change_address (destmem, HImode, tmp);
15053 emit_move_insn (dest, src);
15054 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
15055 true, OPTAB_LIB_WIDEN);
15057 emit_move_insn (offset, tmp);
15058 emit_label (label);
15059 LABEL_NUSES (label) = 1;
15063 rtx label = ix86_expand_aligntest (count, 1, true);
15064 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15065 src = change_address (srcmem, QImode, tmp);
15066 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15067 dest = change_address (destmem, QImode, tmp);
15068 emit_move_insn (dest, src);
15069 emit_label (label);
15070 LABEL_NUSES (label) = 1;
15075 /* Output code to set at most count & (max_size - 1) bytes starting by DEST,
      using a byte-granular loop (used for large residues).  */
15077 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
15078 rtx count, int max_size)
/* Mask the count down to the residue, then reuse the generic loop
   expander in QImode set mode (srcmem/srcptr are NULL).  */
15081 expand_simple_binop (counter_mode (count), AND, count,
15082 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
15083 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
15084 gen_lowpart (QImode, value), count, QImode,
15088 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
/* NOTE(review): braces and some condition lines are elided in this
   excerpt; comments describe the visible code only.  VALUE is assumed
   to hold the replicated fill pattern wide enough for the gen_lowpart
   calls below — confirm against the caller.  */
15090 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Constant count: emit straight-line stores per residual bit.  */
15094 if (CONST_INT_P (count))
15096 HOST_WIDE_INT countval = INTVAL (count);
15099 if ((countval & 0x10) && max_size > 16)
15103 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15104 emit_insn (gen_strset (destptr, dest, value));
15105 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
15106 emit_insn (gen_strset (destptr, dest, value));
15109 gcc_unreachable ();
15112 if ((countval & 0x08) && max_size > 8)
15116 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15117 emit_insn (gen_strset (destptr, dest, value));
15121 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15122 emit_insn (gen_strset (destptr, dest, value));
15123 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
15124 emit_insn (gen_strset (destptr, dest, value));
15128 if ((countval & 0x04) && max_size > 4)
15130 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15131 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15134 if ((countval & 0x02) && max_size > 2)
15136 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
15137 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15140 if ((countval & 0x01) && max_size > 1)
15142 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
15143 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large variable residue: byte loop fallback.  */
15150 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable residue: test each bit of COUNT (16, 8, 4, 2, 1) and emit
   the matching auto-incrementing strset stores under a label guard.  */
15155 rtx label = ix86_expand_aligntest (count, 16, true);
15158 dest = change_address (destmem, DImode, destptr);
15159 emit_insn (gen_strset (destptr, dest, value));
15160 emit_insn (gen_strset (destptr, dest, value));
15164 dest = change_address (destmem, SImode, destptr);
15165 emit_insn (gen_strset (destptr, dest, value));
15166 emit_insn (gen_strset (destptr, dest, value));
15167 emit_insn (gen_strset (destptr, dest, value));
15168 emit_insn (gen_strset (destptr, dest, value));
15170 emit_label (label);
15171 LABEL_NUSES (label) = 1;
15175 rtx label = ix86_expand_aligntest (count, 8, true);
15178 dest = change_address (destmem, DImode, destptr);
15179 emit_insn (gen_strset (destptr, dest, value));
15183 dest = change_address (destmem, SImode, destptr);
15184 emit_insn (gen_strset (destptr, dest, value));
15185 emit_insn (gen_strset (destptr, dest, value));
15187 emit_label (label);
15188 LABEL_NUSES (label) = 1;
15192 rtx label = ix86_expand_aligntest (count, 4, true);
15193 dest = change_address (destmem, SImode, destptr);
15194 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15195 emit_label (label);
15196 LABEL_NUSES (label) = 1;
15200 rtx label = ix86_expand_aligntest (count, 2, true);
15201 dest = change_address (destmem, HImode, destptr);
15202 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15203 emit_label (label);
15204 LABEL_NUSES (label) = 1;
15208 rtx label = ix86_expand_aligntest (count, 1, true);
15209 dest = change_address (destmem, QImode, destptr);
15210 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15211 emit_label (label);
15212 LABEL_NUSES (label) = 1;
15216 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
15217 DESIRED_ALIGNMENT. */
/* NOTE(review): this extract elides some original lines (return type,
   braces, blank lines); the code below is kept byte-identical.
   Each step copies one 1-, 2- or 4-byte chunk, guarded by a runtime
   alignment test on DESTPTR, and shrinks COUNT accordingly so the
   main copy loop sees only the aligned remainder.  */
15219 expand_movmem_prologue (rtx destmem, rtx srcmem,
15220 rtx destptr, rtx srcptr, rtx count,
15221 int align, int desired_alignment)
15223 if (align <= 1 && desired_alignment > 1)
15225 rtx label = ix86_expand_aligntest (destptr, 1, false);
15226 srcmem = change_address (srcmem, QImode, srcptr);
15227 destmem = change_address (destmem, QImode, destptr);
15228 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15229 ix86_adjust_counter (count, 1);
15230 emit_label (label);
15231 LABEL_NUSES (label) = 1;
15233 if (align <= 2 && desired_alignment > 2)
15235 rtx label = ix86_expand_aligntest (destptr, 2, false);
15236 srcmem = change_address (srcmem, HImode, srcptr);
15237 destmem = change_address (destmem, HImode, destptr);
15238 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15239 ix86_adjust_counter (count, 2);
15240 emit_label (label);
15241 LABEL_NUSES (label) = 1;
15243 if (align <= 4 && desired_alignment > 4)
15245 rtx label = ix86_expand_aligntest (destptr, 4, false);
15246 srcmem = change_address (srcmem, SImode, srcptr);
15247 destmem = change_address (destmem, SImode, destptr);
15248 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15249 ix86_adjust_counter (count, 4);
15250 emit_label (label);
15251 LABEL_NUSES (label) = 1;
/* Only alignments up to 8 are supported by the three steps above.  */
15253 gcc_assert (desired_alignment <= 8);
15256 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
15257 DESIRED_ALIGNMENT. */
/* NOTE(review): extract elides some original lines (return type and
   braces); code kept byte-identical.  Mirrors expand_movmem_prologue
   but stores the low part of the promoted VALUE instead of copying.  */
15259 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15260 int align, int desired_alignment)
15262 if (align <= 1 && desired_alignment > 1)
15264 rtx label = ix86_expand_aligntest (destptr, 1, false);
15265 destmem = change_address (destmem, QImode, destptr);
15266 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15267 ix86_adjust_counter (count, 1);
15268 emit_label (label);
15269 LABEL_NUSES (label) = 1;
15271 if (align <= 2 && desired_alignment > 2)
15273 rtx label = ix86_expand_aligntest (destptr, 2, false);
15274 destmem = change_address (destmem, HImode, destptr);
15275 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15276 ix86_adjust_counter (count, 2);
15277 emit_label (label);
15278 LABEL_NUSES (label) = 1;
15280 if (align <= 4 && desired_alignment > 4)
15282 rtx label = ix86_expand_aligntest (destptr, 4, false);
15283 destmem = change_address (destmem, SImode, destptr);
15284 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15285 ix86_adjust_counter (count, 4);
15286 emit_label (label);
15287 LABEL_NUSES (label) = 1;
/* Only alignments up to 8 are handled here.  */
15289 gcc_assert (desired_alignment <= 8);
15292 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
/* Fix: the loop bound was misspelled NAX_STRINGOP_ALGS (an undefined
   identifier); the table size macro is MAX_STRINGOP_ALGS.  No other
   token changed.  NOTE(review): this extract elides some original
   lines (braces, declarations of `i' and `max', a few statements).  */
15293 static enum stringop_alg
15294 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15295 int *dynamic_check)
15297 const struct stringop_algs * algs;
15298 /* Algorithms using the rep prefix want at least edi and ecx;
15299 additionally, memset wants eax and memcpy wants esi. Don't
15300 consider such algorithms if the user has appropriated those
15301 registers for their own purposes. */
15302 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15304 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15306 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15307 || (alg != rep_prefix_1_byte \
15308 && alg != rep_prefix_4_byte \
15309 && alg != rep_prefix_8_byte))
15311 *dynamic_check = -1;
15313 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15315 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15316 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15317 return stringop_alg;
15318 /* rep; movq or rep; movl is the smallest variant. */
15319 else if (optimize_size)
15321 if (!count || (count & 3))
15322 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15324 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15326 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15328 else if (expected_size != -1 && expected_size < 4)
15329 return loop_1_byte;
15330 else if (expected_size != -1)
15333 enum stringop_alg alg = libcall;
15334 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
15336 /* We get here if the algorithms that were not libcall-based
15337 were rep-prefix based and we are unable to use rep prefixes
15338 based on global register usage. Break out of the loop and
15339 use the heuristic below. */
15340 if (algs->size[i].max == 0)
15342 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15344 enum stringop_alg candidate = algs->size[i].alg;
15346 if (candidate != libcall && ALG_USABLE_P (candidate))
15348 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15349 last non-libcall inline algorithm. */
15350 if (TARGET_INLINE_ALL_STRINGOPS)
15352 /* When the current size is best to be copied by a libcall,
15353 but we are still forced to inline, run the heuristic below
15354 that will pick code for medium sized blocks. */
15355 if (alg != libcall)
15359 else if (ALG_USABLE_P (candidate))
15363 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15365 /* When asked to inline the call anyway, try to pick meaningful choice.
15366 We look for maximal size of block that is faster to copy by hand and
15367 take blocks of at most of that size guessing that average size will
15368 be roughly half of the block.
15370 If this turns out to be bad, we might simply specify the preferred
15371 choice in ix86_costs. */
15372 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15373 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15376 enum stringop_alg alg;
15378 bool any_alg_usable_p = true;
15380 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
15382 enum stringop_alg candidate = algs->size[i].alg;
15383 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15385 if (candidate != libcall && candidate
15386 && ALG_USABLE_P (candidate))
15387 max = algs->size[i].max;
15389 /* If there aren't any usable algorithms, then recursing on
15390 smaller sizes isn't going to find anything. Just return the
15391 simple byte-at-a-time copy loop. */
15392 if (!any_alg_usable_p)
15394 /* Pick something reasonable. */
15395 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15396 *dynamic_check = 128;
15397 return loop_1_byte;
/* Recurse with half the maximal hand-copied size as the expected size;
   the recursion must itself settle on an inline (non-libcall) choice.  */
15401 alg = decide_alg (count, max / 2, memset, dynamic_check);
15402 gcc_assert (*dynamic_check == -1);
15403 gcc_assert (alg != libcall);
15404 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15405 *dynamic_check = max;
15408 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15409 #undef ALG_USABLE_P
15412 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15413 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* NOTE(review): extract elides some lines (return type, several case
   labels and the statements chosen under TARGET_PENTIUMPRO); code
   kept byte-identical.  Returns the alignment the prologue should
   establish for the chosen string-op algorithm ALG.  */
15415 decide_alignment (int align,
15416 enum stringop_alg alg,
15419 int desired_align = 0;
15423 gcc_unreachable ();
15425 case unrolled_loop:
15426 desired_align = GET_MODE_SIZE (Pmode);
15428 case rep_prefix_8_byte:
15431 case rep_prefix_4_byte:
15432 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15433 copying whole cacheline at once. */
15434 if (TARGET_PENTIUMPRO)
15439 case rep_prefix_1_byte:
15440 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15441 copying whole cacheline at once. */
15442 if (TARGET_PENTIUMPRO)
/* Never request less than the alignment we already have, and do not
   bother aligning blocks known to be tiny (< 4 bytes).  */
15456 if (desired_align < align)
15457 desired_align = align;
15458 if (expected_size != -1 && expected_size < 4)
15459 desired_align = align;
15460 return desired_align;
15463 /* Return the smallest power of 2 greater than VAL. */
/* NOTE(review): the function body and return type are elided from this
   extract; only the signature is visible here.  */
15465 smallest_pow2_greater_than (int val)
15473 /* Expand string move (memcpy) operation. Use i386 string operations when
15474 profitable. expand_setmem contains similar code. The code depends upon
15475 architecture, block size and alignment, but always has the same
15478 1) Prologue guard: Conditional that jumps up to epilogues for small
15479 blocks that can be handled by epilogue alone. This is faster but
15480 also needed for correctness, since prologue assume the block is larger
15481 than the desired alignment.
15483 Optional dynamic check for size and libcall for large
15484 blocks is emitted here too, with -minline-stringops-dynamically.
15486 2) Prologue: copy first few bytes in order to get destination aligned
15487 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15488 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15489 We emit either a jump tree on power of two sized blocks, or a byte loop.
15491 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15492 with specified algorithm.
15494 4) Epilogue: code copying tail of the block that is too small to be
15495 handled by main body (or up to size guarded by prologue guard). */
/* NOTE(review): this extract elides many original lines (return type,
   braces, some declarations, several switch arms); code below is kept
   byte-identical to what is visible.  */
15498 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15499 rtx expected_align_exp, rtx expected_size_exp)
15505 rtx jump_around_label = NULL;
15506 HOST_WIDE_INT align = 1;
15507 unsigned HOST_WIDE_INT count = 0;
15508 HOST_WIDE_INT expected_size = -1;
15509 int size_needed = 0, epilogue_size_needed;
15510 int desired_align = 0;
15511 enum stringop_alg alg;
15514 if (CONST_INT_P (align_exp))
15515 align = INTVAL (align_exp);
15516 /* i386 can do misaligned access on reasonably increased cost. */
15517 if (CONST_INT_P (expected_align_exp)
15518 && INTVAL (expected_align_exp) > align)
15519 align = INTVAL (expected_align_exp);
15520 if (CONST_INT_P (count_exp))
15521 count = expected_size = INTVAL (count_exp);
15522 if (CONST_INT_P (expected_size_exp) && count == 0)
15523 expected_size = INTVAL (expected_size_exp);
15525 /* Make sure we don't need to care about overflow later on. */
15526 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15529 /* Step 0: Decide on preferred algorithm, desired alignment and
15530 size of chunks to be copied by main loop. */
15532 alg = decide_alg (count, expected_size, false, &dynamic_check);
15533 desired_align = decide_alignment (align, alg, expected_size);
15535 if (!TARGET_ALIGN_STRINGOPS)
15536 align = desired_align;
15538 if (alg == libcall)
15540 gcc_assert (alg != no_stringop);
15542 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15543 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15544 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15549 gcc_unreachable ();
15551 size_needed = GET_MODE_SIZE (Pmode);
15553 case unrolled_loop:
15554 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15556 case rep_prefix_8_byte:
15559 case rep_prefix_4_byte:
15562 case rep_prefix_1_byte:
15568 epilogue_size_needed = size_needed;
15570 /* Step 1: Prologue guard. */
15572 /* Alignment code needs count to be in register. */
15573 if (CONST_INT_P (count_exp) && desired_align > align)
15574 count_exp = force_reg (counter_mode (count_exp), count_exp);
15575 gcc_assert (desired_align >= 1 && align >= 1);
15577 /* Ensure that alignment prologue won't copy past end of block. */
15578 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15580 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15581 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15582 Make sure it is power of 2. */
15583 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15585 if (CONST_INT_P (count_exp))
15587 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* Runtime guard: jump to the epilogue when the block is smaller than
   what the main body needs; branch probability reflects EXPECTED_SIZE.  */
15592 label = gen_label_rtx ();
15593 emit_cmp_and_jump_insns (count_exp,
15594 GEN_INT (epilogue_size_needed),
15595 LTU, 0, counter_mode (count_exp), 1, label);
15596 if (expected_size == -1 || expected_size < epilogue_size_needed)
15597 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15599 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15603 /* Emit code to decide on runtime whether library call or inline should be
15605 if (dynamic_check != -1)
15607 if (CONST_INT_P (count_exp))
15609 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15611 emit_block_move_via_libcall (dst, src, count_exp, false);
15612 count_exp = const0_rtx;
15618 rtx hot_label = gen_label_rtx ();
15619 jump_around_label = gen_label_rtx ();
15620 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15621 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15622 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15623 emit_block_move_via_libcall (dst, src, count_exp, false);
15624 emit_jump (jump_around_label);
15625 emit_label (hot_label);
15629 /* Step 2: Alignment prologue. */
15631 if (desired_align > align)
15633 /* Except for the first move in epilogue, we no longer know
15634 constant offset in aliasing info. It don't seems to worth
15635 the pain to maintain it for the first move, so throw away
15637 src = change_address (src, BLKmode, srcreg);
15638 dst = change_address (dst, BLKmode, destreg);
15639 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15642 if (label && size_needed == 1)
15644 emit_label (label);
15645 LABEL_NUSES (label) = 1;
15649 /* Step 3: Main loop. */
15655 gcc_unreachable ();
15657 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15658 count_exp, QImode, 1, expected_size);
15661 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15662 count_exp, Pmode, 1, expected_size);
15664 case unrolled_loop:
15665 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15666 registers for 4 temporaries anyway. */
15667 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15668 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15671 case rep_prefix_8_byte:
15672 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15675 case rep_prefix_4_byte:
15676 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15679 case rep_prefix_1_byte:
15680 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15684 /* Adjust properly the offset of src and dest memory for aliasing. */
15685 if (CONST_INT_P (count_exp))
15687 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15688 (count / size_needed) * size_needed);
15689 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15690 (count / size_needed) * size_needed);
15694 src = change_address (src, BLKmode, srcreg);
15695 dst = change_address (dst, BLKmode, destreg);
15698 /* Step 4: Epilogue to copy the remaining bytes. */
15702 /* When the main loop is done, COUNT_EXP might hold original count,
15703 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15704 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15705 bytes. Compensate if needed. */
15707 if (size_needed < epilogue_size_needed)
15710 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15711 GEN_INT (size_needed - 1), count_exp, 1,
15713 if (tmp != count_exp)
15714 emit_move_insn (count_exp, tmp)
15716 emit_label (label);
15717 LABEL_NUSES (label) = 1;
15720 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15721 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15722 epilogue_size_needed);
15723 if (jump_around_label)
15724 emit_label (jump_around_label);
15728 /* Helper function for memcpy. For QImode value 0xXY produce
15729 0xXYXYXYXY of wide specified by MODE. This is essentially
15730 a * 0x10101010, but we can do slightly better than
15731 synth_mult by unwinding the sequence by hand on CPUs with
/* NOTE(review): extract elides some original lines (return type, braces,
   a few statements); code kept byte-identical.  Constant inputs are
   folded directly; otherwise either a multiply by the promoted 0x01..01
   constant or a shift/or (or insv) sequence is chosen on cost grounds.  */
15734 promote_duplicated_reg (enum machine_mode mode, rtx val)
15736 enum machine_mode valmode = GET_MODE (val);
15738 int nops = mode == DImode ? 3 : 2;
15740 gcc_assert (mode == SImode || mode == DImode);
15741 if (val == const0_rtx)
15742 return copy_to_mode_reg (mode, const0_rtx);
15743 if (CONST_INT_P (val))
15745 HOST_WIDE_INT v = INTVAL (val) & 255;
/* Duplicate the low byte across the word at compile time.  */
15749 if (mode == DImode)
15750 v |= (v << 16) << 16;
15751 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15754 if (valmode == VOIDmode)
15756 if (valmode != QImode)
15757 val = gen_lowpart (QImode, val);
15758 if (mode == QImode)
15760 if (!TARGET_PARTIAL_REG_STALL)
15762 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15763 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15764 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15765 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15767 rtx reg = convert_modes (mode, QImode, val, true);
15768 tmp = promote_duplicated_reg (mode, const1_rtx);
15769 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15774 rtx reg = convert_modes (mode, QImode, val, true);
15776 if (!TARGET_PARTIAL_REG_STALL)
15777 if (mode == SImode)
15778 emit_insn (gen_movsi_insv_1 (reg, reg));
15780 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15783 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15784 NULL, 1, OPTAB_DIRECT);
15786 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15788 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15789 NULL, 1, OPTAB_DIRECT);
15790 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15791 if (mode == SImode)
15793 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15794 NULL, 1, OPTAB_DIRECT);
15795 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15800 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15801 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15802 alignment from ALIGN to DESIRED_ALIGN. */
/* NOTE(review): extract elides the return type, braces and part of the
   first condition (presumably a TARGET_64BIT test — confirm against the
   full file); code kept byte-identical.  Picks the widest promotion the
   main loop or alignment prologue can possibly store.  */
15804 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15809 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15810 promoted_val = promote_duplicated_reg (DImode, val);
15811 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15812 promoted_val = promote_duplicated_reg (SImode, val);
15813 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15814 promoted_val = promote_duplicated_reg (HImode, val);
15816 promoted_val = val;
15818 return promoted_val;
15821 /* Expand string clear operation (bzero). Use i386 string operations when
15822 profitable. See expand_movmem comment for explanation of individual
15823 steps performed. */
/* NOTE(review): this extract elides many original lines (return type,
   braces, declarations, several switch arms); code below is kept
   byte-identical to what is visible.  Structure parallels
   ix86_expand_movmem: decide algorithm, guard, align, main loop,
   epilogue — storing a promoted VAL instead of copying.  */
15825 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15826 rtx expected_align_exp, rtx expected_size_exp)
15831 rtx jump_around_label = NULL;
15832 HOST_WIDE_INT align = 1;
15833 unsigned HOST_WIDE_INT count = 0;
15834 HOST_WIDE_INT expected_size = -1;
15835 int size_needed = 0, epilogue_size_needed;
15836 int desired_align = 0;
15837 enum stringop_alg alg;
15838 rtx promoted_val = NULL;
15839 bool force_loopy_epilogue = false;
15842 if (CONST_INT_P (align_exp))
15843 align = INTVAL (align_exp);
15844 /* i386 can do misaligned access on reasonably increased cost. */
15845 if (CONST_INT_P (expected_align_exp)
15846 && INTVAL (expected_align_exp) > align)
15847 align = INTVAL (expected_align_exp);
15848 if (CONST_INT_P (count_exp))
15849 count = expected_size = INTVAL (count_exp);
15850 if (CONST_INT_P (expected_size_exp) && count == 0)
15851 expected_size = INTVAL (expected_size_exp);
15853 /* Make sure we don't need to care about overflow later on. */
15854 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15857 /* Step 0: Decide on preferred algorithm, desired alignment and
15858 size of chunks to be copied by main loop. */
15860 alg = decide_alg (count, expected_size, true, &dynamic_check);
15861 desired_align = decide_alignment (align, alg, expected_size);
15863 if (!TARGET_ALIGN_STRINGOPS)
15864 align = desired_align;
15866 if (alg == libcall)
15868 gcc_assert (alg != no_stringop);
15870 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15871 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15876 gcc_unreachable ();
15878 size_needed = GET_MODE_SIZE (Pmode);
15880 case unrolled_loop:
15881 size_needed = GET_MODE_SIZE (Pmode) * 4;
15883 case rep_prefix_8_byte:
15886 case rep_prefix_4_byte:
15889 case rep_prefix_1_byte:
15894 epilogue_size_needed = size_needed;
15896 /* Step 1: Prologue guard. */
15898 /* Alignment code needs count to be in register. */
15899 if (CONST_INT_P (count_exp) && desired_align > align)
15901 enum machine_mode mode = SImode;
15902 if (TARGET_64BIT && (count & ~0xffffffff))
15904 count_exp = force_reg (mode, count_exp);
15906 /* Do the cheap promotion to allow better CSE across the
15907 main loop and epilogue (ie one load of the big constant in the
15908 front of all code. */
15909 if (CONST_INT_P (val_exp))
15910 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15911 desired_align, align);
15912 /* Ensure that alignment prologue won't copy past end of block. */
15913 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15915 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15916 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15917 Make sure it is power of 2. */
15918 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15920 /* To improve performance of small blocks, we jump around the VAL
15921 promoting mode. This mean that if the promoted VAL is not constant,
15922 we might not use it in the epilogue and have to use byte
15924 if (epilogue_size_needed > 2 && !promoted_val)
15925 force_loopy_epilogue = true;
15926 label = gen_label_rtx ();
15927 emit_cmp_and_jump_insns (count_exp,
15928 GEN_INT (epilogue_size_needed),
15929 LTU, 0, counter_mode (count_exp), 1, label);
15930 if (GET_CODE (count_exp) == CONST_INT)
15932 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15933 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15935 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15937 if (dynamic_check != -1)
15939 rtx hot_label = gen_label_rtx ();
15940 jump_around_label = gen_label_rtx ();
15941 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15942 LEU, 0, counter_mode (count_exp), 1, hot_label);
15943 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15944 set_storage_via_libcall (dst, count_exp, val_exp, false);
15945 emit_jump (jump_around_label);
15946 emit_label (hot_label);
15949 /* Step 2: Alignment prologue. */
15951 /* Do the expensive promotion once we branched off the small blocks. */
15953 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15954 desired_align, align);
15955 gcc_assert (desired_align >= 1 && align >= 1);
15957 if (desired_align > align)
15959 /* Except for the first move in epilogue, we no longer know
15960 constant offset in aliasing info. It don't seems to worth
15961 the pain to maintain it for the first move, so throw away
15963 dst = change_address (dst, BLKmode, destreg);
15964 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15967 if (label && size_needed == 1)
15969 emit_label (label);
15970 LABEL_NUSES (label) = 1;
15974 /* Step 3: Main loop. */
15980 gcc_unreachable ();
15982 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15983 count_exp, QImode, 1, expected_size);
15986 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15987 count_exp, Pmode, 1, expected_size);
15989 case unrolled_loop:
15990 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15991 count_exp, Pmode, 4, expected_size);
15993 case rep_prefix_8_byte:
15994 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15997 case rep_prefix_4_byte:
15998 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16001 case rep_prefix_1_byte:
16002 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16006 /* Adjust properly the offset of src and dest memory for aliasing. */
16007 if (CONST_INT_P (count_exp))
16008 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
16009 (count / size_needed) * size_needed);
16011 dst = change_address (dst, BLKmode, destreg);
16013 /* Step 4: Epilogue to copy the remaining bytes. */
16017 /* When the main loop is done, COUNT_EXP might hold original count,
16018 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
16019 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
16020 bytes. Compensate if needed. */
16022 if (size_needed < desired_align - align)
16025 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
16026 GEN_INT (size_needed - 1), count_exp, 1,
16028 size_needed = desired_align - align + 1;
16029 if (tmp != count_exp)
16030 emit_move_insn (count_exp, tmp);
16032 emit_label (label);
16033 LABEL_NUSES (label) = 1;
16035 if (count_exp != const0_rtx && epilogue_size_needed > 1)
/* A non-constant VAL whose promotion was skipped forces the byte loop.  */
16037 if (force_loopy_epilogue)
16038 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
16041 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
16044 if (jump_around_label)
16045 emit_label (jump_around_label);
16049 /* Expand the appropriate insns for doing strlen if not just doing
16052 out = result, initialized with the start address
16053 align_rtx = alignment of the address.
16054 scratch = scratch register, initialized with the startaddress when
16055 not aligned, otherwise undefined
16057 This is just the body. It needs the initializations mentioned above and
16058 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): extract elides many lines (return type, braces,
   TARGET_64BIT conditionals, several statements); code kept
   byte-identical.  Aligns OUT to 4 with byte checks, then scans a
   word at a time using the classic (x - 0x01010101) & ~x & 0x80808080
   zero-byte test, and finally locates the exact zero byte.  */
16061 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
16065 rtx align_2_label = NULL_RTX;
16066 rtx align_3_label = NULL_RTX;
16067 rtx align_4_label = gen_label_rtx ();
16068 rtx end_0_label = gen_label_rtx ();
16070 rtx tmpreg = gen_reg_rtx (SImode);
16071 rtx scratch = gen_reg_rtx (SImode);
16075 if (CONST_INT_P (align_rtx))
16076 align = INTVAL (align_rtx);
16078 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
16080 /* Is there a known alignment and is it less than 4? */
16083 rtx scratch1 = gen_reg_rtx (Pmode);
16084 emit_move_insn (scratch1, out);
16085 /* Is there a known alignment and is it not 2? */
16088 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
16089 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
16091 /* Leave just the 3 lower bits. */
16092 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
16093 NULL_RTX, 0, OPTAB_WIDEN);
16095 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16096 Pmode, 1, align_4_label);
16097 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
16098 Pmode, 1, align_2_label);
16099 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
16100 Pmode, 1, align_3_label);
16104 /* Since the alignment is 2, we have to check 2 or 0 bytes;
16105 check if is aligned to 4 - byte. */
16107 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
16108 NULL_RTX, 0, OPTAB_WIDEN);
16110 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16111 Pmode, 1, align_4_label);
16114 mem = change_address (src, QImode, out);
16116 /* Now compare the bytes. */
16118 /* Compare the first n unaligned byte on a byte per byte basis. */
16119 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
16120 QImode, 1, end_0_label);
16122 /* Increment the address. */
16124 emit_insn (gen_adddi3 (out, out, const1_rtx));
16126 emit_insn (gen_addsi3 (out, out, const1_rtx));
16128 /* Not needed with an alignment of 2 */
16131 emit_label (align_2_label);
16133 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16137 emit_insn (gen_adddi3 (out, out, const1_rtx));
16139 emit_insn (gen_addsi3 (out, out, const1_rtx));
16141 emit_label (align_3_label);
16144 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16148 emit_insn (gen_adddi3 (out, out, const1_rtx));
16150 emit_insn (gen_addsi3 (out, out, const1_rtx));
16153 /* Generate loop to check 4 bytes at a time. It is not a good idea to
16154 align this loop. It gives only huge programs, but does not help to
16156 emit_label (align_4_label);
16158 mem = change_address (src, SImode, out);
16159 emit_move_insn (scratch, mem);
16161 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
16163 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
16165 /* This formula yields a nonzero result iff one of the bytes is zero.
16166 This saves three branches inside loop and many cycles. */
16168 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
16169 emit_insn (gen_one_cmplsi2 (scratch, scratch));
16170 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
16171 emit_insn (gen_andsi3 (tmpreg, tmpreg,
16172 gen_int_mode (0x80808080, SImode)));
16173 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
16178 rtx reg = gen_reg_rtx (SImode);
16179 rtx reg2 = gen_reg_rtx (Pmode);
16180 emit_move_insn (reg, tmpreg);
16181 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16183 /* If zero is not in the first two bytes, move two bytes forward. */
16184 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16185 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16186 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16187 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16188 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16191 /* Emit lea manually to avoid clobbering of flags. */
16192 emit_insn (gen_rtx_SET (SImode, reg2,
16193 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16195 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16196 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16197 emit_insn (gen_rtx_SET (VOIDmode, out,
16198 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16205 rtx end_2_label = gen_label_rtx ();
16206 /* Is zero in the first two bytes? */
16208 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16209 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16210 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16211 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16212 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16214 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16215 JUMP_LABEL (tmp) = end_2_label;
16217 /* Not in the first two. Move two bytes forward. */
16218 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16220 emit_insn (gen_adddi3 (out, out, const2_rtx));
16222 emit_insn (gen_addsi3 (out, out, const2_rtx));
16224 emit_label (end_2_label);
16228 /* Avoid branch in fixing the byte. */
16229 tmpreg = gen_lowpart (QImode, tmpreg);
16230 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16231 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16233 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16235 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16237 emit_label (end_0_label);
16240 /* Expand strlen. */
/* NOTE(review): extract elides several lines (return type, braces,
   condition tails); code kept byte-identical.  Two strategies: the
   unrolled SImode scanner above (when profitable), else the repnz
   scasb sequence via UNSPEC_SCAS — unless eax/ecx/edi are fixed.  */
16243 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16245 rtx addr, scratch1, scratch2, scratch3, scratch4;
16247 /* The generic case of strlen expander is long. Avoid it's
16248 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16250 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16251 && !TARGET_INLINE_ALL_STRINGOPS
16253 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16256 addr = force_reg (Pmode, XEXP (src, 0));
16257 scratch1 = gen_reg_rtx (Pmode);
16259 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16262 /* Well it seems that some optimizer does not combine a call like
16263 foo(strlen(bar), strlen(bar));
16264 when the move and the subtraction is done here. It does calculate
16265 the length just once when these instructions are done inside of
16266 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16267 often used and I use one fewer register for the lifetime of
16268 output_strlen_unroll() this is better. */
16270 emit_move_insn (out, addr);
16272 ix86_expand_strlensi_unroll_1 (out, src, align);
16274 /* strlensi_unroll_1 returns the address of the zero at the end of
16275 the string, like memchr(), so compute the length by subtracting
16276 the start address. */
16278 emit_insn (gen_subdi3 (out, out, addr));
16280 emit_insn (gen_subsi3 (out, out, addr));
16286 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16287 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16290 scratch2 = gen_reg_rtx (Pmode);
16291 scratch3 = gen_reg_rtx (Pmode);
16292 scratch4 = force_reg (Pmode, constm1_rtx);
16294 emit_move_insn (scratch3, addr);
16295 eoschar = force_reg (QImode, eoschar);
16297 src = replace_equiv_address_nv (src, scratch3);
16299 /* If .md starts supporting :P, this can be done in .md. */
16300 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16301 scratch4), UNSPEC_SCAS);
16302 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scasb leaves the negated count; ~x - 1 recovers the length.  */
16305 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16306 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16310 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16311 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16317 /* For given symbol (function) construct code to compute address of it's PLT
16318    entry in large x86-64 PIC model.  */
/* NOTE(review): the extract dropped the return-type line, braces and the
   trailing "return tmp;".  */
16320 construct_plt_address (rtx symbol)
16322   rtx tmp = gen_reg_rtx (Pmode);
16323   rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
/* Valid only for SYMBOL_REFs while compiling for the large PIC model.  */
16325   gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16326   gcc_assert (ix86_cmodel == CM_LARGE_PIC);
/* tmp = symbol@PLTOFF; then add the PIC register (pic_offset_table_rtx)
   to form the absolute PLT entry address.  */
16328   emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16329   emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Expand a call (or sibcall) to FNADDR with argument bookkeeping rtx
   CALLARG1/CALLARG2, optionally popping POP bytes on return.
   NOTE(review): lossy extract -- braces and several guard lines are
   missing; comments describe only the visible lines.  */
16334 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16335 		  rtx callarg2 ATTRIBUTE_UNUSED,
16336 		  rtx pop, int sibcall)
16338   rtx use = NULL, call;
16340   if (pop == const0_rtx)
/* 64-bit ABI never uses callee-pop.  */
16342   gcc_assert (!TARGET_64BIT || !pop);
16344   if (TARGET_MACHO && !TARGET_64BIT)
16347       if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16348 	fnaddr = machopic_indirect_call_target (fnaddr);
16353       /* Static functions and indirect calls don't need the pic register.  */
16354       if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16355 	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16356 	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16357 	use_reg (&use, pic_offset_table_rtx)
/* x86-64 varargs: %al carries the number of SSE registers used.  */
16360   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16362       rtx al = gen_rtx_REG (QImode, AX_REG);
16363       emit_move_insn (al, callarg2);
16364       use_reg (&use, al);
/* Large PIC model: route non-local symbol calls through the PLT address
   computed by construct_plt_address.  */
16367   if (ix86_cmodel == CM_LARGE_PIC
16368       && GET_CODE (fnaddr) == MEM
16369       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16370       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16371     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16372   else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16374       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16375       fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls through a non-constant address must go via r11 (the
   only call-clobbered register not used for argument passing).  */
16377   if (sibcall && TARGET_64BIT
16378       && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16381       addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16382       fnaddr = gen_rtx_REG (Pmode, R11_REG);
16383       emit_move_insn (fnaddr, addr);
16384       fnaddr = gen_rtx_MEM (QImode, fnaddr);
16387   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16389     call = gen_rtx_SET (VOIDmode, retval, call);
/* Callee-pop: wrap the call and the stack adjustment in one PARALLEL.  */
16392       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16393       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16394       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16397   call = emit_call_insn (call);
16399     CALL_INSN_FUNCTION_USAGE (call) = use;
16403 /* Clear stack slot assignments remembered from previous functions.
16404    This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and zero a fresh per-function machine_function record.
   NOTE(review): the trailing "return f;" is not visible in this extract.  */
16407 static struct machine_function *
16408 ix86_init_machine_status (void)
16410   struct machine_function *f;
16412   f = GGC_CNEW (struct machine_function);
/* -1 marks "not yet computed" for the fast prologue/epilogue heuristic.  */
16413   f->use_fast_prologue_epilogue_nregs = -1;
16414   f->tls_descriptor_call_expanded_p = 0;
16419 /* Return a MEM corresponding to a stack slot with mode MODE.
16420    Allocate a new slot if necessary.
16422    The RTL for a function can have several slots available: N is
16423    which slot to use.  */
16426 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16428   struct stack_local_entry *s;
16430   gcc_assert (n < MAX_386_STACK_LOCALS);
16432   /* Virtual slot is valid only before vregs are instantiated.  */
16433   gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse a previously created slot with the same mode and index; return
   a copy so callers may modify the rtx freely.  */
16435   for (s = ix86_stack_locals; s; s = s->next)
16436     if (s->mode == mode && s->n == n)
16437       return copy_rtx (s->rtl);
/* Not found: allocate a new entry and push it on the ix86_stack_locals
   list (GC-allocated, lives for the function).  */
16439   s = (struct stack_local_entry *)
16440     ggc_alloc (sizeof (struct stack_local_entry));
16443   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16445   s->next = ix86_stack_locals;
16446   ix86_stack_locals = s;
16450 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
16452 static GTY(()) rtx ix86_tls_symbol;
/* Lazily create and cache the SYMBOL_REF; the GNU TLS variant uses the
   triple-underscore name.  */
16454 ix86_tls_get_addr (void)
16457   if (!ix86_tls_symbol)
16459       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16460 					    (TARGET_ANY_GNU_TLS
16462 					     ? "___tls_get_addr"
16463 					     : "__tls_get_addr");
16466   return ix86_tls_symbol;
16469 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
16471 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily create and cache the symbol, flagging it as a global-dynamic
   TLS reference so later code treats it correctly.  */
16473 ix86_tls_module_base (void)
16476   if (!ix86_tls_module_base_symbol)
16478       ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16479 							"_TLS_MODULE_BASE_");
16480       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16481 	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16484   return ix86_tls_module_base_symbol;
16487 /* Calculate the length of the memory address in the instruction
16488    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
/* NOTE(review): lossy extract -- the returned byte counts and several
   else-arms are not visible; comments describe the visible structure.  */
16491 memory_address_length (rtx addr)
16493   struct ix86_address parts;
16494   rtx base, index, disp;
/* Autoinc/automod addressing has a fixed (zero extra) encoding cost.  */
16498   if (GET_CODE (addr) == PRE_DEC
16499       || GET_CODE (addr) == POST_INC
16500       || GET_CODE (addr) == PRE_MODIFY
16501       || GET_CODE (addr) == POST_MODIFY)
16504   ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the register-identity checks below work.  */
16507   if (parts.base && GET_CODE (parts.base) == SUBREG)
16508     parts.base = SUBREG_REG (parts.base);
16509   if (parts.index && GET_CODE (parts.index) == SUBREG)
16510     parts.index = SUBREG_REG (parts.index);
16513   index = parts.index;
16518        - esp as the base always wants an index,
16519        - ebp as the base always wants a displacement.  */
16521   /* Register Indirect.  */
16522   if (base && !index && !disp)
16524       /* esp (for its index) and ebp (for its displacement) need
16525 	 the two-byte modrm form.  */
16526       if (addr == stack_pointer_rtx
16527 	  || addr == arg_pointer_rtx
16528 	  || addr == frame_pointer_rtx
16529 	  || addr == hard_frame_pointer_rtx)
16533   /* Direct Addressing.  */
16534   else if (disp && !base && !index)
16539       /* Find the length of the displacement constant.  */
/* satisfies_constraint_K is the 8-bit signed-immediate test.  */
16542 	  if (base && satisfies_constraint_K (disp))
16547       /* ebp always wants a displacement.  */
16548       else if (base == hard_frame_pointer_rtx)
16551       /* An index requires the two-byte modrm form....  */
16553 	  /* ...like esp, which always wants an index.  */
16554 	  || base == stack_pointer_rtx
16555 	  || base == arg_pointer_rtx
16556 	  || base == frame_pointer_rtx)
16563 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
16564    is set, expect that insn have 8bit immediate alternative.  */
16566 ix86_attr_length_immediate_default (rtx insn, int shortform)
/* Scan the insn's operands for the (first) constant operand and size it
   from the insn's mode attribute.  NOTE(review): the per-mode return
   values were dropped by the extraction.  */
16570   extract_insn_cached (insn);
16571   for (i = recog_data.n_operands - 1; i >= 0; --i)
16572     if (CONSTANT_P (recog_data.operand[i]))
/* 8-bit immediate form applies when the constant fits constraint K.  */
16575 	if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16579 	switch (get_attr_mode (insn))
16590 	  /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
16595 	    fatal_insn ("unknown insn mode", insn);
16601 /* Compute default value for "length_address" attribute.  */
16603 ix86_attr_length_address_default (rtx insn)
/* LEA is special: its address is in SET_SRC rather than in a MEM
   operand, so measure that directly.  */
16607   if (get_attr_type (insn) == TYPE_LEA)
16609       rtx set = PATTERN (insn);
16611       if (GET_CODE (set) == PARALLEL)
16612 	set = XVECEXP (set, 0, 0);
16614       gcc_assert (GET_CODE (set) == SET);
16616       return memory_address_length (SET_SRC (set));
/* Otherwise, size the address of the first MEM operand found.  */
16619   extract_insn_cached (insn);
16620   for (i = recog_data.n_operands - 1; i >= 0; --i)
16621     if (MEM_P (recog_data.operand[i]))
16623 	return memory_address_length (XEXP (recog_data.operand[i], 0));
16629 /* Return the maximum number of instructions a cpu can issue.  */
16632 ix86_issue_rate (void)
/* Per-CPU issue widths; NOTE(review): the returned constants and the
   switch header were dropped by the extraction.  */
16636     case PROCESSOR_PENTIUM:
16640     case PROCESSOR_PENTIUMPRO:
16641     case PROCESSOR_PENTIUM4:
16642     case PROCESSOR_ATHLON:
16644     case PROCESSOR_AMDFAM10:
16645     case PROCESSOR_NOCONA:
16646     case PROCESSOR_GENERIC32:
16647     case PROCESSOR_GENERIC64:
16650     case PROCESSOR_CORE2:
16658 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16659 by DEP_INSN and nothing set by DEP_INSN. */
16662 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16666 /* Simplify the test for uninteresting insns. */
16667 if (insn_type != TYPE_SETCC
16668 && insn_type != TYPE_ICMOV
16669 && insn_type != TYPE_FCMOV
16670 && insn_type != TYPE_IBR)
16673 if ((set = single_set (dep_insn)) != 0)
16675 set = SET_DEST (set);
16678 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16679 && XVECLEN (PATTERN (dep_insn), 0) == 2
16680 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16681 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16683 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16684 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16689 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16692 /* This test is true if the dependent insn reads the flags but
16693 not any other potentially set register. */
16694 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16697 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16703 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16704    address with operands set by DEP_INSN.  */
16707 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* For LEA the "address" is the SET_SRC of the pattern; otherwise take
   the address of the first MEM operand found.  */
16711   if (insn_type == TYPE_LEA
16714       addr = PATTERN (insn);
16716       if (GET_CODE (addr) == PARALLEL)
16717 	addr = XVECEXP (addr, 0, 0);
16719       gcc_assert (GET_CODE (addr) == SET);
16721       addr = SET_SRC (addr);
16726       extract_insn_cached (insn);
16727       for (i = recog_data.n_operands - 1; i >= 0; --i)
16728 	if (MEM_P (recog_data.operand[i]))
16730 	    addr = XEXP (recog_data.operand[i], 0);
/* AGI stall exists iff DEP_INSN writes something the address reads.  */
16737   return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependency LINK between
   DEP_INSN (producer) and INSN (consumer) for the tuned processor.
   NOTE(review): lossy extract -- the switch header, cost adjustments and
   returns are partially missing; comments describe visible lines only.  */
16741 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16743   enum attr_type insn_type, dep_insn_type;
16744   enum attr_memory memory;
16746   int dep_insn_code_number;
16748   /* Anti and output dependencies have zero cost on all CPUs.  */
16749   if (REG_NOTE_KIND (link) != 0)
16752   dep_insn_code_number = recog_memoized (dep_insn);
16754   /* If we can't recognize the insns, we can't really do anything.  */
16755   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16758   insn_type = get_attr_type (insn);
16759   dep_insn_type = get_attr_type (dep_insn);
16763     case PROCESSOR_PENTIUM:
16764       /* Address Generation Interlock adds a cycle of latency.  */
16765       if (ix86_agi_dependent (insn, dep_insn, insn_type))
16768       /* ??? Compares pair with jump/setcc.  */
16769       if (ix86_flags_dependent (insn, dep_insn, insn_type))
16772       /* Floating point stores require value to be ready one cycle earlier.  */
16773       if (insn_type == TYPE_FMOV
16774 	  && get_attr_memory (insn) == MEMORY_STORE
16775 	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
16779     case PROCESSOR_PENTIUMPRO:
16780       memory = get_attr_memory (insn);
16782       /* INT->FP conversion is expensive.  */
16783       if (get_attr_fp_int_src (dep_insn))
16786       /* There is one cycle extra latency between an FP op and a store.  */
16787       if (insn_type == TYPE_FMOV
16788 	  && (set = single_set (dep_insn)) != NULL_RTX
16789 	  && (set2 = single_set (insn)) != NULL_RTX
16790 	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16791 	  && MEM_P (SET_DEST (set2)))
16794       /* Show ability of reorder buffer to hide latency of load by executing
16795 	 in parallel with previous instruction in case
16796 	 previous instruction is not needed to compute the address.  */
16797       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16798 	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
16800 	  /* Claim moves to take one cycle, as core can issue one load
16801 	     at time and the next load can start cycle later.  */
16802 	  if (dep_insn_type == TYPE_IMOV
16803 	      || dep_insn_type == TYPE_FMOV)
/* Next case (label not visible in extract): same ROB-hiding logic with a
   push/pop special-case for the implicit esp dependency.  */
16811       memory = get_attr_memory (insn);
16813       /* The esp dependency is resolved before the instruction is really
16815       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16816 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16819       /* INT->FP conversion is expensive.  */
16820       if (get_attr_fp_int_src (dep_insn))
16823       /* Show ability of reorder buffer to hide latency of load by executing
16824 	 in parallel with previous instruction in case
16825 	 previous instruction is not needed to compute the address.  */
16826       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16827 	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
16829 	  /* Claim moves to take one cycle, as core can issue one load
16830 	     at time and the next load can start cycle later.  */
16831 	  if (dep_insn_type == TYPE_IMOV
16832 	      || dep_insn_type == TYPE_FMOV)
16841     case PROCESSOR_ATHLON:
16843     case PROCESSOR_AMDFAM10:
16844     case PROCESSOR_GENERIC32:
16845     case PROCESSOR_GENERIC64:
16846       memory = get_attr_memory (insn);
16848       /* Show ability of reorder buffer to hide latency of load by executing
16849 	 in parallel with previous instruction in case
16850 	 previous instruction is not needed to compute the address.  */
16851       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16852 	  && !ix86_agi_dependent (insn, dep_insn, insn_type))
16854 	  enum attr_unit unit = get_attr_unit (insn);
16857 	  /* Because of the difference between the length of integer and
16858 	     floating unit pipeline preparation stages, the memory operands
16859 	     for floating point are cheaper.
16861 	     ??? For Athlon it the difference is most probably 2.  */
16862 	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16865 	    loadcost = TARGET_ATHLON ? 2 : 0;
16867 	  if (cost >= loadcost)
16880 /* How many alternative schedules to try.  This should be as wide as the
16881    scheduling freedom in the DFA, but no wider.  Making this value too
16882    large results extra work for the scheduler.  */
16885 ia32_multipass_dfa_lookahead (void)
/* Per-CPU lookahead depths; NOTE(review): the returned constants were
   dropped by the extraction.  */
16889     case PROCESSOR_PENTIUM:
16892     case PROCESSOR_PENTIUMPRO:
16902 /* Compute the alignment given to a constant that is being placed in memory.
16903    EXP is the constant and ALIGN is the alignment that the object would
16905    The value of this function is used instead of that alignment to align
16909 ix86_constant_alignment (tree exp, int align)
/* Bump small alignments for wide scalar constants; NOTE(review): the
   returned alignment values were dropped by the extraction.  */
16911   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16912       || TREE_CODE (exp) == INTEGER_CST)
16914       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16916       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Longer string constants get word alignment when not optimizing for
   size (helps the string functions).  */
16919   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16920 	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16921     return BITS_PER_WORD;
16926 /* Compute the alignment for a static variable.
16927    TYPE is the data type, and ALIGN is the alignment that
16928    the object would ordinarily have.  The value of this function is used
16929    instead of that alignment to align the object.  */
16932 ix86_data_alignment (tree type, int align)
/* Cap the boost at 256 bits (or a word when optimizing for size).
   NOTE(review): lossy extract -- the returned values after each test
   are not visible.  */
16934   int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16936   if (AGGREGATE_TYPE_P (type)
16937       && TYPE_SIZE (type)
16938       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16939       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16940 	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16941       && align < max_align)
16944   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16945      to 16byte boundary.  */
16948   if (AGGREGATE_TYPE_P (type)
16949       && TYPE_SIZE (type)
16950       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16951       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16952 	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Element-type driven boosts for arrays / complex / records / scalars.  */
16956   if (TREE_CODE (type) == ARRAY_TYPE)
16958       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16960       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16963   else if (TREE_CODE (type) == COMPLEX_TYPE)
16966       if (TYPE_MODE (type) == DCmode && align < 64)
16968       if (TYPE_MODE (type) == XCmode && align < 128)
16971   else if ((TREE_CODE (type) == RECORD_TYPE
16972 	    || TREE_CODE (type) == UNION_TYPE
16973 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
16974 	   && TYPE_FIELDS (type))
16976       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16978       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16981   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16982 	   || TREE_CODE (type) == INTEGER_TYPE)
16984       if (TYPE_MODE (type) == DFmode && align < 64)
16986       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16993 /* Compute the alignment for a local variable.
16994    TYPE is the data type, and ALIGN is the alignment that
16995    the object would ordinarily have.  The value of this macro is used
16996    instead of that alignment to align the object.  */
16999 ix86_local_alignment (tree type, int align)
/* Mirrors ix86_data_alignment but for stack locals; NOTE(review): the
   returned alignment values were dropped by the extraction.  */
17001   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17002      to 16byte boundary.  */
17005   if (AGGREGATE_TYPE_P (type)
17006       && TYPE_SIZE (type)
17007       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17008       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
17009 	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Element-type driven boosts, parallel to the static-data version.  */
17012   if (TREE_CODE (type) == ARRAY_TYPE)
17014       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17016       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17019   else if (TREE_CODE (type) == COMPLEX_TYPE)
17021       if (TYPE_MODE (type) == DCmode && align < 64)
17023       if (TYPE_MODE (type) == XCmode && align < 128)
17026   else if ((TREE_CODE (type) == RECORD_TYPE
17027 	    || TREE_CODE (type) == UNION_TYPE
17028 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
17029 	   && TYPE_FIELDS (type))
17031       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17033       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17036   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17037 	   || TREE_CODE (type) == INTEGER_TYPE)
17040       if (TYPE_MODE (type) == DFmode && align < 64)
17042       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17048 /* Emit RTL insns to initialize the variable parts of a trampoline.
17049    FNADDR is an RTX for the address of the function's pure code.
17050    CXT is an RTX for the static chain value for the function.  */
/* NOTE(review): lossy extract -- the TARGET_64BIT branch selector, the
   offset updates and several operand lines are missing.  The byte values
   written below are x86 opcodes (0xb9 = mov ecx,imm32; 0xe9 = jmp rel32;
   REX-prefixed movs and jmp for the 64-bit path).  */
17052 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* 32-bit layout: mov ecx, cxt; jmp rel32 to fnaddr.  */
17056       /* Compute offset from the end of the jmp to the target function.  */
17057       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
17058 			       plus_constant (tramp, 10),
17059 			       NULL_RTX, 1, OPTAB_DIRECT);
17060       emit_move_insn (gen_rtx_MEM (QImode, tramp),
17061 		      gen_int_mode (0xb9, QImode));
17062       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
17063       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
17064 		      gen_int_mode (0xe9, QImode));
17065       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* 64-bit layout: load fnaddr into r11 (short movl when it zero-extends,
   else movabs), movabs the static chain into r10, then jmp *r11.  */
17070       /* Try to load address using shorter movl instead of movabs.
17071 	 We may want to support movq for kernel mode, but kernel does not use
17072 	 trampolines at the moment.  */
17073       if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17075 	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
17076 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17077 			  gen_int_mode (0xbb41, HImode));
17078 	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
17079 			  gen_lowpart (SImode, fnaddr));
17084 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17085 			  gen_int_mode (0xbb49, HImode));
17086 	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17090       /* Load static chain using movabs to r10.  */
17091       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17092 		      gen_int_mode (0xba49, HImode));
17093       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17096       /* Jump to the r11 */
17097       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17098 		      gen_int_mode (0xff49, HImode));
17099       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
17100 		      gen_int_mode (0xe3, QImode));
/* Sanity-check we did not overflow the trampoline area.  */
17102       gcc_assert (offset <= TRAMPOLINE_SIZE);
17105 #ifdef ENABLE_EXECUTE_STACK
17106   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17107 		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
17111 /* Codes for all the SSE/MMX builtins. */
17114 IX86_BUILTIN_ADDPS,
17115 IX86_BUILTIN_ADDSS,
17116 IX86_BUILTIN_DIVPS,
17117 IX86_BUILTIN_DIVSS,
17118 IX86_BUILTIN_MULPS,
17119 IX86_BUILTIN_MULSS,
17120 IX86_BUILTIN_SUBPS,
17121 IX86_BUILTIN_SUBSS,
17123 IX86_BUILTIN_CMPEQPS,
17124 IX86_BUILTIN_CMPLTPS,
17125 IX86_BUILTIN_CMPLEPS,
17126 IX86_BUILTIN_CMPGTPS,
17127 IX86_BUILTIN_CMPGEPS,
17128 IX86_BUILTIN_CMPNEQPS,
17129 IX86_BUILTIN_CMPNLTPS,
17130 IX86_BUILTIN_CMPNLEPS,
17131 IX86_BUILTIN_CMPNGTPS,
17132 IX86_BUILTIN_CMPNGEPS,
17133 IX86_BUILTIN_CMPORDPS,
17134 IX86_BUILTIN_CMPUNORDPS,
17135 IX86_BUILTIN_CMPEQSS,
17136 IX86_BUILTIN_CMPLTSS,
17137 IX86_BUILTIN_CMPLESS,
17138 IX86_BUILTIN_CMPNEQSS,
17139 IX86_BUILTIN_CMPNLTSS,
17140 IX86_BUILTIN_CMPNLESS,
17141 IX86_BUILTIN_CMPNGTSS,
17142 IX86_BUILTIN_CMPNGESS,
17143 IX86_BUILTIN_CMPORDSS,
17144 IX86_BUILTIN_CMPUNORDSS,
17146 IX86_BUILTIN_COMIEQSS,
17147 IX86_BUILTIN_COMILTSS,
17148 IX86_BUILTIN_COMILESS,
17149 IX86_BUILTIN_COMIGTSS,
17150 IX86_BUILTIN_COMIGESS,
17151 IX86_BUILTIN_COMINEQSS,
17152 IX86_BUILTIN_UCOMIEQSS,
17153 IX86_BUILTIN_UCOMILTSS,
17154 IX86_BUILTIN_UCOMILESS,
17155 IX86_BUILTIN_UCOMIGTSS,
17156 IX86_BUILTIN_UCOMIGESS,
17157 IX86_BUILTIN_UCOMINEQSS,
17159 IX86_BUILTIN_CVTPI2PS,
17160 IX86_BUILTIN_CVTPS2PI,
17161 IX86_BUILTIN_CVTSI2SS,
17162 IX86_BUILTIN_CVTSI642SS,
17163 IX86_BUILTIN_CVTSS2SI,
17164 IX86_BUILTIN_CVTSS2SI64,
17165 IX86_BUILTIN_CVTTPS2PI,
17166 IX86_BUILTIN_CVTTSS2SI,
17167 IX86_BUILTIN_CVTTSS2SI64,
17169 IX86_BUILTIN_MAXPS,
17170 IX86_BUILTIN_MAXSS,
17171 IX86_BUILTIN_MINPS,
17172 IX86_BUILTIN_MINSS,
17174 IX86_BUILTIN_LOADUPS,
17175 IX86_BUILTIN_STOREUPS,
17176 IX86_BUILTIN_MOVSS,
17178 IX86_BUILTIN_MOVHLPS,
17179 IX86_BUILTIN_MOVLHPS,
17180 IX86_BUILTIN_LOADHPS,
17181 IX86_BUILTIN_LOADLPS,
17182 IX86_BUILTIN_STOREHPS,
17183 IX86_BUILTIN_STORELPS,
17185 IX86_BUILTIN_MASKMOVQ,
17186 IX86_BUILTIN_MOVMSKPS,
17187 IX86_BUILTIN_PMOVMSKB,
17189 IX86_BUILTIN_MOVNTPS,
17190 IX86_BUILTIN_MOVNTQ,
17192 IX86_BUILTIN_LOADDQU,
17193 IX86_BUILTIN_STOREDQU,
17195 IX86_BUILTIN_PACKSSWB,
17196 IX86_BUILTIN_PACKSSDW,
17197 IX86_BUILTIN_PACKUSWB,
17199 IX86_BUILTIN_PADDB,
17200 IX86_BUILTIN_PADDW,
17201 IX86_BUILTIN_PADDD,
17202 IX86_BUILTIN_PADDQ,
17203 IX86_BUILTIN_PADDSB,
17204 IX86_BUILTIN_PADDSW,
17205 IX86_BUILTIN_PADDUSB,
17206 IX86_BUILTIN_PADDUSW,
17207 IX86_BUILTIN_PSUBB,
17208 IX86_BUILTIN_PSUBW,
17209 IX86_BUILTIN_PSUBD,
17210 IX86_BUILTIN_PSUBQ,
17211 IX86_BUILTIN_PSUBSB,
17212 IX86_BUILTIN_PSUBSW,
17213 IX86_BUILTIN_PSUBUSB,
17214 IX86_BUILTIN_PSUBUSW,
17217 IX86_BUILTIN_PANDN,
17221 IX86_BUILTIN_PAVGB,
17222 IX86_BUILTIN_PAVGW,
17224 IX86_BUILTIN_PCMPEQB,
17225 IX86_BUILTIN_PCMPEQW,
17226 IX86_BUILTIN_PCMPEQD,
17227 IX86_BUILTIN_PCMPGTB,
17228 IX86_BUILTIN_PCMPGTW,
17229 IX86_BUILTIN_PCMPGTD,
17231 IX86_BUILTIN_PMADDWD,
17233 IX86_BUILTIN_PMAXSW,
17234 IX86_BUILTIN_PMAXUB,
17235 IX86_BUILTIN_PMINSW,
17236 IX86_BUILTIN_PMINUB,
17238 IX86_BUILTIN_PMULHUW,
17239 IX86_BUILTIN_PMULHW,
17240 IX86_BUILTIN_PMULLW,
17242 IX86_BUILTIN_PSADBW,
17243 IX86_BUILTIN_PSHUFW,
17245 IX86_BUILTIN_PSLLW,
17246 IX86_BUILTIN_PSLLD,
17247 IX86_BUILTIN_PSLLQ,
17248 IX86_BUILTIN_PSRAW,
17249 IX86_BUILTIN_PSRAD,
17250 IX86_BUILTIN_PSRLW,
17251 IX86_BUILTIN_PSRLD,
17252 IX86_BUILTIN_PSRLQ,
17253 IX86_BUILTIN_PSLLWI,
17254 IX86_BUILTIN_PSLLDI,
17255 IX86_BUILTIN_PSLLQI,
17256 IX86_BUILTIN_PSRAWI,
17257 IX86_BUILTIN_PSRADI,
17258 IX86_BUILTIN_PSRLWI,
17259 IX86_BUILTIN_PSRLDI,
17260 IX86_BUILTIN_PSRLQI,
17262 IX86_BUILTIN_PUNPCKHBW,
17263 IX86_BUILTIN_PUNPCKHWD,
17264 IX86_BUILTIN_PUNPCKHDQ,
17265 IX86_BUILTIN_PUNPCKLBW,
17266 IX86_BUILTIN_PUNPCKLWD,
17267 IX86_BUILTIN_PUNPCKLDQ,
17269 IX86_BUILTIN_SHUFPS,
17271 IX86_BUILTIN_RCPPS,
17272 IX86_BUILTIN_RCPSS,
17273 IX86_BUILTIN_RSQRTPS,
17274 IX86_BUILTIN_RSQRTPS_NR,
17275 IX86_BUILTIN_RSQRTSS,
17276 IX86_BUILTIN_RSQRTF,
17277 IX86_BUILTIN_SQRTPS,
17278 IX86_BUILTIN_SQRTPS_NR,
17279 IX86_BUILTIN_SQRTSS,
17281 IX86_BUILTIN_UNPCKHPS,
17282 IX86_BUILTIN_UNPCKLPS,
17284 IX86_BUILTIN_ANDPS,
17285 IX86_BUILTIN_ANDNPS,
17287 IX86_BUILTIN_XORPS,
17290 IX86_BUILTIN_LDMXCSR,
17291 IX86_BUILTIN_STMXCSR,
17292 IX86_BUILTIN_SFENCE,
17294 /* 3DNow! Original */
17295 IX86_BUILTIN_FEMMS,
17296 IX86_BUILTIN_PAVGUSB,
17297 IX86_BUILTIN_PF2ID,
17298 IX86_BUILTIN_PFACC,
17299 IX86_BUILTIN_PFADD,
17300 IX86_BUILTIN_PFCMPEQ,
17301 IX86_BUILTIN_PFCMPGE,
17302 IX86_BUILTIN_PFCMPGT,
17303 IX86_BUILTIN_PFMAX,
17304 IX86_BUILTIN_PFMIN,
17305 IX86_BUILTIN_PFMUL,
17306 IX86_BUILTIN_PFRCP,
17307 IX86_BUILTIN_PFRCPIT1,
17308 IX86_BUILTIN_PFRCPIT2,
17309 IX86_BUILTIN_PFRSQIT1,
17310 IX86_BUILTIN_PFRSQRT,
17311 IX86_BUILTIN_PFSUB,
17312 IX86_BUILTIN_PFSUBR,
17313 IX86_BUILTIN_PI2FD,
17314 IX86_BUILTIN_PMULHRW,
17316 /* 3DNow! Athlon Extensions */
17317 IX86_BUILTIN_PF2IW,
17318 IX86_BUILTIN_PFNACC,
17319 IX86_BUILTIN_PFPNACC,
17320 IX86_BUILTIN_PI2FW,
17321 IX86_BUILTIN_PSWAPDSI,
17322 IX86_BUILTIN_PSWAPDSF,
17325 IX86_BUILTIN_ADDPD,
17326 IX86_BUILTIN_ADDSD,
17327 IX86_BUILTIN_DIVPD,
17328 IX86_BUILTIN_DIVSD,
17329 IX86_BUILTIN_MULPD,
17330 IX86_BUILTIN_MULSD,
17331 IX86_BUILTIN_SUBPD,
17332 IX86_BUILTIN_SUBSD,
17334 IX86_BUILTIN_CMPEQPD,
17335 IX86_BUILTIN_CMPLTPD,
17336 IX86_BUILTIN_CMPLEPD,
17337 IX86_BUILTIN_CMPGTPD,
17338 IX86_BUILTIN_CMPGEPD,
17339 IX86_BUILTIN_CMPNEQPD,
17340 IX86_BUILTIN_CMPNLTPD,
17341 IX86_BUILTIN_CMPNLEPD,
17342 IX86_BUILTIN_CMPNGTPD,
17343 IX86_BUILTIN_CMPNGEPD,
17344 IX86_BUILTIN_CMPORDPD,
17345 IX86_BUILTIN_CMPUNORDPD,
17346 IX86_BUILTIN_CMPEQSD,
17347 IX86_BUILTIN_CMPLTSD,
17348 IX86_BUILTIN_CMPLESD,
17349 IX86_BUILTIN_CMPNEQSD,
17350 IX86_BUILTIN_CMPNLTSD,
17351 IX86_BUILTIN_CMPNLESD,
17352 IX86_BUILTIN_CMPORDSD,
17353 IX86_BUILTIN_CMPUNORDSD,
17355 IX86_BUILTIN_COMIEQSD,
17356 IX86_BUILTIN_COMILTSD,
17357 IX86_BUILTIN_COMILESD,
17358 IX86_BUILTIN_COMIGTSD,
17359 IX86_BUILTIN_COMIGESD,
17360 IX86_BUILTIN_COMINEQSD,
17361 IX86_BUILTIN_UCOMIEQSD,
17362 IX86_BUILTIN_UCOMILTSD,
17363 IX86_BUILTIN_UCOMILESD,
17364 IX86_BUILTIN_UCOMIGTSD,
17365 IX86_BUILTIN_UCOMIGESD,
17366 IX86_BUILTIN_UCOMINEQSD,
17368 IX86_BUILTIN_MAXPD,
17369 IX86_BUILTIN_MAXSD,
17370 IX86_BUILTIN_MINPD,
17371 IX86_BUILTIN_MINSD,
17373 IX86_BUILTIN_ANDPD,
17374 IX86_BUILTIN_ANDNPD,
17376 IX86_BUILTIN_XORPD,
17378 IX86_BUILTIN_SQRTPD,
17379 IX86_BUILTIN_SQRTSD,
17381 IX86_BUILTIN_UNPCKHPD,
17382 IX86_BUILTIN_UNPCKLPD,
17384 IX86_BUILTIN_SHUFPD,
17386 IX86_BUILTIN_LOADUPD,
17387 IX86_BUILTIN_STOREUPD,
17388 IX86_BUILTIN_MOVSD,
17390 IX86_BUILTIN_LOADHPD,
17391 IX86_BUILTIN_LOADLPD,
17393 IX86_BUILTIN_CVTDQ2PD,
17394 IX86_BUILTIN_CVTDQ2PS,
17396 IX86_BUILTIN_CVTPD2DQ,
17397 IX86_BUILTIN_CVTPD2PI,
17398 IX86_BUILTIN_CVTPD2PS,
17399 IX86_BUILTIN_CVTTPD2DQ,
17400 IX86_BUILTIN_CVTTPD2PI,
17402 IX86_BUILTIN_CVTPI2PD,
17403 IX86_BUILTIN_CVTSI2SD,
17404 IX86_BUILTIN_CVTSI642SD,
17406 IX86_BUILTIN_CVTSD2SI,
17407 IX86_BUILTIN_CVTSD2SI64,
17408 IX86_BUILTIN_CVTSD2SS,
17409 IX86_BUILTIN_CVTSS2SD,
17410 IX86_BUILTIN_CVTTSD2SI,
17411 IX86_BUILTIN_CVTTSD2SI64,
17413 IX86_BUILTIN_CVTPS2DQ,
17414 IX86_BUILTIN_CVTPS2PD,
17415 IX86_BUILTIN_CVTTPS2DQ,
17417 IX86_BUILTIN_MOVNTI,
17418 IX86_BUILTIN_MOVNTPD,
17419 IX86_BUILTIN_MOVNTDQ,
17422 IX86_BUILTIN_MASKMOVDQU,
17423 IX86_BUILTIN_MOVMSKPD,
17424 IX86_BUILTIN_PMOVMSKB128,
17426 IX86_BUILTIN_PACKSSWB128,
17427 IX86_BUILTIN_PACKSSDW128,
17428 IX86_BUILTIN_PACKUSWB128,
17430 IX86_BUILTIN_PADDB128,
17431 IX86_BUILTIN_PADDW128,
17432 IX86_BUILTIN_PADDD128,
17433 IX86_BUILTIN_PADDQ128,
17434 IX86_BUILTIN_PADDSB128,
17435 IX86_BUILTIN_PADDSW128,
17436 IX86_BUILTIN_PADDUSB128,
17437 IX86_BUILTIN_PADDUSW128,
17438 IX86_BUILTIN_PSUBB128,
17439 IX86_BUILTIN_PSUBW128,
17440 IX86_BUILTIN_PSUBD128,
17441 IX86_BUILTIN_PSUBQ128,
17442 IX86_BUILTIN_PSUBSB128,
17443 IX86_BUILTIN_PSUBSW128,
17444 IX86_BUILTIN_PSUBUSB128,
17445 IX86_BUILTIN_PSUBUSW128,
17447 IX86_BUILTIN_PAND128,
17448 IX86_BUILTIN_PANDN128,
17449 IX86_BUILTIN_POR128,
17450 IX86_BUILTIN_PXOR128,
17452 IX86_BUILTIN_PAVGB128,
17453 IX86_BUILTIN_PAVGW128,
17455 IX86_BUILTIN_PCMPEQB128,
17456 IX86_BUILTIN_PCMPEQW128,
17457 IX86_BUILTIN_PCMPEQD128,
17458 IX86_BUILTIN_PCMPGTB128,
17459 IX86_BUILTIN_PCMPGTW128,
17460 IX86_BUILTIN_PCMPGTD128,
17462 IX86_BUILTIN_PMADDWD128,
17464 IX86_BUILTIN_PMAXSW128,
17465 IX86_BUILTIN_PMAXUB128,
17466 IX86_BUILTIN_PMINSW128,
17467 IX86_BUILTIN_PMINUB128,
17469 IX86_BUILTIN_PMULUDQ,
17470 IX86_BUILTIN_PMULUDQ128,
17471 IX86_BUILTIN_PMULHUW128,
17472 IX86_BUILTIN_PMULHW128,
17473 IX86_BUILTIN_PMULLW128,
17475 IX86_BUILTIN_PSADBW128,
17476 IX86_BUILTIN_PSHUFHW,
17477 IX86_BUILTIN_PSHUFLW,
17478 IX86_BUILTIN_PSHUFD,
17480 IX86_BUILTIN_PSLLDQI128,
17481 IX86_BUILTIN_PSLLWI128,
17482 IX86_BUILTIN_PSLLDI128,
17483 IX86_BUILTIN_PSLLQI128,
17484 IX86_BUILTIN_PSRAWI128,
17485 IX86_BUILTIN_PSRADI128,
17486 IX86_BUILTIN_PSRLDQI128,
17487 IX86_BUILTIN_PSRLWI128,
17488 IX86_BUILTIN_PSRLDI128,
17489 IX86_BUILTIN_PSRLQI128,
17491 IX86_BUILTIN_PSLLDQ128,
17492 IX86_BUILTIN_PSLLW128,
17493 IX86_BUILTIN_PSLLD128,
17494 IX86_BUILTIN_PSLLQ128,
17495 IX86_BUILTIN_PSRAW128,
17496 IX86_BUILTIN_PSRAD128,
17497 IX86_BUILTIN_PSRLW128,
17498 IX86_BUILTIN_PSRLD128,
17499 IX86_BUILTIN_PSRLQ128,
17501 IX86_BUILTIN_PUNPCKHBW128,
17502 IX86_BUILTIN_PUNPCKHWD128,
17503 IX86_BUILTIN_PUNPCKHDQ128,
17504 IX86_BUILTIN_PUNPCKHQDQ128,
17505 IX86_BUILTIN_PUNPCKLBW128,
17506 IX86_BUILTIN_PUNPCKLWD128,
17507 IX86_BUILTIN_PUNPCKLDQ128,
17508 IX86_BUILTIN_PUNPCKLQDQ128,
17510 IX86_BUILTIN_CLFLUSH,
17511 IX86_BUILTIN_MFENCE,
17512 IX86_BUILTIN_LFENCE,
17515 IX86_BUILTIN_ADDSUBPS,
17516 IX86_BUILTIN_HADDPS,
17517 IX86_BUILTIN_HSUBPS,
17518 IX86_BUILTIN_MOVSHDUP,
17519 IX86_BUILTIN_MOVSLDUP,
17520 IX86_BUILTIN_ADDSUBPD,
17521 IX86_BUILTIN_HADDPD,
17522 IX86_BUILTIN_HSUBPD,
17523 IX86_BUILTIN_LDDQU,
17525 IX86_BUILTIN_MONITOR,
17526 IX86_BUILTIN_MWAIT,
17529 IX86_BUILTIN_PHADDW,
17530 IX86_BUILTIN_PHADDD,
17531 IX86_BUILTIN_PHADDSW,
17532 IX86_BUILTIN_PHSUBW,
17533 IX86_BUILTIN_PHSUBD,
17534 IX86_BUILTIN_PHSUBSW,
17535 IX86_BUILTIN_PMADDUBSW,
17536 IX86_BUILTIN_PMULHRSW,
17537 IX86_BUILTIN_PSHUFB,
17538 IX86_BUILTIN_PSIGNB,
17539 IX86_BUILTIN_PSIGNW,
17540 IX86_BUILTIN_PSIGND,
17541 IX86_BUILTIN_PALIGNR,
17542 IX86_BUILTIN_PABSB,
17543 IX86_BUILTIN_PABSW,
17544 IX86_BUILTIN_PABSD,
17546 IX86_BUILTIN_PHADDW128,
17547 IX86_BUILTIN_PHADDD128,
17548 IX86_BUILTIN_PHADDSW128,
17549 IX86_BUILTIN_PHSUBW128,
17550 IX86_BUILTIN_PHSUBD128,
17551 IX86_BUILTIN_PHSUBSW128,
17552 IX86_BUILTIN_PMADDUBSW128,
17553 IX86_BUILTIN_PMULHRSW128,
17554 IX86_BUILTIN_PSHUFB128,
17555 IX86_BUILTIN_PSIGNB128,
17556 IX86_BUILTIN_PSIGNW128,
17557 IX86_BUILTIN_PSIGND128,
17558 IX86_BUILTIN_PALIGNR128,
17559 IX86_BUILTIN_PABSB128,
17560 IX86_BUILTIN_PABSW128,
17561 IX86_BUILTIN_PABSD128,
17563 /* AMDFAM10 - SSE4A New Instructions. */
17564 IX86_BUILTIN_MOVNTSD,
17565 IX86_BUILTIN_MOVNTSS,
17566 IX86_BUILTIN_EXTRQI,
17567 IX86_BUILTIN_EXTRQ,
17568 IX86_BUILTIN_INSERTQI,
17569 IX86_BUILTIN_INSERTQ,
17572 IX86_BUILTIN_BLENDPD,
17573 IX86_BUILTIN_BLENDPS,
17574 IX86_BUILTIN_BLENDVPD,
17575 IX86_BUILTIN_BLENDVPS,
17576 IX86_BUILTIN_PBLENDVB128,
17577 IX86_BUILTIN_PBLENDW128,
17582 IX86_BUILTIN_INSERTPS128,
17584 IX86_BUILTIN_MOVNTDQA,
17585 IX86_BUILTIN_MPSADBW128,
17586 IX86_BUILTIN_PACKUSDW128,
17587 IX86_BUILTIN_PCMPEQQ,
17588 IX86_BUILTIN_PHMINPOSUW128,
17590 IX86_BUILTIN_PMAXSB128,
17591 IX86_BUILTIN_PMAXSD128,
17592 IX86_BUILTIN_PMAXUD128,
17593 IX86_BUILTIN_PMAXUW128,
17595 IX86_BUILTIN_PMINSB128,
17596 IX86_BUILTIN_PMINSD128,
17597 IX86_BUILTIN_PMINUD128,
17598 IX86_BUILTIN_PMINUW128,
17600 IX86_BUILTIN_PMOVSXBW128,
17601 IX86_BUILTIN_PMOVSXBD128,
17602 IX86_BUILTIN_PMOVSXBQ128,
17603 IX86_BUILTIN_PMOVSXWD128,
17604 IX86_BUILTIN_PMOVSXWQ128,
17605 IX86_BUILTIN_PMOVSXDQ128,
17607 IX86_BUILTIN_PMOVZXBW128,
17608 IX86_BUILTIN_PMOVZXBD128,
17609 IX86_BUILTIN_PMOVZXBQ128,
17610 IX86_BUILTIN_PMOVZXWD128,
17611 IX86_BUILTIN_PMOVZXWQ128,
17612 IX86_BUILTIN_PMOVZXDQ128,
17614 IX86_BUILTIN_PMULDQ128,
17615 IX86_BUILTIN_PMULLD128,
17617 IX86_BUILTIN_ROUNDPD,
17618 IX86_BUILTIN_ROUNDPS,
17619 IX86_BUILTIN_ROUNDSD,
17620 IX86_BUILTIN_ROUNDSS,
17622 IX86_BUILTIN_PTESTZ,
17623 IX86_BUILTIN_PTESTC,
17624 IX86_BUILTIN_PTESTNZC,
17626 IX86_BUILTIN_VEC_INIT_V2SI,
17627 IX86_BUILTIN_VEC_INIT_V4HI,
17628 IX86_BUILTIN_VEC_INIT_V8QI,
17629 IX86_BUILTIN_VEC_EXT_V2DF,
17630 IX86_BUILTIN_VEC_EXT_V2DI,
17631 IX86_BUILTIN_VEC_EXT_V4SF,
17632 IX86_BUILTIN_VEC_EXT_V4SI,
17633 IX86_BUILTIN_VEC_EXT_V8HI,
17634 IX86_BUILTIN_VEC_EXT_V2SI,
17635 IX86_BUILTIN_VEC_EXT_V4HI,
17636 IX86_BUILTIN_VEC_EXT_V16QI,
17637 IX86_BUILTIN_VEC_SET_V2DI,
17638 IX86_BUILTIN_VEC_SET_V4SF,
17639 IX86_BUILTIN_VEC_SET_V4SI,
17640 IX86_BUILTIN_VEC_SET_V8HI,
17641 IX86_BUILTIN_VEC_SET_V4HI,
17642 IX86_BUILTIN_VEC_SET_V16QI,
17644 IX86_BUILTIN_VEC_PACK_SFIX,
17647 IX86_BUILTIN_CRC32QI,
17648 IX86_BUILTIN_CRC32HI,
17649 IX86_BUILTIN_CRC32SI,
17650 IX86_BUILTIN_CRC32DI,
17652 IX86_BUILTIN_PCMPESTRI128,
17653 IX86_BUILTIN_PCMPESTRM128,
17654 IX86_BUILTIN_PCMPESTRA128,
17655 IX86_BUILTIN_PCMPESTRC128,
17656 IX86_BUILTIN_PCMPESTRO128,
17657 IX86_BUILTIN_PCMPESTRS128,
17658 IX86_BUILTIN_PCMPESTRZ128,
17659 IX86_BUILTIN_PCMPISTRI128,
17660 IX86_BUILTIN_PCMPISTRM128,
17661 IX86_BUILTIN_PCMPISTRA128,
17662 IX86_BUILTIN_PCMPISTRC128,
17663 IX86_BUILTIN_PCMPISTRO128,
17664 IX86_BUILTIN_PCMPISTRS128,
17665 IX86_BUILTIN_PCMPISTRZ128,
17667 IX86_BUILTIN_PCMPGTQ,
17669 /* AES instructions */
17670 IX86_BUILTIN_AESENC128,
17671 IX86_BUILTIN_AESENCLAST128,
17672 IX86_BUILTIN_AESDEC128,
17673 IX86_BUILTIN_AESDECLAST128,
17674 IX86_BUILTIN_AESIMC128,
17675 IX86_BUILTIN_AESKEYGENASSIST128,
17677 /* PCLMUL instruction */
17678 IX86_BUILTIN_PCLMULQDQ128,
17680 /* TFmode support builtins. */
17682 IX86_BUILTIN_FABSQ,
17683 IX86_BUILTIN_COPYSIGNQ,
17685 /* SSE5 instructions */
17686 IX86_BUILTIN_FMADDSS,
17687 IX86_BUILTIN_FMADDSD,
17688 IX86_BUILTIN_FMADDPS,
17689 IX86_BUILTIN_FMADDPD,
17690 IX86_BUILTIN_FMSUBSS,
17691 IX86_BUILTIN_FMSUBSD,
17692 IX86_BUILTIN_FMSUBPS,
17693 IX86_BUILTIN_FMSUBPD,
17694 IX86_BUILTIN_FNMADDSS,
17695 IX86_BUILTIN_FNMADDSD,
17696 IX86_BUILTIN_FNMADDPS,
17697 IX86_BUILTIN_FNMADDPD,
17698 IX86_BUILTIN_FNMSUBSS,
17699 IX86_BUILTIN_FNMSUBSD,
17700 IX86_BUILTIN_FNMSUBPS,
17701 IX86_BUILTIN_FNMSUBPD,
17702 IX86_BUILTIN_PCMOV_V2DI,
17703 IX86_BUILTIN_PCMOV_V4SI,
17704 IX86_BUILTIN_PCMOV_V8HI,
17705 IX86_BUILTIN_PCMOV_V16QI,
17706 IX86_BUILTIN_PCMOV_V4SF,
17707 IX86_BUILTIN_PCMOV_V2DF,
17708 IX86_BUILTIN_PPERM,
17709 IX86_BUILTIN_PERMPS,
17710 IX86_BUILTIN_PERMPD,
17711 IX86_BUILTIN_PMACSSWW,
17712 IX86_BUILTIN_PMACSWW,
17713 IX86_BUILTIN_PMACSSWD,
17714 IX86_BUILTIN_PMACSWD,
17715 IX86_BUILTIN_PMACSSDD,
17716 IX86_BUILTIN_PMACSDD,
17717 IX86_BUILTIN_PMACSSDQL,
17718 IX86_BUILTIN_PMACSSDQH,
17719 IX86_BUILTIN_PMACSDQL,
17720 IX86_BUILTIN_PMACSDQH,
17721 IX86_BUILTIN_PMADCSSWD,
17722 IX86_BUILTIN_PMADCSWD,
17723 IX86_BUILTIN_PHADDBW,
17724 IX86_BUILTIN_PHADDBD,
17725 IX86_BUILTIN_PHADDBQ,
17726 IX86_BUILTIN_PHADDWD,
17727 IX86_BUILTIN_PHADDWQ,
17728 IX86_BUILTIN_PHADDDQ,
17729 IX86_BUILTIN_PHADDUBW,
17730 IX86_BUILTIN_PHADDUBD,
17731 IX86_BUILTIN_PHADDUBQ,
17732 IX86_BUILTIN_PHADDUWD,
17733 IX86_BUILTIN_PHADDUWQ,
17734 IX86_BUILTIN_PHADDUDQ,
17735 IX86_BUILTIN_PHSUBBW,
17736 IX86_BUILTIN_PHSUBWD,
17737 IX86_BUILTIN_PHSUBDQ,
17738 IX86_BUILTIN_PROTB,
17739 IX86_BUILTIN_PROTW,
17740 IX86_BUILTIN_PROTD,
17741 IX86_BUILTIN_PROTQ,
17742 IX86_BUILTIN_PROTB_IMM,
17743 IX86_BUILTIN_PROTW_IMM,
17744 IX86_BUILTIN_PROTD_IMM,
17745 IX86_BUILTIN_PROTQ_IMM,
17746 IX86_BUILTIN_PSHLB,
17747 IX86_BUILTIN_PSHLW,
17748 IX86_BUILTIN_PSHLD,
17749 IX86_BUILTIN_PSHLQ,
17750 IX86_BUILTIN_PSHAB,
17751 IX86_BUILTIN_PSHAW,
17752 IX86_BUILTIN_PSHAD,
17753 IX86_BUILTIN_PSHAQ,
17754 IX86_BUILTIN_FRCZSS,
17755 IX86_BUILTIN_FRCZSD,
17756 IX86_BUILTIN_FRCZPS,
17757 IX86_BUILTIN_FRCZPD,
17758 IX86_BUILTIN_CVTPH2PS,
17759 IX86_BUILTIN_CVTPS2PH,
17761 IX86_BUILTIN_COMEQSS,
17762 IX86_BUILTIN_COMNESS,
17763 IX86_BUILTIN_COMLTSS,
17764 IX86_BUILTIN_COMLESS,
17765 IX86_BUILTIN_COMGTSS,
17766 IX86_BUILTIN_COMGESS,
17767 IX86_BUILTIN_COMUEQSS,
17768 IX86_BUILTIN_COMUNESS,
17769 IX86_BUILTIN_COMULTSS,
17770 IX86_BUILTIN_COMULESS,
17771 IX86_BUILTIN_COMUGTSS,
17772 IX86_BUILTIN_COMUGESS,
17773 IX86_BUILTIN_COMORDSS,
17774 IX86_BUILTIN_COMUNORDSS,
17775 IX86_BUILTIN_COMFALSESS,
17776 IX86_BUILTIN_COMTRUESS,
17778 IX86_BUILTIN_COMEQSD,
17779 IX86_BUILTIN_COMNESD,
17780 IX86_BUILTIN_COMLTSD,
17781 IX86_BUILTIN_COMLESD,
17782 IX86_BUILTIN_COMGTSD,
17783 IX86_BUILTIN_COMGESD,
17784 IX86_BUILTIN_COMUEQSD,
17785 IX86_BUILTIN_COMUNESD,
17786 IX86_BUILTIN_COMULTSD,
17787 IX86_BUILTIN_COMULESD,
17788 IX86_BUILTIN_COMUGTSD,
17789 IX86_BUILTIN_COMUGESD,
17790 IX86_BUILTIN_COMORDSD,
17791 IX86_BUILTIN_COMUNORDSD,
17792 IX86_BUILTIN_COMFALSESD,
17793 IX86_BUILTIN_COMTRUESD,
17795 IX86_BUILTIN_COMEQPS,
17796 IX86_BUILTIN_COMNEPS,
17797 IX86_BUILTIN_COMLTPS,
17798 IX86_BUILTIN_COMLEPS,
17799 IX86_BUILTIN_COMGTPS,
17800 IX86_BUILTIN_COMGEPS,
17801 IX86_BUILTIN_COMUEQPS,
17802 IX86_BUILTIN_COMUNEPS,
17803 IX86_BUILTIN_COMULTPS,
17804 IX86_BUILTIN_COMULEPS,
17805 IX86_BUILTIN_COMUGTPS,
17806 IX86_BUILTIN_COMUGEPS,
17807 IX86_BUILTIN_COMORDPS,
17808 IX86_BUILTIN_COMUNORDPS,
17809 IX86_BUILTIN_COMFALSEPS,
17810 IX86_BUILTIN_COMTRUEPS,
17812 IX86_BUILTIN_COMEQPD,
17813 IX86_BUILTIN_COMNEPD,
17814 IX86_BUILTIN_COMLTPD,
17815 IX86_BUILTIN_COMLEPD,
17816 IX86_BUILTIN_COMGTPD,
17817 IX86_BUILTIN_COMGEPD,
17818 IX86_BUILTIN_COMUEQPD,
17819 IX86_BUILTIN_COMUNEPD,
17820 IX86_BUILTIN_COMULTPD,
17821 IX86_BUILTIN_COMULEPD,
17822 IX86_BUILTIN_COMUGTPD,
17823 IX86_BUILTIN_COMUGEPD,
17824 IX86_BUILTIN_COMORDPD,
17825 IX86_BUILTIN_COMUNORDPD,
17826 IX86_BUILTIN_COMFALSEPD,
17827 IX86_BUILTIN_COMTRUEPD,
17829 IX86_BUILTIN_PCOMEQUB,
17830 IX86_BUILTIN_PCOMNEUB,
17831 IX86_BUILTIN_PCOMLTUB,
17832 IX86_BUILTIN_PCOMLEUB,
17833 IX86_BUILTIN_PCOMGTUB,
17834 IX86_BUILTIN_PCOMGEUB,
17835 IX86_BUILTIN_PCOMFALSEUB,
17836 IX86_BUILTIN_PCOMTRUEUB,
17837 IX86_BUILTIN_PCOMEQUW,
17838 IX86_BUILTIN_PCOMNEUW,
17839 IX86_BUILTIN_PCOMLTUW,
17840 IX86_BUILTIN_PCOMLEUW,
17841 IX86_BUILTIN_PCOMGTUW,
17842 IX86_BUILTIN_PCOMGEUW,
17843 IX86_BUILTIN_PCOMFALSEUW,
17844 IX86_BUILTIN_PCOMTRUEUW,
17845 IX86_BUILTIN_PCOMEQUD,
17846 IX86_BUILTIN_PCOMNEUD,
17847 IX86_BUILTIN_PCOMLTUD,
17848 IX86_BUILTIN_PCOMLEUD,
17849 IX86_BUILTIN_PCOMGTUD,
17850 IX86_BUILTIN_PCOMGEUD,
17851 IX86_BUILTIN_PCOMFALSEUD,
17852 IX86_BUILTIN_PCOMTRUEUD,
17853 IX86_BUILTIN_PCOMEQUQ,
17854 IX86_BUILTIN_PCOMNEUQ,
17855 IX86_BUILTIN_PCOMLTUQ,
17856 IX86_BUILTIN_PCOMLEUQ,
17857 IX86_BUILTIN_PCOMGTUQ,
17858 IX86_BUILTIN_PCOMGEUQ,
17859 IX86_BUILTIN_PCOMFALSEUQ,
17860 IX86_BUILTIN_PCOMTRUEUQ,
17862 IX86_BUILTIN_PCOMEQB,
17863 IX86_BUILTIN_PCOMNEB,
17864 IX86_BUILTIN_PCOMLTB,
17865 IX86_BUILTIN_PCOMLEB,
17866 IX86_BUILTIN_PCOMGTB,
17867 IX86_BUILTIN_PCOMGEB,
17868 IX86_BUILTIN_PCOMFALSEB,
17869 IX86_BUILTIN_PCOMTRUEB,
17870 IX86_BUILTIN_PCOMEQW,
17871 IX86_BUILTIN_PCOMNEW,
17872 IX86_BUILTIN_PCOMLTW,
17873 IX86_BUILTIN_PCOMLEW,
17874 IX86_BUILTIN_PCOMGTW,
17875 IX86_BUILTIN_PCOMGEW,
17876 IX86_BUILTIN_PCOMFALSEW,
17877 IX86_BUILTIN_PCOMTRUEW,
17878 IX86_BUILTIN_PCOMEQD,
17879 IX86_BUILTIN_PCOMNED,
17880 IX86_BUILTIN_PCOMLTD,
17881 IX86_BUILTIN_PCOMLED,
17882 IX86_BUILTIN_PCOMGTD,
17883 IX86_BUILTIN_PCOMGED,
17884 IX86_BUILTIN_PCOMFALSED,
17885 IX86_BUILTIN_PCOMTRUED,
17886 IX86_BUILTIN_PCOMEQQ,
17887 IX86_BUILTIN_PCOMNEQ,
17888 IX86_BUILTIN_PCOMLTQ,
17889 IX86_BUILTIN_PCOMLEQ,
17890 IX86_BUILTIN_PCOMGTQ,
17891 IX86_BUILTIN_PCOMGEQ,
17892 IX86_BUILTIN_PCOMFALSEQ,
17893 IX86_BUILTIN_PCOMTRUEQ,
/* Table for the ix86 builtin decls.  Indexed by IX86_BUILTIN_* codes;
   a slot stays NULL_TREE when the corresponding builtin was not added
   (its ISA not enabled).  GTY(()) roots the decls for the GC.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so,
 * if the target_flags include one of MASK.  Stores the function decl
 * in the ix86_builtins array.
 * Returns the function decl or NULL_TREE, if the builtin was not added.  */
def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
  tree decl = NULL_TREE;
  /* Register the builtin only when one of its OPTION_MASK_ISA_* bits is
     enabled; builtins carrying OPTION_MASK_ISA_64BIT additionally
     require TARGET_64BIT.  */
  if (mask & ix86_isa_flags
      && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
      decl = add_builtin_function (name, type, code, BUILT_IN_MD,
      /* Cache the decl so it can be found again by its IX86_BUILTIN_*
	 code via the ix86_builtins table.  */
      ix86_builtins[(int) code] = decl;
/* Like def_builtin, but also marks the function decl "const"
   (TREE_READONLY), allowing the optimizers to treat calls to the
   builtin as side-effect free.  */
def_builtin_const (int mask, const char *name, tree type,
		   enum ix86_builtins code)
  tree decl = def_builtin (mask, name, type, code);
  /* NOTE(review): def_builtin documents that it may return NULL_TREE
     when the builtin is not added; confirm decl is guarded before this
     store (the guard may have been elided from this excerpt).  */
  TREE_READONLY (decl) = 1;
/* Bits for builtin_description.flag.  */
/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
/* One table entry per builtin: the ISA mask gating it, the insn
   pattern used to expand it, its user-visible name, its builtin code,
   and the comparison code it implements (if any).  */
struct builtin_description
  const unsigned int mask;		/* OPTION_MASK_ISA_* bits required.  */
  const enum insn_code icode;		/* CODE_FOR_* expander pattern.  */
  const char *const name;		/* "__builtin_ia32_*" name.  */
  const enum ix86_builtins code;	/* IX86_BUILTIN_* enumerator.  */
  const enum rtx_code comparison;	/* rtx comparison code, if any.  */
/* Builtins mapping onto the scalar compare instructions comiss/ucomiss
   (SSE) and comisd/ucomisd (SSE2); the rtx comparison code selects the
   condition tested on the result.  */
static const struct builtin_description bdesc_comi[] =
  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* Builtins for the ptest instruction; the rtx code selects which of
   the Z/C flags (or their combination) the builtin reports.  */
static const struct builtin_description bdesc_ptest[] =
  /* SSE4.1 */
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
/* SSE4.2 pcmpestr (explicit-length string compare) builtins.  The
   flag-reading variants carry the CC mode to test in the flag slot.  */
static const struct builtin_description bdesc_pcmpestr[] =
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 pcmpistr (implicit-length string compare) builtins, laid out
   exactly like bdesc_pcmpestr above.  */
static const struct builtin_description bdesc_pcmpistr[] =
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
/* Special builtin types */
/* Function-signature enumerators for the "special" builtins in
   bdesc_special_args (loads, stores, fences).  Names encode
   <return>_FTYPE_<args>; a leading P on an argument means pointer,
   PC means pointer-to-const.  Do not reorder: the (int) values are
   stored in builtin_description.flag.  */
enum ix86_special_builtin_type
  SPECIAL_FTYPE_UNKNOWN,
  V16QI_FTYPE_PCCHAR,
  V4SF_FTYPE_PCFLOAT,
  V2DF_FTYPE_PCDOUBLE,
  V4SF_FTYPE_V4SF_PCV2SF,
  V2DF_FTYPE_V2DF_PCDOUBLE,
  VOID_FTYPE_PV2SF_V4SF,
  VOID_FTYPE_PV2DI_V2DI,
  VOID_FTYPE_PCHAR_V16QI,
  VOID_FTYPE_PFLOAT_V4SF,
  VOID_FTYPE_PDOUBLE_V2DF,
  VOID_FTYPE_PINT_INT
/* Builtin types */
/* Function-signature enumerators for the regular builtins in
   bdesc_args.  Names encode <return>_FTYPE_<args>.  Suffix
   conventions used below:
     _COUNT -- the last operand is a shift count (see the psll/psrl/psra
	       entries in bdesc_args);
     _SWAP  -- operands are swapped before expansion
	       (cf. BUILTIN_DESC_SWAP_OPERANDS);
     _VEC_MERGE -- NOTE(review): presumably a scalar operation merged
	       into the low vector element; confirm against the
	       expander before relying on this.  */
enum ix86_builtin_type
  /* TFmode (__float128) operations.  */
  FLOAT128_FTYPE_FLOAT128,
  FLOAT128_FTYPE_FLOAT128_FLOAT128,
  V4SF_FTYPE_V4SF_VEC_MERGE,
  V2DF_FTYPE_V2DF_VEC_MERGE,
  /* Two-operand vector signatures.  */
  V16QI_FTYPE_V16QI_V16QI,
  V16QI_FTYPE_V8HI_V8HI,
  V8QI_FTYPE_V8QI_V8QI,
  V8QI_FTYPE_V4HI_V4HI,
  V8HI_FTYPE_V8HI_V8HI,
  V8HI_FTYPE_V8HI_V8HI_COUNT,
  V8HI_FTYPE_V16QI_V16QI,
  V8HI_FTYPE_V4SI_V4SI,
  V8HI_FTYPE_V8HI_SI_COUNT,
  V4SI_FTYPE_V4SI_V4SI,
  V4SI_FTYPE_V4SI_V4SI_COUNT,
  V4SI_FTYPE_V8HI_V8HI,
  V4SI_FTYPE_V4SF_V4SF,
  V4SI_FTYPE_V2DF_V2DF,
  V4SI_FTYPE_V4SI_SI_COUNT,
  V4HI_FTYPE_V4HI_V4HI,
  V4HI_FTYPE_V4HI_V4HI_COUNT,
  V4HI_FTYPE_V8QI_V8QI,
  V4HI_FTYPE_V2SI_V2SI,
  V4HI_FTYPE_V4HI_SI_COUNT,
  V4SF_FTYPE_V4SF_V4SF,
  V4SF_FTYPE_V4SF_V4SF_SWAP,
  V4SF_FTYPE_V4SF_V2SI,
  V4SF_FTYPE_V4SF_V2DF,
  V4SF_FTYPE_V4SF_DI,
  V4SF_FTYPE_V4SF_SI,
  V2DI_FTYPE_V2DI_V2DI,
  V2DI_FTYPE_V2DI_V2DI_COUNT,
  V2DI_FTYPE_V16QI_V16QI,
  V2DI_FTYPE_V4SI_V4SI,
  V2DI_FTYPE_V2DI_V16QI,
  V2DI_FTYPE_V2DF_V2DF,
  V2DI_FTYPE_V2DI_SI_COUNT,
  V2SI_FTYPE_V2SI_V2SI,
  V2SI_FTYPE_V2SI_V2SI_COUNT,
  V2SI_FTYPE_V4HI_V4HI,
  V2SI_FTYPE_V2SF_V2SF,
  V2SI_FTYPE_V2SI_SI_COUNT,
  V2DF_FTYPE_V2DF_V2DF,
  V2DF_FTYPE_V2DF_V2DF_SWAP,
  V2DF_FTYPE_V2DF_V4SF,
  V2DF_FTYPE_V2DF_DI,
  V2DF_FTYPE_V2DF_SI,
  V2SF_FTYPE_V2SF_V2SF,
  V1DI_FTYPE_V1DI_V1DI,
  V1DI_FTYPE_V1DI_V1DI_COUNT,
  V1DI_FTYPE_V8QI_V8QI,
  V1DI_FTYPE_V2SI_V2SI,
  V1DI_FTYPE_V1DI_SI_COUNT,
  /* Scalar integer signatures (e.g. the crc32 builtins).  */
  UINT64_FTYPE_UINT64_UINT64,
  UINT_FTYPE_UINT_UINT,
  UINT_FTYPE_UINT_USHORT,
  UINT_FTYPE_UINT_UCHAR,
  /* Vector-and-immediate signatures.  */
  V8HI_FTYPE_V8HI_INT,
  V4SI_FTYPE_V4SI_INT,
  V4HI_FTYPE_V4HI_INT,
  V4SF_FTYPE_V4SF_INT,
  V2DI_FTYPE_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_INT,
  V2DF_FTYPE_V2DF_INT,
  /* Three-operand signatures.  */
  V16QI_FTYPE_V16QI_V16QI_V16QI,
  V4SF_FTYPE_V4SF_V4SF_V4SF,
  V2DF_FTYPE_V2DF_V2DF_V2DF,
  V16QI_FTYPE_V16QI_V16QI_INT,
  V8HI_FTYPE_V8HI_V8HI_INT,
  V4SI_FTYPE_V4SI_V4SI_INT,
  V4SF_FTYPE_V4SF_V4SF_INT,
  V2DI_FTYPE_V2DI_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_V2DI_INT,
  V1DI2DI_FTYPE_V1DI_V1DI_INT,
  V2DF_FTYPE_V2DF_V2DF_INT,
  V2DI_FTYPE_V2DI_UINT_UINT,
  V2DI_FTYPE_V2DI_V2DI_UINT_UINT
/* Special builtins with variable number of arguments.  The flag slot
   holds an ix86_special_builtin_type describing the signature.  */
static const struct builtin_description bdesc_special_args[] =
  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  /* NOTE(review): the mfence entry deliberately has a 0 name --
     presumably __builtin_ia32_mfence is registered elsewhere; verify.  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18195 /* Builtins with variable number of arguments. */
18196 static const struct builtin_description bdesc_args[] =
18199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
18235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
18237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
18239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
18243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18255 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18257 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18259 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18260 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18268 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18269 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18270 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18271 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18272 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18273 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18274 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18275 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18276 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18277 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18278 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18279 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18280 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18281 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18282 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18285 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18286 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18287 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18288 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18289 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18290 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
18294 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18295 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18297 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18301 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18304 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18308 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18309 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18310 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
18328 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18330 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18331 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18334 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18335 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18336 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18340 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18341 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18345 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18346 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18347 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18348 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18351 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18352 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18353 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18354 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18356 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
18357 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
18358 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
18360 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
18362 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18363 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18364 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18366 /* SSE MMX or 3DNow!A */
18367 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18368 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18369 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18371 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18372 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18373 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18374 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18376 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
18377 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
18379 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
18382 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
18385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
18386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
18387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
18388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
18390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
18393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
18398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18400 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18401 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
18405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18407 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18408 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18409 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18410 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
18421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18438 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18442 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18444 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18445 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18451 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
18453 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18454 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18455 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18456 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18457 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18458 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18459 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18460 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18471 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18472 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
18474 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18476 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18477 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18489 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18490 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18491 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18507 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
18510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
18511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
18515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
18516 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
18517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
18518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
18520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18521 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18522 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18523 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18524 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18525 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18526 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18529 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18530 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18531 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18532 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18533 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18534 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18536 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18537 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18538 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18539 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
18542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
18548 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18549 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18552 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
18553 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18555 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18556 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18557 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18558 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18559 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18560 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18563 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
18564 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
18565 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18566 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
18567 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
18568 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
18583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
18584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18587 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18593 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18596 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
18597 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
18600 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18601 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18602 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
18603 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
18604 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18605 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18606 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
18608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
18609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
18611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18632 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18634 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18635 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18636 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18638 /* SSE4.1 and SSE5 */
18639 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
18640 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
18641 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18642 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18645 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18646 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
18647 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
18648 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
18649 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
18652 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
18653 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
18654 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
18655 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18658 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
18659 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
18661 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18662 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18663 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18664 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18667 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
18670 { OPTION_MASK_ISA_64BIT, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
18671 { OPTION_MASK_ISA_64BIT, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
18675 enum multi_arg_type {
18685 MULTI_ARG_3_PERMPS,
18686 MULTI_ARG_3_PERMPD,
18693 MULTI_ARG_2_DI_IMM,
18694 MULTI_ARG_2_SI_IMM,
18695 MULTI_ARG_2_HI_IMM,
18696 MULTI_ARG_2_QI_IMM,
18697 MULTI_ARG_2_SF_CMP,
18698 MULTI_ARG_2_DF_CMP,
18699 MULTI_ARG_2_DI_CMP,
18700 MULTI_ARG_2_SI_CMP,
18701 MULTI_ARG_2_HI_CMP,
18702 MULTI_ARG_2_QI_CMP,
18725 static const struct builtin_description bdesc_multi_arg[] =
18727 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18728 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18729 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18730 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18731 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18732 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18733 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18734 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18735 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18736 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18737 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18738 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18739 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18740 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18741 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18742 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18743 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18744 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18745 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18746 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18747 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18748 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18749 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18750 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18751 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18752 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18753 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18754 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18755 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18756 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18757 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18758 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18759 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18760 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18761 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18762 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18763 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18764 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18765 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18766 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18767 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18768 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18769 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18770 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18771 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18772 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18773 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18774 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18775 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18776 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18777 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18778 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18779 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18780 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18781 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18782 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18783 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18784 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18785 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18786 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18787 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18788 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18789 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18790 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18791 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18792 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18793 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18794 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18795 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18796 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18797 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18798 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18799 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18800 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18801 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18803 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18804 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18805 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18806 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18807 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18808 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18809 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18810 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18811 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18812 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18813 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18814 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18815 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18816 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18817 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18818 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18820 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18821 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18822 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18823 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18824 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18825 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18826 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18827 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18828 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18829 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18830 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18831 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18832 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18833 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18834 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18835 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18837 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18838 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18839 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18840 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18841 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18842 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18843 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18844 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18845 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18846 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18847 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18848 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18849 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18850 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18851 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18852 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18854 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18855 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18856 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18857 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18858 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18859 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18860 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18861 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18862 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18864 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18865 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18872 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18874 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
18880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
18883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
18884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
18885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
18887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
18888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
18891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
18892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
18893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
18895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
18899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
18900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
18901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
18903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
18904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18905 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
18907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
18908 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
18909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
18911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
18912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
18915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
18916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
18917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
18919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
18920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
18923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
18924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
18925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
18927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
18931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
18932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
18933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
18935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18949 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18963 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18964 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18967 ix86_init_mmx_sse_builtins (void)
18969 const struct builtin_description * d;
18972 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18973 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18974 tree V1DI_type_node
18975 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
18976 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18977 tree V2DI_type_node
18978 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18979 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18980 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18981 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18982 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18983 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18984 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18986 tree pchar_type_node = build_pointer_type (char_type_node);
18987 tree pcchar_type_node
18988 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
18989 tree pfloat_type_node = build_pointer_type (float_type_node);
18990 tree pcfloat_type_node
18991 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
18992 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
18993 tree pcv2sf_type_node
18994 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
18995 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
18996 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
18999 tree int_ftype_v4sf_v4sf
19000 = build_function_type_list (integer_type_node,
19001 V4SF_type_node, V4SF_type_node, NULL_TREE);
19002 tree v4si_ftype_v4sf_v4sf
19003 = build_function_type_list (V4SI_type_node,
19004 V4SF_type_node, V4SF_type_node, NULL_TREE);
19005 /* MMX/SSE/integer conversions. */
19006 tree int_ftype_v4sf
19007 = build_function_type_list (integer_type_node,
19008 V4SF_type_node, NULL_TREE);
19009 tree int64_ftype_v4sf
19010 = build_function_type_list (long_long_integer_type_node,
19011 V4SF_type_node, NULL_TREE);
19012 tree int_ftype_v8qi
19013 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
19014 tree v4sf_ftype_v4sf_int
19015 = build_function_type_list (V4SF_type_node,
19016 V4SF_type_node, integer_type_node, NULL_TREE);
19017 tree v4sf_ftype_v4sf_int64
19018 = build_function_type_list (V4SF_type_node,
19019 V4SF_type_node, long_long_integer_type_node,
19021 tree v4sf_ftype_v4sf_v2si
19022 = build_function_type_list (V4SF_type_node,
19023 V4SF_type_node, V2SI_type_node, NULL_TREE);
19025 /* Miscellaneous. */
19026 tree v8qi_ftype_v4hi_v4hi
19027 = build_function_type_list (V8QI_type_node,
19028 V4HI_type_node, V4HI_type_node, NULL_TREE);
19029 tree v4hi_ftype_v2si_v2si
19030 = build_function_type_list (V4HI_type_node,
19031 V2SI_type_node, V2SI_type_node, NULL_TREE);
19032 tree v4sf_ftype_v4sf_v4sf_int
19033 = build_function_type_list (V4SF_type_node,
19034 V4SF_type_node, V4SF_type_node,
19035 integer_type_node, NULL_TREE);
19036 tree v2si_ftype_v4hi_v4hi
19037 = build_function_type_list (V2SI_type_node,
19038 V4HI_type_node, V4HI_type_node, NULL_TREE);
19039 tree v4hi_ftype_v4hi_int
19040 = build_function_type_list (V4HI_type_node,
19041 V4HI_type_node, integer_type_node, NULL_TREE);
19042 tree v2si_ftype_v2si_int
19043 = build_function_type_list (V2SI_type_node,
19044 V2SI_type_node, integer_type_node, NULL_TREE);
19045 tree v1di_ftype_v1di_int
19046 = build_function_type_list (V1DI_type_node,
19047 V1DI_type_node, integer_type_node, NULL_TREE);
19049 tree void_ftype_void
19050 = build_function_type (void_type_node, void_list_node);
19051 tree void_ftype_unsigned
19052 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
19053 tree void_ftype_unsigned_unsigned
19054 = build_function_type_list (void_type_node, unsigned_type_node,
19055 unsigned_type_node, NULL_TREE);
19056 tree void_ftype_pcvoid_unsigned_unsigned
19057 = build_function_type_list (void_type_node, const_ptr_type_node,
19058 unsigned_type_node, unsigned_type_node,
19060 tree unsigned_ftype_void
19061 = build_function_type (unsigned_type_node, void_list_node);
19062 tree v2si_ftype_v4sf
19063 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
19064 /* Loads/stores. */
19065 tree void_ftype_v8qi_v8qi_pchar
19066 = build_function_type_list (void_type_node,
19067 V8QI_type_node, V8QI_type_node,
19068 pchar_type_node, NULL_TREE);
19069 tree v4sf_ftype_pcfloat
19070 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
19071 tree v4sf_ftype_v4sf_pcv2sf
19072 = build_function_type_list (V4SF_type_node,
19073 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
19074 tree void_ftype_pv2sf_v4sf
19075 = build_function_type_list (void_type_node,
19076 pv2sf_type_node, V4SF_type_node, NULL_TREE);
19077 tree void_ftype_pfloat_v4sf
19078 = build_function_type_list (void_type_node,
19079 pfloat_type_node, V4SF_type_node, NULL_TREE);
19080 tree void_ftype_pdi_di
19081 = build_function_type_list (void_type_node,
19082 pdi_type_node, long_long_unsigned_type_node,
19084 tree void_ftype_pv2di_v2di
19085 = build_function_type_list (void_type_node,
19086 pv2di_type_node, V2DI_type_node, NULL_TREE);
19087 /* Normal vector unops. */
19088 tree v4sf_ftype_v4sf
19089 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
19090 tree v16qi_ftype_v16qi
19091 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
19092 tree v8hi_ftype_v8hi
19093 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
19094 tree v4si_ftype_v4si
19095 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
19096 tree v8qi_ftype_v8qi
19097 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
19098 tree v4hi_ftype_v4hi
19099 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
19101 /* Normal vector binops. */
19102 tree v4sf_ftype_v4sf_v4sf
19103 = build_function_type_list (V4SF_type_node,
19104 V4SF_type_node, V4SF_type_node, NULL_TREE);
19105 tree v8qi_ftype_v8qi_v8qi
19106 = build_function_type_list (V8QI_type_node,
19107 V8QI_type_node, V8QI_type_node, NULL_TREE);
19108 tree v4hi_ftype_v4hi_v4hi
19109 = build_function_type_list (V4HI_type_node,
19110 V4HI_type_node, V4HI_type_node, NULL_TREE);
19111 tree v2si_ftype_v2si_v2si
19112 = build_function_type_list (V2SI_type_node,
19113 V2SI_type_node, V2SI_type_node, NULL_TREE);
19114 tree v1di_ftype_v1di_v1di
19115 = build_function_type_list (V1DI_type_node,
19116 V1DI_type_node, V1DI_type_node, NULL_TREE);
19117 tree v1di_ftype_v1di_v1di_int
19118 = build_function_type_list (V1DI_type_node,
19119 V1DI_type_node, V1DI_type_node,
19120 integer_type_node, NULL_TREE);
19121 tree v2si_ftype_v2sf
19122 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
19123 tree v2sf_ftype_v2si
19124 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
19125 tree v2si_ftype_v2si
19126 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
19127 tree v2sf_ftype_v2sf
19128 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
19129 tree v2sf_ftype_v2sf_v2sf
19130 = build_function_type_list (V2SF_type_node,
19131 V2SF_type_node, V2SF_type_node, NULL_TREE);
19132 tree v2si_ftype_v2sf_v2sf
19133 = build_function_type_list (V2SI_type_node,
19134 V2SF_type_node, V2SF_type_node, NULL_TREE);
19135 tree pint_type_node = build_pointer_type (integer_type_node);
19136 tree pdouble_type_node = build_pointer_type (double_type_node);
19137 tree pcdouble_type_node = build_pointer_type (
19138 build_type_variant (double_type_node, 1, 0));
19139 tree int_ftype_v2df_v2df
19140 = build_function_type_list (integer_type_node,
19141 V2DF_type_node, V2DF_type_node, NULL_TREE);
19143 tree void_ftype_pcvoid
19144 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
19145 tree v4sf_ftype_v4si
19146 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
19147 tree v4si_ftype_v4sf
19148 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
19149 tree v2df_ftype_v4si
19150 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
19151 tree v4si_ftype_v2df
19152 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
19153 tree v4si_ftype_v2df_v2df
19154 = build_function_type_list (V4SI_type_node,
19155 V2DF_type_node, V2DF_type_node, NULL_TREE);
19156 tree v2si_ftype_v2df
19157 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
19158 tree v4sf_ftype_v2df
19159 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
19160 tree v2df_ftype_v2si
19161 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
19162 tree v2df_ftype_v4sf
19163 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
19164 tree int_ftype_v2df
19165 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
19166 tree int64_ftype_v2df
19167 = build_function_type_list (long_long_integer_type_node,
19168 V2DF_type_node, NULL_TREE);
19169 tree v2df_ftype_v2df_int
19170 = build_function_type_list (V2DF_type_node,
19171 V2DF_type_node, integer_type_node, NULL_TREE);
19172 tree v2df_ftype_v2df_int64
19173 = build_function_type_list (V2DF_type_node,
19174 V2DF_type_node, long_long_integer_type_node,
19176 tree v4sf_ftype_v4sf_v2df
19177 = build_function_type_list (V4SF_type_node,
19178 V4SF_type_node, V2DF_type_node, NULL_TREE);
19179 tree v2df_ftype_v2df_v4sf
19180 = build_function_type_list (V2DF_type_node,
19181 V2DF_type_node, V4SF_type_node, NULL_TREE);
19182 tree v2df_ftype_v2df_v2df_int
19183 = build_function_type_list (V2DF_type_node,
19184 V2DF_type_node, V2DF_type_node,
19187 tree v2df_ftype_v2df_pcdouble
19188 = build_function_type_list (V2DF_type_node,
19189 V2DF_type_node, pcdouble_type_node, NULL_TREE);
19190 tree void_ftype_pdouble_v2df
19191 = build_function_type_list (void_type_node,
19192 pdouble_type_node, V2DF_type_node, NULL_TREE);
19193 tree void_ftype_pint_int
19194 = build_function_type_list (void_type_node,
19195 pint_type_node, integer_type_node, NULL_TREE);
19196 tree void_ftype_v16qi_v16qi_pchar
19197 = build_function_type_list (void_type_node,
19198 V16QI_type_node, V16QI_type_node,
19199 pchar_type_node, NULL_TREE);
19200 tree v2df_ftype_pcdouble
19201 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
19202 tree v2df_ftype_v2df_v2df
19203 = build_function_type_list (V2DF_type_node,
19204 V2DF_type_node, V2DF_type_node, NULL_TREE);
19205 tree v16qi_ftype_v16qi_v16qi
19206 = build_function_type_list (V16QI_type_node,
19207 V16QI_type_node, V16QI_type_node, NULL_TREE);
19208 tree v8hi_ftype_v8hi_v8hi
19209 = build_function_type_list (V8HI_type_node,
19210 V8HI_type_node, V8HI_type_node, NULL_TREE);
19211 tree v4si_ftype_v4si_v4si
19212 = build_function_type_list (V4SI_type_node,
19213 V4SI_type_node, V4SI_type_node, NULL_TREE);
19214 tree v2di_ftype_v2di_v2di
19215 = build_function_type_list (V2DI_type_node,
19216 V2DI_type_node, V2DI_type_node, NULL_TREE);
19217 tree v2di_ftype_v2df_v2df
19218 = build_function_type_list (V2DI_type_node,
19219 V2DF_type_node, V2DF_type_node, NULL_TREE);
19220 tree v2df_ftype_v2df
19221 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
19222 tree v2di_ftype_v2di_int
19223 = build_function_type_list (V2DI_type_node,
19224 V2DI_type_node, integer_type_node, NULL_TREE);
19225 tree v2di_ftype_v2di_v2di_int
19226 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19227 V2DI_type_node, integer_type_node, NULL_TREE);
19228 tree v4si_ftype_v4si_int
19229 = build_function_type_list (V4SI_type_node,
19230 V4SI_type_node, integer_type_node, NULL_TREE);
19231 tree v8hi_ftype_v8hi_int
19232 = build_function_type_list (V8HI_type_node,
19233 V8HI_type_node, integer_type_node, NULL_TREE);
19234 tree v4si_ftype_v8hi_v8hi
19235 = build_function_type_list (V4SI_type_node,
19236 V8HI_type_node, V8HI_type_node, NULL_TREE);
19237 tree v1di_ftype_v8qi_v8qi
19238 = build_function_type_list (V1DI_type_node,
19239 V8QI_type_node, V8QI_type_node, NULL_TREE);
19240 tree v1di_ftype_v2si_v2si
19241 = build_function_type_list (V1DI_type_node,
19242 V2SI_type_node, V2SI_type_node, NULL_TREE);
19243 tree v2di_ftype_v16qi_v16qi
19244 = build_function_type_list (V2DI_type_node,
19245 V16QI_type_node, V16QI_type_node, NULL_TREE);
19246 tree v2di_ftype_v4si_v4si
19247 = build_function_type_list (V2DI_type_node,
19248 V4SI_type_node, V4SI_type_node, NULL_TREE);
19249 tree int_ftype_v16qi
19250 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
19251 tree v16qi_ftype_pcchar
19252 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
19253 tree void_ftype_pchar_v16qi
19254 = build_function_type_list (void_type_node,
19255 pchar_type_node, V16QI_type_node, NULL_TREE);
19257 tree v2di_ftype_v2di_unsigned_unsigned
19258 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19259 unsigned_type_node, unsigned_type_node,
19261 tree v2di_ftype_v2di_v2di_unsigned_unsigned
19262 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
19263 unsigned_type_node, unsigned_type_node,
19265 tree v2di_ftype_v2di_v16qi
19266 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
19268 tree v2df_ftype_v2df_v2df_v2df
19269 = build_function_type_list (V2DF_type_node,
19270 V2DF_type_node, V2DF_type_node,
19271 V2DF_type_node, NULL_TREE);
19272 tree v4sf_ftype_v4sf_v4sf_v4sf
19273 = build_function_type_list (V4SF_type_node,
19274 V4SF_type_node, V4SF_type_node,
19275 V4SF_type_node, NULL_TREE);
19276 tree v8hi_ftype_v16qi
19277 = build_function_type_list (V8HI_type_node, V16QI_type_node,
19279 tree v4si_ftype_v16qi
19280 = build_function_type_list (V4SI_type_node, V16QI_type_node,
19282 tree v2di_ftype_v16qi
19283 = build_function_type_list (V2DI_type_node, V16QI_type_node,
19285 tree v4si_ftype_v8hi
19286 = build_function_type_list (V4SI_type_node, V8HI_type_node,
19288 tree v2di_ftype_v8hi
19289 = build_function_type_list (V2DI_type_node, V8HI_type_node,
19291 tree v2di_ftype_v4si
19292 = build_function_type_list (V2DI_type_node, V4SI_type_node,
19294 tree v2di_ftype_pv2di
19295 = build_function_type_list (V2DI_type_node, pv2di_type_node,
19297 tree v16qi_ftype_v16qi_v16qi_int
19298 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19299 V16QI_type_node, integer_type_node,
19301 tree v16qi_ftype_v16qi_v16qi_v16qi
19302 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19303 V16QI_type_node, V16QI_type_node,
19305 tree v8hi_ftype_v8hi_v8hi_int
19306 = build_function_type_list (V8HI_type_node, V8HI_type_node,
19307 V8HI_type_node, integer_type_node,
19309 tree v4si_ftype_v4si_v4si_int
19310 = build_function_type_list (V4SI_type_node, V4SI_type_node,
19311 V4SI_type_node, integer_type_node,
19313 tree int_ftype_v2di_v2di
19314 = build_function_type_list (integer_type_node,
19315 V2DI_type_node, V2DI_type_node,
19317 tree int_ftype_v16qi_int_v16qi_int_int
19318 = build_function_type_list (integer_type_node,
19325 tree v16qi_ftype_v16qi_int_v16qi_int_int
19326 = build_function_type_list (V16QI_type_node,
19333 tree int_ftype_v16qi_v16qi_int
19334 = build_function_type_list (integer_type_node,
19340 /* SSE5 instructions */
19341 tree v2di_ftype_v2di_v2di_v2di
19342 = build_function_type_list (V2DI_type_node,
19348 tree v4si_ftype_v4si_v4si_v4si
19349 = build_function_type_list (V4SI_type_node,
19355 tree v4si_ftype_v4si_v4si_v2di
19356 = build_function_type_list (V4SI_type_node,
19362 tree v8hi_ftype_v8hi_v8hi_v8hi
19363 = build_function_type_list (V8HI_type_node,
19369 tree v8hi_ftype_v8hi_v8hi_v4si
19370 = build_function_type_list (V8HI_type_node,
19376 tree v2df_ftype_v2df_v2df_v16qi
19377 = build_function_type_list (V2DF_type_node,
19383 tree v4sf_ftype_v4sf_v4sf_v16qi
19384 = build_function_type_list (V4SF_type_node,
19390 tree v2di_ftype_v2di_si
19391 = build_function_type_list (V2DI_type_node,
19396 tree v4si_ftype_v4si_si
19397 = build_function_type_list (V4SI_type_node,
19402 tree v8hi_ftype_v8hi_si
19403 = build_function_type_list (V8HI_type_node,
19408 tree v16qi_ftype_v16qi_si
19409 = build_function_type_list (V16QI_type_node,
19413 tree v4sf_ftype_v4hi
19414 = build_function_type_list (V4SF_type_node,
19418 tree v4hi_ftype_v4sf
19419 = build_function_type_list (V4HI_type_node,
19423 tree v2di_ftype_v2di
19424 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
19426 tree v16qi_ftype_v8hi_v8hi
19427 = build_function_type_list (V16QI_type_node,
19428 V8HI_type_node, V8HI_type_node,
19430 tree v8hi_ftype_v4si_v4si
19431 = build_function_type_list (V8HI_type_node,
19432 V4SI_type_node, V4SI_type_node,
19434 tree v8hi_ftype_v16qi_v16qi
19435 = build_function_type_list (V8HI_type_node,
19436 V16QI_type_node, V16QI_type_node,
19438 tree v4hi_ftype_v8qi_v8qi
19439 = build_function_type_list (V4HI_type_node,
19440 V8QI_type_node, V8QI_type_node,
19442 tree unsigned_ftype_unsigned_uchar
19443 = build_function_type_list (unsigned_type_node,
19444 unsigned_type_node,
19445 unsigned_char_type_node,
19447 tree unsigned_ftype_unsigned_ushort
19448 = build_function_type_list (unsigned_type_node,
19449 unsigned_type_node,
19450 short_unsigned_type_node,
19452 tree unsigned_ftype_unsigned_unsigned
19453 = build_function_type_list (unsigned_type_node,
19454 unsigned_type_node,
19455 unsigned_type_node,
19457 tree uint64_ftype_uint64_uint64
19458 = build_function_type_list (long_long_unsigned_type_node,
19459 long_long_unsigned_type_node,
19460 long_long_unsigned_type_node,
19462 tree float_ftype_float
19463 = build_function_type_list (float_type_node,
19469 /* The __float80 type. */
19470 if (TYPE_MODE (long_double_type_node) == XFmode)
19471 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
19475 /* The __float80 type. */
19476 tree float80_type_node = make_node (REAL_TYPE);
19478 TYPE_PRECISION (float80_type_node) = 80;
19479 layout_type (float80_type_node);
19480 (*lang_hooks.types.register_builtin_type) (float80_type_node,
19486 tree float128_type_node = make_node (REAL_TYPE);
19488 TYPE_PRECISION (float128_type_node) = 128;
19489 layout_type (float128_type_node);
19490 (*lang_hooks.types.register_builtin_type) (float128_type_node,
19493 /* TFmode support builtins. */
19494 ftype = build_function_type (float128_type_node,
19496 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
19498 ftype = build_function_type_list (float128_type_node,
19499 float128_type_node,
19501 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
19503 ftype = build_function_type_list (float128_type_node,
19504 float128_type_node,
19505 float128_type_node,
19507 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19510 /* Add all special builtins with variable number of operands. */
19511 for (i = 0, d = bdesc_special_args;
19512 i < ARRAY_SIZE (bdesc_special_args);
19520 switch ((enum ix86_special_builtin_type) d->flag)
19522 case VOID_FTYPE_VOID:
19523 type = void_ftype_void;
19525 case V16QI_FTYPE_PCCHAR:
19526 type = v16qi_ftype_pcchar;
19528 case V4SF_FTYPE_PCFLOAT:
19529 type = v4sf_ftype_pcfloat;
19531 case V2DI_FTYPE_PV2DI:
19532 type = v2di_ftype_pv2di;
19534 case V2DF_FTYPE_PCDOUBLE:
19535 type = v2df_ftype_pcdouble;
19537 case V4SF_FTYPE_V4SF_PCV2SF:
19538 type = v4sf_ftype_v4sf_pcv2sf;
19540 case V2DF_FTYPE_V2DF_PCDOUBLE:
19541 type = v2df_ftype_v2df_pcdouble;
19543 case VOID_FTYPE_PV2SF_V4SF:
19544 type = void_ftype_pv2sf_v4sf;
19546 case VOID_FTYPE_PV2DI_V2DI:
19547 type = void_ftype_pv2di_v2di;
19549 case VOID_FTYPE_PCHAR_V16QI:
19550 type = void_ftype_pchar_v16qi;
19552 case VOID_FTYPE_PFLOAT_V4SF:
19553 type = void_ftype_pfloat_v4sf;
19555 case VOID_FTYPE_PDOUBLE_V2DF:
19556 type = void_ftype_pdouble_v2df;
19558 case VOID_FTYPE_PDI_DI:
19559 type = void_ftype_pdi_di;
19561 case VOID_FTYPE_PINT_INT:
19562 type = void_ftype_pint_int;
19565 gcc_unreachable ();
19568 def_builtin (d->mask, d->name, type, d->code);
19571 /* Add all builtins with variable number of operands. */
19572 for (i = 0, d = bdesc_args;
19573 i < ARRAY_SIZE (bdesc_args);
19581 switch ((enum ix86_builtin_type) d->flag)
19583 case FLOAT_FTYPE_FLOAT:
19584 type = float_ftype_float;
19586 case INT64_FTYPE_V4SF:
19587 type = int64_ftype_v4sf;
19589 case INT64_FTYPE_V2DF:
19590 type = int64_ftype_v2df;
19592 case INT_FTYPE_V16QI:
19593 type = int_ftype_v16qi;
19595 case INT_FTYPE_V8QI:
19596 type = int_ftype_v8qi;
19598 case INT_FTYPE_V4SF:
19599 type = int_ftype_v4sf;
19601 case INT_FTYPE_V2DF:
19602 type = int_ftype_v2df;
19604 case V16QI_FTYPE_V16QI:
19605 type = v16qi_ftype_v16qi;
19607 case V8HI_FTYPE_V8HI:
19608 type = v8hi_ftype_v8hi;
19610 case V8HI_FTYPE_V16QI:
19611 type = v8hi_ftype_v16qi;
19613 case V8QI_FTYPE_V8QI:
19614 type = v8qi_ftype_v8qi;
19616 case V4SI_FTYPE_V4SI:
19617 type = v4si_ftype_v4si;
19619 case V4SI_FTYPE_V16QI:
19620 type = v4si_ftype_v16qi;
19622 case V4SI_FTYPE_V8HI:
19623 type = v4si_ftype_v8hi;
19625 case V4SI_FTYPE_V4SF:
19626 type = v4si_ftype_v4sf;
19628 case V4SI_FTYPE_V2DF:
19629 type = v4si_ftype_v2df;
19631 case V4HI_FTYPE_V4HI:
19632 type = v4hi_ftype_v4hi;
19634 case V4SF_FTYPE_V4SF:
19635 case V4SF_FTYPE_V4SF_VEC_MERGE:
19636 type = v4sf_ftype_v4sf;
19638 case V4SF_FTYPE_V4SI:
19639 type = v4sf_ftype_v4si;
19641 case V4SF_FTYPE_V2DF:
19642 type = v4sf_ftype_v2df;
19644 case V2DI_FTYPE_V2DI:
19645 type = v2di_ftype_v2di;
19647 case V2DI_FTYPE_V16QI:
19648 type = v2di_ftype_v16qi;
19650 case V2DI_FTYPE_V8HI:
19651 type = v2di_ftype_v8hi;
19653 case V2DI_FTYPE_V4SI:
19654 type = v2di_ftype_v4si;
19656 case V2SI_FTYPE_V2SI:
19657 type = v2si_ftype_v2si;
19659 case V2SI_FTYPE_V4SF:
19660 type = v2si_ftype_v4sf;
19662 case V2SI_FTYPE_V2DF:
19663 type = v2si_ftype_v2df;
19665 case V2SI_FTYPE_V2SF:
19666 type = v2si_ftype_v2sf;
19668 case V2DF_FTYPE_V4SF:
19669 type = v2df_ftype_v4sf;
19671 case V2DF_FTYPE_V2DF:
19672 case V2DF_FTYPE_V2DF_VEC_MERGE:
19673 type = v2df_ftype_v2df;
19675 case V2DF_FTYPE_V2SI:
19676 type = v2df_ftype_v2si;
19678 case V2DF_FTYPE_V4SI:
19679 type = v2df_ftype_v4si;
19681 case V2SF_FTYPE_V2SF:
19682 type = v2sf_ftype_v2sf;
19684 case V2SF_FTYPE_V2SI:
19685 type = v2sf_ftype_v2si;
19687 case V16QI_FTYPE_V16QI_V16QI:
19688 type = v16qi_ftype_v16qi_v16qi;
19690 case V16QI_FTYPE_V8HI_V8HI:
19691 type = v16qi_ftype_v8hi_v8hi;
19693 case V8QI_FTYPE_V8QI_V8QI:
19694 type = v8qi_ftype_v8qi_v8qi;
19696 case V8QI_FTYPE_V4HI_V4HI:
19697 type = v8qi_ftype_v4hi_v4hi;
19699 case V8HI_FTYPE_V8HI_V8HI:
19700 case V8HI_FTYPE_V8HI_V8HI_COUNT:
19701 type = v8hi_ftype_v8hi_v8hi;
19703 case V8HI_FTYPE_V16QI_V16QI:
19704 type = v8hi_ftype_v16qi_v16qi;
19706 case V8HI_FTYPE_V4SI_V4SI:
19707 type = v8hi_ftype_v4si_v4si;
19709 case V8HI_FTYPE_V8HI_SI_COUNT:
19710 type = v8hi_ftype_v8hi_int;
19712 case V4SI_FTYPE_V4SI_V4SI:
19713 case V4SI_FTYPE_V4SI_V4SI_COUNT:
19714 type = v4si_ftype_v4si_v4si;
19716 case V4SI_FTYPE_V8HI_V8HI:
19717 type = v4si_ftype_v8hi_v8hi;
19719 case V4SI_FTYPE_V4SF_V4SF:
19720 type = v4si_ftype_v4sf_v4sf;
19722 case V4SI_FTYPE_V2DF_V2DF:
19723 type = v4si_ftype_v2df_v2df;
19725 case V4SI_FTYPE_V4SI_SI_COUNT:
19726 type = v4si_ftype_v4si_int;
19728 case V4HI_FTYPE_V4HI_V4HI:
19729 case V4HI_FTYPE_V4HI_V4HI_COUNT:
19730 type = v4hi_ftype_v4hi_v4hi;
19732 case V4HI_FTYPE_V8QI_V8QI:
19733 type = v4hi_ftype_v8qi_v8qi;
19735 case V4HI_FTYPE_V2SI_V2SI:
19736 type = v4hi_ftype_v2si_v2si;
19738 case V4HI_FTYPE_V4HI_SI_COUNT:
19739 type = v4hi_ftype_v4hi_int;
19741 case V4SF_FTYPE_V4SF_V4SF:
19742 case V4SF_FTYPE_V4SF_V4SF_SWAP:
19743 type = v4sf_ftype_v4sf_v4sf;
19745 case V4SF_FTYPE_V4SF_V2SI:
19746 type = v4sf_ftype_v4sf_v2si;
19748 case V4SF_FTYPE_V4SF_V2DF:
19749 type = v4sf_ftype_v4sf_v2df;
19751 case V4SF_FTYPE_V4SF_DI:
19752 type = v4sf_ftype_v4sf_int64;
19754 case V4SF_FTYPE_V4SF_SI:
19755 type = v4sf_ftype_v4sf_int;
19757 case V2DI_FTYPE_V2DI_V2DI:
19758 case V2DI_FTYPE_V2DI_V2DI_COUNT:
19759 type = v2di_ftype_v2di_v2di;
19761 case V2DI_FTYPE_V16QI_V16QI:
19762 type = v2di_ftype_v16qi_v16qi;
19764 case V2DI_FTYPE_V4SI_V4SI:
19765 type = v2di_ftype_v4si_v4si;
19767 case V2DI_FTYPE_V2DI_V16QI:
19768 type = v2di_ftype_v2di_v16qi;
19770 case V2DI_FTYPE_V2DF_V2DF:
19771 type = v2di_ftype_v2df_v2df;
19773 case V2DI_FTYPE_V2DI_SI_COUNT:
19774 type = v2di_ftype_v2di_int;
19776 case V2SI_FTYPE_V2SI_V2SI:
19777 case V2SI_FTYPE_V2SI_V2SI_COUNT:
19778 type = v2si_ftype_v2si_v2si;
19780 case V2SI_FTYPE_V4HI_V4HI:
19781 type = v2si_ftype_v4hi_v4hi;
19783 case V2SI_FTYPE_V2SF_V2SF:
19784 type = v2si_ftype_v2sf_v2sf;
19786 case V2SI_FTYPE_V2SI_SI_COUNT:
19787 type = v2si_ftype_v2si_int;
19789 case V2DF_FTYPE_V2DF_V2DF:
19790 case V2DF_FTYPE_V2DF_V2DF_SWAP:
19791 type = v2df_ftype_v2df_v2df;
19793 case V2DF_FTYPE_V2DF_V4SF:
19794 type = v2df_ftype_v2df_v4sf;
19796 case V2DF_FTYPE_V2DF_DI:
19797 type = v2df_ftype_v2df_int64;
19799 case V2DF_FTYPE_V2DF_SI:
19800 type = v2df_ftype_v2df_int;
19802 case V2SF_FTYPE_V2SF_V2SF:
19803 type = v2sf_ftype_v2sf_v2sf;
19805 case V1DI_FTYPE_V1DI_V1DI:
19806 case V1DI_FTYPE_V1DI_V1DI_COUNT:
19807 type = v1di_ftype_v1di_v1di;
19809 case V1DI_FTYPE_V8QI_V8QI:
19810 type = v1di_ftype_v8qi_v8qi;
19812 case V1DI_FTYPE_V2SI_V2SI:
19813 type = v1di_ftype_v2si_v2si;
19815 case V1DI_FTYPE_V1DI_SI_COUNT:
19816 type = v1di_ftype_v1di_int;
19818 case UINT64_FTYPE_UINT64_UINT64:
19819 type = uint64_ftype_uint64_uint64;
19821 case UINT_FTYPE_UINT_UINT:
19822 type = unsigned_ftype_unsigned_unsigned;
19824 case UINT_FTYPE_UINT_USHORT:
19825 type = unsigned_ftype_unsigned_ushort;
19827 case UINT_FTYPE_UINT_UCHAR:
19828 type = unsigned_ftype_unsigned_uchar;
19830 case V8HI_FTYPE_V8HI_INT:
19831 type = v8hi_ftype_v8hi_int;
19833 case V4SI_FTYPE_V4SI_INT:
19834 type = v4si_ftype_v4si_int;
19836 case V4HI_FTYPE_V4HI_INT:
19837 type = v4hi_ftype_v4hi_int;
19839 case V4SF_FTYPE_V4SF_INT:
19840 type = v4sf_ftype_v4sf_int;
19842 case V2DI_FTYPE_V2DI_INT:
19843 case V2DI2TI_FTYPE_V2DI_INT:
19844 type = v2di_ftype_v2di_int;
19846 case V2DF_FTYPE_V2DF_INT:
19847 type = v2df_ftype_v2df_int;
19849 case V16QI_FTYPE_V16QI_V16QI_V16QI:
19850 type = v16qi_ftype_v16qi_v16qi_v16qi;
19852 case V4SF_FTYPE_V4SF_V4SF_V4SF:
19853 type = v4sf_ftype_v4sf_v4sf_v4sf;
19855 case V2DF_FTYPE_V2DF_V2DF_V2DF:
19856 type = v2df_ftype_v2df_v2df_v2df;
19858 case V16QI_FTYPE_V16QI_V16QI_INT:
19859 type = v16qi_ftype_v16qi_v16qi_int;
19861 case V8HI_FTYPE_V8HI_V8HI_INT:
19862 type = v8hi_ftype_v8hi_v8hi_int;
19864 case V4SI_FTYPE_V4SI_V4SI_INT:
19865 type = v4si_ftype_v4si_v4si_int;
19867 case V4SF_FTYPE_V4SF_V4SF_INT:
19868 type = v4sf_ftype_v4sf_v4sf_int;
19870 case V2DI_FTYPE_V2DI_V2DI_INT:
19871 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
19872 type = v2di_ftype_v2di_v2di_int;
19874 case V2DF_FTYPE_V2DF_V2DF_INT:
19875 type = v2df_ftype_v2df_v2df_int;
19877 case V2DI_FTYPE_V2DI_UINT_UINT:
19878 type = v2di_ftype_v2di_unsigned_unsigned;
19880 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
19881 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
19883 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
19884 type = v1di_ftype_v1di_v1di_int;
19887 gcc_unreachable ();
19890 def_builtin_const (d->mask, d->name, type, d->code);
19893 /* pcmpestr[im] insns. */
19894 for (i = 0, d = bdesc_pcmpestr;
19895 i < ARRAY_SIZE (bdesc_pcmpestr);
19898 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19899 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19901 ftype = int_ftype_v16qi_int_v16qi_int_int;
19902 def_builtin_const (d->mask, d->name, ftype, d->code);
19905 /* pcmpistr[im] insns. */
19906 for (i = 0, d = bdesc_pcmpistr;
19907 i < ARRAY_SIZE (bdesc_pcmpistr);
19910 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19911 ftype = v16qi_ftype_v16qi_v16qi_int;
19913 ftype = int_ftype_v16qi_v16qi_int;
19914 def_builtin_const (d->mask, d->name, ftype, d->code);
19917 /* comi/ucomi insns. */
19918 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19919 if (d->mask == OPTION_MASK_ISA_SSE2)
19920 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19922 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19925 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19926 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
19929 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19930 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19932 /* SSE or 3DNow!A */
19933 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19936 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19938 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19939 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19942 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19943 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19948 /* Define AES built-in functions only if AES is enabled. */
19949 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
19950 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
19951 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
19952 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
19953 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
19954 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
19960 /* Define PCLMUL built-in function only if PCLMUL is enabled. */
19961 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
19964 /* Access to the vec_init patterns. */
19965 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19966 integer_type_node, NULL_TREE);
19967 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19969 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19970 short_integer_type_node,
19971 short_integer_type_node,
19972 short_integer_type_node, NULL_TREE);
19973 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19975 ftype = build_function_type_list (V8QI_type_node, char_type_node,
19976 char_type_node, char_type_node,
19977 char_type_node, char_type_node,
19978 char_type_node, char_type_node,
19979 char_type_node, NULL_TREE);
19980 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
19982 /* Access to the vec_extract patterns. */
19983 ftype = build_function_type_list (double_type_node, V2DF_type_node,
19984 integer_type_node, NULL_TREE);
19985 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19987 ftype = build_function_type_list (long_long_integer_type_node,
19988 V2DI_type_node, integer_type_node,
19990 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
19992 ftype = build_function_type_list (float_type_node, V4SF_type_node,
19993 integer_type_node, NULL_TREE);
19994 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
19996 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
19997 integer_type_node, NULL_TREE);
19998 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
20000 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
20001 integer_type_node, NULL_TREE);
20002 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
20004 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
20005 integer_type_node, NULL_TREE);
20006 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
20008 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
20009 integer_type_node, NULL_TREE);
20010 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
20012 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
20013 integer_type_node, NULL_TREE);
20014 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
20016 /* Access to the vec_set patterns. */
20017 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
20019 integer_type_node, NULL_TREE);
20020 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
20022 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
20024 integer_type_node, NULL_TREE);
20025 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
20027 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
20029 integer_type_node, NULL_TREE);
20030 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
20032 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
20034 integer_type_node, NULL_TREE);
20035 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
20037 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
20039 integer_type_node, NULL_TREE);
20040 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
20042 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
20044 integer_type_node, NULL_TREE);
20045 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
20047 /* Add SSE5 multi-arg argument instructions */
20048 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
20050 tree mtype = NULL_TREE;
20055 switch ((enum multi_arg_type)d->flag)
20057 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
20058 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
20059 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
20060 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
20061 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
20062 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
20063 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
20064 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
20065 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
20066 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
20067 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
20068 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
20069 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
20070 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
20071 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
20072 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
20073 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
20074 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
20075 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
20076 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
20077 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
20078 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
20079 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
20080 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
20081 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
20082 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
20083 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
20084 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
20085 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
20086 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
20087 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
20088 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
20089 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
20090 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
20091 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
20092 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
20093 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
20094 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
20095 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
20096 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
20097 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
20098 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
20099 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
20100 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
20101 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
20102 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
20103 case MULTI_ARG_UNKNOWN:
20105 gcc_unreachable ();
20109 def_builtin_const (d->mask, d->name, mtype, d->code);
/* Target hook TARGET_INIT_BUILTINS: register all ix86 builtin functions
   with the front end.  Visible body only delegates to the MMX/SSE
   registration routine defined above.
   NOTE(review): the embedded line numbers jump 20114 -> 20117, so the
   return type, braces and any other statements of this function are not
   visible in this listing -- confirm against the full source.  */
20114 ix86_init_builtins (void)
20117 ix86_init_mmx_sse_builtins ();
20120 /* Errors in the source file can cause expand_expr to return const0_rtx
20121 where we expect a vector. To avoid crashing, use one of the vector
20122 clear instructions. */
/* Replace X with the canonical zero-vector rtx of MODE when expand_expr
   handed back const0_rtx (which happens for erroneous source); otherwise
   X is left untouched.
   NOTE(review): embedded line numbers skip 20125 and stop at 20127, so
   the braces and the trailing `return x;` are not visible here.  */
20124 safe_vector_operand (rtx x, enum machine_mode mode)
20126 if (x == const0_rtx)
20127 x = CONST0_RTX (mode);
20131 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Subroutine of ix86_expand_builtin: expand a two-operand builtin.
   ICODE is the named insn pattern to emit, EXP the CALL_EXPR being
   expanded, TARGET a suggested result rtx (possibly 0).
   NOTE(review): embedded line numbers show gaps (e.g. 20167 -> 20176),
   so braces and the final emit_insn/return sequence are not visible in
   this listing.  */
20134 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
20137 tree arg0 = CALL_EXPR_ARG (exp, 0);
20138 tree arg1 = CALL_EXPR_ARG (exp, 1);
20139 rtx op0 = expand_normal (arg0);
20140 rtx op1 = expand_normal (arg1);
/* Modes the pattern declares for its result and its two inputs.  */
20141 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20142 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20143 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx coming back from erroneous source
   (see safe_vector_operand).  */
20145 if (VECTOR_MODE_P (mode0))
20146 op0 = safe_vector_operand (op0, mode0);
20147 if (VECTOR_MODE_P (mode1))
20148 op1 = safe_vector_operand (op1, mode1);
/* Use a fresh pseudo unless TARGET already has the right mode and
   satisfies the pattern's operand-0 predicate.  */
20150 if (optimize || !target
20151 || GET_MODE (target) != tmode
20152 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20153 target = gen_reg_rtx (tmode);
/* An SImode value feeding a TImode pattern operand is first loaded
   into a V4SI register via sse2_loadd and then viewed as TImode.  */
20155 if (GET_MODE (op1) == SImode && mode1 == TImode)
20157 rtx x = gen_reg_rtx (V4SImode);
20158 emit_insn (gen_sse2_loadd (x, op1));
20159 op1 = gen_lowpart (TImode, x);
/* Force each operand into a register of the expected mode if it
   fails the pattern's predicate.  */
20162 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20163 op0 = copy_to_mode_reg (mode0, op0);
20164 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20165 op1 = copy_to_mode_reg (mode1, op1);
20167 pat = GEN_FCN (icode) (target, op0, op1);
20176 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Subroutine of ix86_expand_builtin: expand an SSE5 multi-argument
   builtin (1-4 operands).  M_TYPE selects operand count and flavor;
   SUB_CODE carries either a comparison code or an extra integer
   sub-opcode, depending on the flavor.
   NOTE(review): the embedded line numbers have many gaps here (e.g. the
   `nargs = N; break;` lines after each case group, and the trailing
   emit_insn/return), so the listing below is not the complete body.  */
20179 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
20180 enum multi_arg_type m_type,
20181 enum insn_code sub_code)
20186 bool comparison_p = false;
20188 bool last_arg_constant = false;
20189 int num_memory = 0;
20192 enum machine_mode mode;
20195 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Three-operand forms (presumably set nargs = 3 in elided lines).  */
20199 case MULTI_ARG_3_SF:
20200 case MULTI_ARG_3_DF:
20201 case MULTI_ARG_3_DI:
20202 case MULTI_ARG_3_SI:
20203 case MULTI_ARG_3_SI_DI:
20204 case MULTI_ARG_3_HI:
20205 case MULTI_ARG_3_HI_SI:
20206 case MULTI_ARG_3_QI:
20207 case MULTI_ARG_3_PERMPS:
20208 case MULTI_ARG_3_PERMPD:
/* Plain two-operand forms.  */
20212 case MULTI_ARG_2_SF:
20213 case MULTI_ARG_2_DF:
20214 case MULTI_ARG_2_DI:
20215 case MULTI_ARG_2_SI:
20216 case MULTI_ARG_2_HI:
20217 case MULTI_ARG_2_QI:
/* Two operands where the final one must be an immediate.  */
20221 case MULTI_ARG_2_DI_IMM:
20222 case MULTI_ARG_2_SI_IMM:
20223 case MULTI_ARG_2_HI_IMM:
20224 case MULTI_ARG_2_QI_IMM:
20226 last_arg_constant = true;
/* One-operand (unary / conversion) forms.  */
20229 case MULTI_ARG_1_SF:
20230 case MULTI_ARG_1_DF:
20231 case MULTI_ARG_1_DI:
20232 case MULTI_ARG_1_SI:
20233 case MULTI_ARG_1_HI:
20234 case MULTI_ARG_1_QI:
20235 case MULTI_ARG_1_SI_DI:
20236 case MULTI_ARG_1_HI_DI:
20237 case MULTI_ARG_1_HI_SI:
20238 case MULTI_ARG_1_QI_DI:
20239 case MULTI_ARG_1_QI_SI:
20240 case MULTI_ARG_1_QI_HI:
20241 case MULTI_ARG_1_PH2PS:
20242 case MULTI_ARG_1_PS2PH:
/* Comparison forms: SUB_CODE is treated as an rtx comparison code and
   an extra comparison operand is inserted (see comparison_p below).  */
20246 case MULTI_ARG_2_SF_CMP:
20247 case MULTI_ARG_2_DF_CMP:
20248 case MULTI_ARG_2_DI_CMP:
20249 case MULTI_ARG_2_SI_CMP:
20250 case MULTI_ARG_2_HI_CMP:
20251 case MULTI_ARG_2_QI_CMP:
20253 comparison_p = true;
/* Test forms (pcom* true/false variants -- behavior of the elided
   lines not visible here).  */
20256 case MULTI_ARG_2_SF_TF:
20257 case MULTI_ARG_2_DF_TF:
20258 case MULTI_ARG_2_DI_TF:
20259 case MULTI_ARG_2_SI_TF:
20260 case MULTI_ARG_2_HI_TF:
20261 case MULTI_ARG_2_QI_TF:
20266 case MULTI_ARG_UNKNOWN:
20268 gcc_unreachable ();
/* Use a fresh pseudo unless TARGET already suits the pattern.  */
20271 if (optimize || !target
20272 || GET_MODE (target) != tmode
20273 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20274 target = gen_reg_rtx (tmode);
20276 gcc_assert (nargs <= 4);
20278 for (i = 0; i < nargs; i++)
20280 tree arg = CALL_EXPR_ARG (exp, i);
20281 rtx op = expand_normal (arg);
/* For comparisons operand 1 of the pattern is the comparison rtx, so
   real argument I maps to pattern operand I+2 instead of I+1.  */
20282 int adjust = (comparison_p) ? 1 : 0;
20283 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
20285 if (last_arg_constant && i == nargs-1)
20287 if (GET_CODE (op) != CONST_INT)
20289 error ("last argument must be an immediate");
/* Return a dummy register so expansion can continue after the error.  */
20290 return gen_reg_rtx (tmode);
20295 if (VECTOR_MODE_P (mode))
20296 op = safe_vector_operand (op, mode);
20298 /* If we aren't optimizing, only allow one memory operand to be
20300 if (memory_operand (op, mode))
20303 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
20306 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
20308 op = force_reg (mode, op);
20312 args[i].mode = mode;
/* Emit the pattern; shape depends on arity and flavor (the switch
   scaffolding around these calls is partially elided).  */
20318 pat = GEN_FCN (icode) (target, args[0].op);
20323 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
20324 GEN_INT ((int)sub_code));
20325 else if (! comparison_p)
20326 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
/* Comparison: build a SUB_CODE comparison rtx and pass it as the
   pattern's second operand.  */
20329 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
20333 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
20338 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
20342 gcc_unreachable ();
20352 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
20353 insns with vec_merge. */
/* Expand a scalar unop builtin whose pattern is a vec_merge: the same
   vector is used both as the unary input (operand 2) and as the source
   of the pass-through lanes, so both operands get mode MODE0.
   NOTE(review): gaps in the embedded numbering (20368 -> 20370,
   20375 -> 20378, and after 20381) mean the op1 assignment and the
   final emit_insn/return are not visible in this listing.  */
20356 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
20360 tree arg0 = CALL_EXPR_ARG (exp, 0);
20361 rtx op1, op0 = expand_normal (arg0);
20362 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20363 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Use a fresh pseudo unless TARGET already suits the pattern.  */
20365 if (optimize || !target
20366 || GET_MODE (target) != tmode
20367 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20368 target = gen_reg_rtx (tmode);
20370 if (VECTOR_MODE_P (mode0))
20371 op0 = safe_vector_operand (op0, mode0);
20373 if ((optimize && !register_operand (op0, mode0))
20374 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20375 op0 = copy_to_mode_reg (mode0, op0);
/* Note op1 is checked against MODE0 as well -- both pattern inputs
   share the input vector's mode.  */
20378 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20379 op1 = copy_to_mode_reg (mode0, op1);
20381 pat = GEN_FCN (icode) (target, op0, op1);
20388 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE comparison builtin described by D.  The comparison
   code comes from d->comparison; SWAP requests exchanging the two
   operands for comparisons the hardware only provides in one
   direction (e.g. GT expressed as LT with swapped inputs).
   NOTE(review): the lines that actually perform the swap after the
   tmp copy (20416-20418) and the final emit/return are elided in this
   listing.  */
20391 ix86_expand_sse_compare (const struct builtin_description *d,
20392 tree exp, rtx target, bool swap)
20395 tree arg0 = CALL_EXPR_ARG (exp, 0);
20396 tree arg1 = CALL_EXPR_ARG (exp, 1);
20397 rtx op0 = expand_normal (arg0);
20398 rtx op1 = expand_normal (arg1);
20400 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20401 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20402 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20403 enum rtx_code comparison = d->comparison;
/* Guard against const0_rtx from erroneous source.  */
20405 if (VECTOR_MODE_P (mode0))
20406 op0 = safe_vector_operand (op0, mode0);
20407 if (VECTOR_MODE_P (mode1))
20408 op1 = safe_vector_operand (op1, mode1);
20410 /* Swap operands if we have a comparison that isn't available in
20414 rtx tmp = gen_reg_rtx (mode1);
20415 emit_move_insn (tmp, op1);
/* Use a fresh pseudo unless TARGET already suits the pattern.  */
20420 if (optimize || !target
20421 || GET_MODE (target) != tmode
20422 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20423 target = gen_reg_rtx (tmode);
20425 if ((optimize && !register_operand (op0, mode0))
20426 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20427 op0 = copy_to_mode_reg (mode0, op0);
20428 if ((optimize && !register_operand (op1, mode1))
20429 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20430 op1 = copy_to_mode_reg (mode1, op1);
/* The pattern takes the comparison rtx itself as a fourth operand.  */
20432 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20433 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20440 /* Subroutine of ix86_expand_builtin to take care of insns with
20441 variable number of operands. */
/* General driver for builtins with a variable number of operands:
   decode D->flag into an argument count / constraints, then dispatch
   to the specialized expanders (unop, unop-vec-merge, binop, compare)
   or expand the pattern directly.
   NOTE(review): this listing has many numbering gaps -- the
   `nargs = N;`, `break;`, `rmode = ...;` and `swap = true;` statements
   after each case group, plus the closing emit_insn/return, are not
   visible.  Comments below describe only what the visible lines show.  */
20444 ix86_expand_args_builtin (const struct builtin_description *d,
20445 tree exp, rtx target)
20447 rtx pat, real_target;
20448 unsigned int i, nargs;
20449 unsigned int nargs_constant = 0;
20450 int num_memory = 0;
20454 enum machine_mode mode;
20456 bool last_arg_count = false;
20457 enum insn_code icode = d->icode;
20458 const struct insn_data *insn_p = &insn_data[icode];
20459 enum machine_mode tmode = insn_p->operand[0].mode;
/* rmode, when set by a case below (elided), gives a result mode that
   differs from the pattern's tmode (e.g. TImode views of V2DI).  */
20460 enum machine_mode rmode = VOIDmode;
20462 enum rtx_code comparison = d->comparison;
20464 switch ((enum ix86_builtin_type) d->flag)
/* Plain unary builtins.  */
20466 case FLOAT128_FTYPE_FLOAT128:
20467 case FLOAT_FTYPE_FLOAT:
20468 case INT64_FTYPE_V4SF:
20469 case INT64_FTYPE_V2DF:
20470 case INT_FTYPE_V16QI:
20471 case INT_FTYPE_V8QI:
20472 case INT_FTYPE_V4SF:
20473 case INT_FTYPE_V2DF:
20474 case V16QI_FTYPE_V16QI:
20475 case V8HI_FTYPE_V8HI:
20476 case V8HI_FTYPE_V16QI:
20477 case V8QI_FTYPE_V8QI:
20478 case V4SI_FTYPE_V4SI:
20479 case V4SI_FTYPE_V16QI:
20480 case V4SI_FTYPE_V4SF:
20481 case V4SI_FTYPE_V8HI:
20482 case V4SI_FTYPE_V2DF:
20483 case V4HI_FTYPE_V4HI:
20484 case V4SF_FTYPE_V4SF:
20485 case V4SF_FTYPE_V4SI:
20486 case V4SF_FTYPE_V2DF:
20487 case V2DI_FTYPE_V2DI:
20488 case V2DI_FTYPE_V16QI:
20489 case V2DI_FTYPE_V8HI:
20490 case V2DI_FTYPE_V4SI:
20491 case V2DF_FTYPE_V2DF:
20492 case V2DF_FTYPE_V4SI:
20493 case V2DF_FTYPE_V4SF:
20494 case V2DF_FTYPE_V2SI:
20495 case V2SI_FTYPE_V2SI:
20496 case V2SI_FTYPE_V4SF:
20497 case V2SI_FTYPE_V2SF:
20498 case V2SI_FTYPE_V2DF:
20499 case V2SF_FTYPE_V2SF:
20500 case V2SF_FTYPE_V2SI:
/* Scalar unops implemented as vec_merge patterns.  */
20503 case V4SF_FTYPE_V4SF_VEC_MERGE:
20504 case V2DF_FTYPE_V2DF_VEC_MERGE:
20505 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Two-operand builtins; plain binops go to the binop expander when
   no comparison code is attached.  */
20506 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
20507 case V16QI_FTYPE_V16QI_V16QI:
20508 case V16QI_FTYPE_V8HI_V8HI:
20509 case V8QI_FTYPE_V8QI_V8QI:
20510 case V8QI_FTYPE_V4HI_V4HI:
20511 case V8HI_FTYPE_V8HI_V8HI:
20512 case V8HI_FTYPE_V16QI_V16QI:
20513 case V8HI_FTYPE_V4SI_V4SI:
20514 case V4SI_FTYPE_V4SI_V4SI:
20515 case V4SI_FTYPE_V8HI_V8HI:
20516 case V4SI_FTYPE_V4SF_V4SF:
20517 case V4SI_FTYPE_V2DF_V2DF:
20518 case V4HI_FTYPE_V4HI_V4HI:
20519 case V4HI_FTYPE_V8QI_V8QI:
20520 case V4HI_FTYPE_V2SI_V2SI:
20521 case V4SF_FTYPE_V4SF_V4SF:
20522 case V4SF_FTYPE_V4SF_V2SI:
20523 case V4SF_FTYPE_V4SF_V2DF:
20524 case V4SF_FTYPE_V4SF_DI:
20525 case V4SF_FTYPE_V4SF_SI:
20526 case V2DI_FTYPE_V2DI_V2DI:
20527 case V2DI_FTYPE_V16QI_V16QI:
20528 case V2DI_FTYPE_V4SI_V4SI:
20529 case V2DI_FTYPE_V2DI_V16QI:
20530 case V2DI_FTYPE_V2DF_V2DF:
20531 case V2SI_FTYPE_V2SI_V2SI:
20532 case V2SI_FTYPE_V4HI_V4HI:
20533 case V2SI_FTYPE_V2SF_V2SF:
20534 case V2DF_FTYPE_V2DF_V2DF:
20535 case V2DF_FTYPE_V2DF_V4SF:
20536 case V2DF_FTYPE_V2DF_DI:
20537 case V2DF_FTYPE_V2DF_SI:
20538 case V2SF_FTYPE_V2SF_V2SF:
20539 case V1DI_FTYPE_V1DI_V1DI:
20540 case V1DI_FTYPE_V8QI_V8QI:
20541 case V1DI_FTYPE_V2SI_V2SI:
20542 if (comparison == UNKNOWN)
20543 return ix86_expand_binop_builtin (icode, exp, target);
/* Comparisons whose operands must be exchanged (swap assignment is
   in elided lines).  */
20546 case V4SF_FTYPE_V4SF_V4SF_SWAP:
20547 case V2DF_FTYPE_V2DF_V2DF_SWAP:
20548 gcc_assert (comparison != UNKNOWN);
/* Shift/rotate builtins whose last argument is a count.  */
20552 case V8HI_FTYPE_V8HI_V8HI_COUNT:
20553 case V8HI_FTYPE_V8HI_SI_COUNT:
20554 case V4SI_FTYPE_V4SI_V4SI_COUNT:
20555 case V4SI_FTYPE_V4SI_SI_COUNT:
20556 case V4HI_FTYPE_V4HI_V4HI_COUNT:
20557 case V4HI_FTYPE_V4HI_SI_COUNT:
20558 case V2DI_FTYPE_V2DI_V2DI_COUNT:
20559 case V2DI_FTYPE_V2DI_SI_COUNT:
20560 case V2SI_FTYPE_V2SI_V2SI_COUNT:
20561 case V2SI_FTYPE_V2SI_SI_COUNT:
20562 case V1DI_FTYPE_V1DI_V1DI_COUNT:
20563 case V1DI_FTYPE_V1DI_SI_COUNT:
20565 last_arg_count = true;
20567 case UINT64_FTYPE_UINT64_UINT64:
20568 case UINT_FTYPE_UINT_UINT:
20569 case UINT_FTYPE_UINT_USHORT:
20570 case UINT_FTYPE_UINT_UCHAR:
/* One trailing immediate required; the *2TI variants also set rmode
   in elided lines.  */
20573 case V2DI2TI_FTYPE_V2DI_INT:
20576 nargs_constant = 1;
20578 case V8HI_FTYPE_V8HI_INT:
20579 case V4SI_FTYPE_V4SI_INT:
20580 case V4HI_FTYPE_V4HI_INT:
20581 case V4SF_FTYPE_V4SF_INT:
20582 case V2DI_FTYPE_V2DI_INT:
20583 case V2DF_FTYPE_V2DF_INT:
20585 nargs_constant = 1;
20587 case V16QI_FTYPE_V16QI_V16QI_V16QI:
20588 case V4SF_FTYPE_V4SF_V4SF_V4SF:
20589 case V2DF_FTYPE_V2DF_V2DF_V2DF:
20592 case V16QI_FTYPE_V16QI_V16QI_INT:
20593 case V8HI_FTYPE_V8HI_V8HI_INT:
20594 case V4SI_FTYPE_V4SI_V4SI_INT:
20595 case V4SF_FTYPE_V4SF_V4SF_INT:
20596 case V2DI_FTYPE_V2DI_V2DI_INT:
20597 case V2DF_FTYPE_V2DF_V2DF_INT:
20599 nargs_constant = 1;
20601 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
20604 nargs_constant = 1;
20606 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
20609 nargs_constant = 1;
/* Two trailing immediates (insertq/extrq-style builtins).  */
20611 case V2DI_FTYPE_V2DI_UINT_UINT:
20613 nargs_constant = 2;
20615 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
20617 nargs_constant = 2;
20620 gcc_unreachable ();
20623 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparisons are always binary and handled by the SSE compare
   expander.  */
20625 if (comparison != UNKNOWN)
20627 gcc_assert (nargs == 2);
20628 return ix86_expand_sse_compare (d, exp, target, swap);
/* When rmode differs from tmode, compute into an rmode pseudo and
   hand the pattern a tmode subreg view of it.  */
20631 if (rmode == VOIDmode || rmode == tmode)
20635 || GET_MODE (target) != tmode
20636 || ! (*insn_p->operand[0].predicate) (target, tmode))
20637 target = gen_reg_rtx (tmode);
20638 real_target = target;
20642 target = gen_reg_rtx (rmode);
20643 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
20646 for (i = 0; i < nargs; i++)
20648 tree arg = CALL_EXPR_ARG (exp, i);
20649 rtx op = expand_normal (arg);
20650 enum machine_mode mode = insn_p->operand[i + 1].mode;
20651 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
20653 if (last_arg_count && (i + 1) == nargs)
20655 /* SIMD shift insns take either an 8-bit immediate or
20656 register as count. But builtin functions take int as
20657 count. If count doesn't match, we put it in register. */
20660 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
20661 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
20662 op = copy_to_reg (op);
/* Arguments within nargs_constant of the end must be immediates;
   diagnose with the width the specific pattern requires.  */
20665 else if ((nargs - i) <= nargs_constant)
20670 case CODE_FOR_sse4_1_roundpd:
20671 case CODE_FOR_sse4_1_roundps:
20672 case CODE_FOR_sse4_1_roundsd:
20673 case CODE_FOR_sse4_1_roundss:
20674 case CODE_FOR_sse4_1_blendps:
20675 error ("the last argument must be a 4-bit immediate");
20678 case CODE_FOR_sse4_1_blendpd:
20679 error ("the last argument must be a 2-bit immediate");
20683 switch (nargs_constant)
20686 if ((nargs - i) == nargs_constant)
20688 error ("the next to last argument must be an 8-bit immediate");
20692 error ("the last argument must be an 8-bit immediate");
20695 gcc_unreachable ();
20702 if (VECTOR_MODE_P (mode))
20703 op = safe_vector_operand (op, mode);
20705 /* If we aren't optimizing, only allow one memory operand to
20707 if (memory_operand (op, mode))
20710 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
20712 if (optimize || !match || num_memory > 1)
20713 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: reload into a register and reinterpret via subreg.  */
20717 op = copy_to_reg (op);
20718 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
20723 args[i].mode = mode;
/* Emit the pattern with the collected operands (arity dispatch
   scaffolding partially elided).  */
20729 pat = GEN_FCN (icode) (real_target, args[0].op);
20732 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
20735 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
20739 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
20740 args[2].op, args[3].op);
20743 gcc_unreachable ();
20753 /* Subroutine of ix86_expand_builtin to take care of special insns
20754 with variable number of operands. */
/* NOTE(review): elided listing -- the embedded original line numbers are
   not contiguous, so braces/declarations between them are missing here.  */
20757 ix86_expand_special_args_builtin (const struct builtin_description *d,
20758 tree exp, rtx target)
20762 unsigned int i, nargs, arg_adjust, memory;
20766 enum machine_mode mode;
20768 enum insn_code icode = d->icode;
20769 bool last_arg_constant = false;
20770 const struct insn_data *insn_p = &insn_data[icode];
20771 enum machine_mode tmode = insn_p->operand[0].mode;
/* 'class' records whether this builtin loads from or stores to memory.  */
20772 enum { load, store } class;
/* Dispatch on the builtin's prototype encoded in d->flag to set up
   nargs / class / memory-operand bookkeeping (setup lines elided).  */
20774 switch ((enum ix86_special_builtin_type) d->flag)
20776 case VOID_FTYPE_VOID:
20777 emit_insn (GEN_FCN (icode) (target))
20779 case V2DI_FTYPE_PV2DI:
20780 case V16QI_FTYPE_PCCHAR:
20781 case V4SF_FTYPE_PCFLOAT:
20782 case V2DF_FTYPE_PCDOUBLE:
20787 case VOID_FTYPE_PV2SF_V4SF:
20788 case VOID_FTYPE_PV2DI_V2DI:
20789 case VOID_FTYPE_PCHAR_V16QI:
20790 case VOID_FTYPE_PFLOAT_V4SF:
20791 case VOID_FTYPE_PDOUBLE_V2DF:
20792 case VOID_FTYPE_PDI_DI:
20793 case VOID_FTYPE_PINT_INT:
20796 /* Reserve memory operand for target. */
20797 memory = ARRAY_SIZE (args);
20799 case V4SF_FTYPE_V4SF_PCV2SF:
20800 case V2DF_FTYPE_V2DF_PCDOUBLE:
20806 gcc_unreachable ();
20809 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For a store builtin the first call argument is the destination
   address; wrap it in a MEM of the insn's operand-0 mode.  */
20811 if (class == store)
20813 arg = CALL_EXPR_ARG (exp, 0);
20814 op = expand_normal (arg);
20815 gcc_assert (target == 0);
20816 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
20824 || GET_MODE (target) != tmode
20825 || ! (*insn_p->operand[0].predicate) (target, tmode))
20826 target = gen_reg_rtx (tmode);
/* Expand every remaining argument and legitimize it against the
   matching insn operand predicate.  */
20829 for (i = 0; i < nargs; i++)
20831 enum machine_mode mode = insn_p->operand[i + 1].mode;
20834 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
20835 op = expand_normal (arg);
20836 match = (*insn_p->operand[i + 1].predicate) (op, mode);
20838 if (last_arg_constant && (i + 1) == nargs)
20844 error ("the last argument must be an 8-bit immediate");
20852 /* This must be the memory operand. */
20853 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
20854 gcc_assert (GET_MODE (op) == mode
20855 || GET_MODE (op) == VOIDmode);
20859 /* This must be register. */
20860 if (VECTOR_MODE_P (mode))
20861 op = safe_vector_operand (op, mode);
20863 gcc_assert (GET_MODE (op) == mode
20864 || GET_MODE (op) == VOIDmode);
20865 op = copy_to_mode_reg (mode, op);
20870 args[i].mode = mode;
20876 pat = GEN_FCN (icode) (target, args[0].op);
20879 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
20882 gcc_unreachable ();
/* A store builtin produces no value; a load returns the target reg.  */
20888 return class == store ? 0 : target;
20891 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* NOTE(review): elided listing -- interior lines are missing between the
   embedded original line numbers.  Expands a COMISS/COMISD-style builtin:
   emit the compare, then set the low byte of a zeroed SImode register
   from the resulting flags comparison.  */
20894 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20898 tree arg0 = CALL_EXPR_ARG (exp, 0);
20899 tree arg1 = CALL_EXPR_ARG (exp, 1);
20900 rtx op0 = expand_normal (arg0);
20901 rtx op1 = expand_normal (arg1);
20902 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20903 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20904 enum rtx_code comparison = d->comparison;
20906 if (VECTOR_MODE_P (mode0))
20907 op0 = safe_vector_operand (op0, mode0);
20908 if (VECTOR_MODE_P (mode1))
20909 op1 = safe_vector_operand (op1, mode1);
20911 /* Swap operands if we have a comparison that isn't available in
20913 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Result: zero an SImode reg and write the setcc into its low byte.  */
20920 target = gen_reg_rtx (SImode);
20921 emit_move_insn (target, const0_rtx);
20922 target = gen_rtx_SUBREG (QImode, target, 0);
20924 if ((optimize && !register_operand (op0, mode0))
20925 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20926 op0 = copy_to_mode_reg (mode0, op0);
20927 if ((optimize && !register_operand (op1, mode1))
20928 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20929 op1 = copy_to_mode_reg (mode1, op1);
20931 pat = GEN_FCN (d->icode) (op0, op1);
/* STRICT_LOW_PART keeps the upper 24 bits (already zero) intact.  */
20935 emit_insn (gen_rtx_SET (VOIDmode,
20936 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20937 gen_rtx_fmt_ee (comparison, QImode,
20941 return SUBREG_REG (target);
20944 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* NOTE(review): elided listing -- interior lines are missing between the
   embedded original line numbers.  Same shape as ix86_expand_sse_comi:
   emit the PTEST, then materialize the flag result as a 0/1 SImode
   value via a setcc into the low byte of a zeroed register.  */
20947 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20951 tree arg0 = CALL_EXPR_ARG (exp, 0);
20952 tree arg1 = CALL_EXPR_ARG (exp, 1);
20953 rtx op0 = expand_normal (arg0);
20954 rtx op1 = expand_normal (arg1);
20955 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20956 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20957 enum rtx_code comparison = d->comparison;
20959 if (VECTOR_MODE_P (mode0))
20960 op0 = safe_vector_operand (op0, mode0);
20961 if (VECTOR_MODE_P (mode1))
20962 op1 = safe_vector_operand (op1, mode1);
20964 target = gen_reg_rtx (SImode);
20965 emit_move_insn (target, const0_rtx);
20966 target = gen_rtx_SUBREG (QImode, target, 0);
20968 if ((optimize && !register_operand (op0, mode0))
20969 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20970 op0 = copy_to_mode_reg (mode0, op0);
20971 if ((optimize && !register_operand (op1, mode1))
20972 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20973 op1 = copy_to_mode_reg (mode1, op1);
20975 pat = GEN_FCN (d->icode) (op0, op1);
20979 emit_insn (gen_rtx_SET (VOIDmode,
20980 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20981 gen_rtx_fmt_ee (comparison, QImode,
20985 return SUBREG_REG (target);
20988 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* NOTE(review): elided listing -- interior lines are missing between the
   embedded original line numbers.  Expands the 5-argument SSE4.2
   PCMPESTRI/PCMPESTRM builtins: operands 0/1 of the insn are the index
   and mask outputs, operands 2-5 are the two vectors with their explicit
   lengths, operand 6 is the immediate control byte.  */
20991 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20992 tree exp, rtx target)
20995 tree arg0 = CALL_EXPR_ARG (exp, 0);
20996 tree arg1 = CALL_EXPR_ARG (exp, 1);
20997 tree arg2 = CALL_EXPR_ARG (exp, 2);
20998 tree arg3 = CALL_EXPR_ARG (exp, 3);
20999 tree arg4 = CALL_EXPR_ARG (exp, 4);
21000 rtx scratch0, scratch1;
21001 rtx op0 = expand_normal (arg0);
21002 rtx op1 = expand_normal (arg1);
21003 rtx op2 = expand_normal (arg2);
21004 rtx op3 = expand_normal (arg3);
21005 rtx op4 = expand_normal (arg4);
21006 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
21008 tmode0 = insn_data[d->icode].operand[0].mode;
21009 tmode1 = insn_data[d->icode].operand[1].mode;
21010 modev2 = insn_data[d->icode].operand[2].mode;
21011 modei3 = insn_data[d->icode].operand[3].mode;
21012 modev4 = insn_data[d->icode].operand[4].mode;
21013 modei5 = insn_data[d->icode].operand[5].mode;
21014 modeimm = insn_data[d->icode].operand[6].mode;
21016 if (VECTOR_MODE_P (modev2))
21017 op0 = safe_vector_operand (op0, modev2);
21018 if (VECTOR_MODE_P (modev4))
21019 op2 = safe_vector_operand (op2, modev4);
/* Legitimize each input against the insn's operand predicates.  */
21021 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
21022 op0 = copy_to_mode_reg (modev2, op0);
21023 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
21024 op1 = copy_to_mode_reg (modei3, op1);
21025 if ((optimize && !register_operand (op2, modev4))
21026 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
21027 op2 = copy_to_mode_reg (modev4, op2);
21028 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
21029 op3 = copy_to_mode_reg (modei5, op3);
21031 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
21033 error ("the fifth argument must be a 8-bit immediate");
/* PCMPESTRI wants the index output; the mask goes to a scratch.  */
21037 if (d->code == IX86_BUILTIN_PCMPESTRI128)
21039 if (optimize || !target
21040 || GET_MODE (target) != tmode0
21041 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
21042 target = gen_reg_rtx (tmode0);
21044 scratch1 = gen_reg_rtx (tmode1);
21046 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* PCMPESTRM wants the mask output; the index goes to a scratch.  */
21048 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
21050 if (optimize || !target
21051 || GET_MODE (target) != tmode1
21052 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
21053 target = gen_reg_rtx (tmode1);
21055 scratch0 = gen_reg_rtx (tmode0);
21057 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Otherwise the builtin reads a flags bit: both outputs are scratch.  */
21061 gcc_assert (d->flag);
21063 scratch0 = gen_reg_rtx (tmode0);
21064 scratch1 = gen_reg_rtx (tmode1);
21066 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
21076 target = gen_reg_rtx (SImode);
21077 emit_move_insn (target, const0_rtx);
21078 target = gen_rtx_SUBREG (QImode, target, 0);
/* d->flag here encodes which flags register/bit the builtin tests.  */
21081 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
21082 gen_rtx_fmt_ee (EQ, QImode,
21083 gen_rtx_REG ((enum machine_mode) d->flag,
21086 return SUBREG_REG (target);
21093 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* NOTE(review): elided listing -- interior lines are missing between the
   embedded original line numbers.  3-argument (implicit-length) variant
   of ix86_expand_sse_pcmpestr; structure is otherwise identical.  */
21096 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
21097 tree exp, rtx target)
21100 tree arg0 = CALL_EXPR_ARG (exp, 0);
21101 tree arg1 = CALL_EXPR_ARG (exp, 1);
21102 tree arg2 = CALL_EXPR_ARG (exp, 2);
21103 rtx scratch0, scratch1;
21104 rtx op0 = expand_normal (arg0);
21105 rtx op1 = expand_normal (arg1);
21106 rtx op2 = expand_normal (arg2);
21107 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
21109 tmode0 = insn_data[d->icode].operand[0].mode;
21110 tmode1 = insn_data[d->icode].operand[1].mode;
21111 modev2 = insn_data[d->icode].operand[2].mode;
21112 modev3 = insn_data[d->icode].operand[3].mode;
21113 modeimm = insn_data[d->icode].operand[4].mode;
21115 if (VECTOR_MODE_P (modev2))
21116 op0 = safe_vector_operand (op0, modev2);
21117 if (VECTOR_MODE_P (modev3))
21118 op1 = safe_vector_operand (op1, modev3);
21120 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
21121 op0 = copy_to_mode_reg (modev2, op0);
21122 if ((optimize && !register_operand (op1, modev3))
21123 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
21124 op1 = copy_to_mode_reg (modev3, op1);
21126 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
21128 error ("the third argument must be a 8-bit immediate");
/* PCMPISTRI: index result to target, mask to a scratch register.  */
21132 if (d->code == IX86_BUILTIN_PCMPISTRI128)
21134 if (optimize || !target
21135 || GET_MODE (target) != tmode0
21136 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
21137 target = gen_reg_rtx (tmode0);
21139 scratch1 = gen_reg_rtx (tmode1);
21141 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* PCMPISTRM: mask result to target, index to a scratch register.  */
21143 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
21145 if (optimize || !target
21146 || GET_MODE (target) != tmode1
21147 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
21148 target = gen_reg_rtx (tmode1);
21150 scratch0 = gen_reg_rtx (tmode0);
21152 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flags-testing variants: both machine outputs are scratch.  */
21156 gcc_assert (d->flag);
21158 scratch0 = gen_reg_rtx (tmode0);
21159 scratch1 = gen_reg_rtx (tmode1);
21161 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
21171 target = gen_reg_rtx (SImode);
21172 emit_move_insn (target, const0_rtx);
21173 target = gen_rtx_SUBREG (QImode, target, 0);
21176 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
21177 gen_rtx_fmt_ee (EQ, QImode,
21178 gen_rtx_REG ((enum machine_mode) d->flag,
21181 return SUBREG_REG (target);
21187 /* Return the integer constant in ARG. Constrain it to be in the range
21188 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): elided listing -- the error-recovery lines after the
   diagnostic are missing from this excerpt.  */
21191 get_element_number (tree vec_type, tree arg)
21193 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constant or out-of-range selectors; the comma expression
   assigns ELT only after host_integerp has vouched for ARG.  */
21195 if (!host_integerp (arg, 1)
21196 || (elt = tree_low_cst (arg, 1), elt > max))
21198 error ("selector must be an integer constant in the range 0..%wi", max);
21205 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21206 ix86_expand_vector_init. We DO have language-level syntax for this, in
21207 the form of (type){ init-list }. Except that since we can't place emms
21208 instructions from inside the compiler, we can't allow the use of MMX
21209 registers unless the user explicitly asks for it. So we do *not* define
21210 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
21211 we have builtins invoked by mmintrin.h that gives us license to emit
21212 these sorts of instructions. */
/* NOTE(review): elided listing -- interior lines are missing between the
   embedded original line numbers.  */
21215 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
21217 enum machine_mode tmode = TYPE_MODE (type);
21218 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
21219 int i, n_elt = GET_MODE_NUNITS (tmode);
21220 rtvec v = rtvec_alloc (n_elt);
21222 gcc_assert (VECTOR_MODE_P (tmode));
21223 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each call argument to an rtx in the vector's element mode.  */
21225 for (i = 0; i < n_elt; ++i)
21227 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
21228 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
21231 if (!target || !register_operand (target, tmode))
21232 target = gen_reg_rtx (tmode);
21234 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
21238 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21239 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
21240 had a language-level syntax for referencing vector elements. */
/* NOTE(review): elided listing -- interior lines are missing between the
   embedded original line numbers.  */
21243 ix86_expand_vec_ext_builtin (tree exp, rtx target)
21245 enum machine_mode tmode, mode0;
21250 arg0 = CALL_EXPR_ARG (exp, 0);
21251 arg1 = CALL_EXPR_ARG (exp, 1);
21253 op0 = expand_normal (arg0);
/* ARG1 must be a constant in-range element selector (diagnosed there).  */
21254 elt = get_element_number (TREE_TYPE (arg0), arg1);
21256 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21257 mode0 = TYPE_MODE (TREE_TYPE (arg0));
21258 gcc_assert (VECTOR_MODE_P (mode0));
21260 op0 = force_reg (mode0, op0);
21262 if (optimize || !target || !register_operand (target, tmode))
21263 target = gen_reg_rtx (tmode);
21265 ix86_expand_vector_extract (true, target, op0, elt);
21270 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21271 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
21272 a language-level syntax for referencing vector elements. */
/* NOTE(review): elided listing -- interior lines are missing between the
   embedded original line numbers.  */
21275 ix86_expand_vec_set_builtin (tree exp)
21277 enum machine_mode tmode, mode1;
21278 tree arg0, arg1, arg2;
21280 rtx op0, op1, target;
21282 arg0 = CALL_EXPR_ARG (exp, 0);
21283 arg1 = CALL_EXPR_ARG (exp, 1);
21284 arg2 = CALL_EXPR_ARG (exp, 2);
21286 tmode = TYPE_MODE (TREE_TYPE (arg0));
21287 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21288 gcc_assert (VECTOR_MODE_P (tmode));
21290 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
21291 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
21292 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Coerce the scalar to the element mode if expansion gave another mode.  */
21294 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
21295 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
21297 op0 = force_reg (tmode, op0);
21298 op1 = force_reg (mode1, op1);
21300 /* OP0 is the source of these builtin functions and shouldn't be
21301 modified. Create a copy, use it and return it as target. */
21302 target = gen_reg_rtx (tmode);
21303 emit_move_insn (target, op0);
21304 ix86_expand_vector_set (true, target, op1, elt);
21309 /* Expand an expression EXP that calls a built-in function,
21310 with result going to TARGET if that's convenient
21311 (and in mode MODE if that's convenient).
21312 SUBTARGET may be used as the target for computing one of EXP's operands.
21313 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): elided listing -- interior lines (braces, some cases,
   'break;' statements) are missing between the embedded line numbers.
   Structure: a switch on FCODE handles irregular builtins inline, then
   the bdesc_* tables are scanned for table-driven ones.  */
21316 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
21317 enum machine_mode mode ATTRIBUTE_UNUSED,
21318 int ignore ATTRIBUTE_UNUSED)
21320 const struct builtin_description *d;
21322 enum insn_code icode;
21323 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
21324 tree arg0, arg1, arg2;
21325 rtx op0, op1, op2, pat;
21326 enum machine_mode mode0, mode1, mode2;
21327 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
21331 case IX86_BUILTIN_MASKMOVQ:
21332 case IX86_BUILTIN_MASKMOVDQU:
21333 icode = (fcode == IX86_BUILTIN_MASKMOVQ
21334 ? CODE_FOR_mmx_maskmovq
21335 : CODE_FOR_sse2_maskmovdqu);
21336 /* Note the arg order is different from the operand order. */
21337 arg1 = CALL_EXPR_ARG (exp, 0);
21338 arg2 = CALL_EXPR_ARG (exp, 1);
21339 arg0 = CALL_EXPR_ARG (exp, 2);
21340 op0 = expand_normal (arg0);
21341 op1 = expand_normal (arg1);
21342 op2 = expand_normal (arg2);
21343 mode0 = insn_data[icode].operand[0].mode;
21344 mode1 = insn_data[icode].operand[1].mode;
21345 mode2 = insn_data[icode].operand[2].mode;
/* The destination address becomes the MEM operand of the maskmov.  */
21347 op0 = force_reg (Pmode, op0);
21348 op0 = gen_rtx_MEM (mode1, op0);
21350 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
21351 op0 = copy_to_mode_reg (mode0, op0);
21352 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
21353 op1 = copy_to_mode_reg (mode1, op1);
21354 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
21355 op2 = copy_to_mode_reg (mode2, op2);
21356 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR loads the SSE control register from a stack slot.  */
21362 case IX86_BUILTIN_LDMXCSR:
21363 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
21364 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21365 emit_move_insn (target, op0);
21366 emit_insn (gen_sse_ldmxcsr (target));
/* STMXCSR stores it to a stack slot and returns the value.  */
21369 case IX86_BUILTIN_STMXCSR:
21370 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21371 emit_insn (gen_sse_stmxcsr (target));
21372 return copy_to_mode_reg (SImode, target);
21374 case IX86_BUILTIN_CLFLUSH:
21375 arg0 = CALL_EXPR_ARG (exp, 0);
21376 op0 = expand_normal (arg0);
21377 icode = CODE_FOR_sse2_clflush;
21378 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
21379 op0 = copy_to_mode_reg (Pmode, op0);
21381 emit_insn (gen_sse2_clflush (op0));
21384 case IX86_BUILTIN_MONITOR:
21385 arg0 = CALL_EXPR_ARG (exp, 0);
21386 arg1 = CALL_EXPR_ARG (exp, 1);
21387 arg2 = CALL_EXPR_ARG (exp, 2);
21388 op0 = expand_normal (arg0);
21389 op1 = expand_normal (arg1);
21390 op2 = expand_normal (arg2);
21392 op0 = copy_to_mode_reg (Pmode, op0);
21394 op1 = copy_to_mode_reg (SImode, op1);
21396 op2 = copy_to_mode_reg (SImode, op2);
/* 32-bit and 64-bit MONITOR use distinct patterns (address width).  */
21398 emit_insn (gen_sse3_monitor (op0, op1, op2));
21400 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
21403 case IX86_BUILTIN_MWAIT:
21404 arg0 = CALL_EXPR_ARG (exp, 0);
21405 arg1 = CALL_EXPR_ARG (exp, 1);
21406 op0 = expand_normal (arg0);
21407 op1 = expand_normal (arg1);
21409 op0 = copy_to_mode_reg (SImode, op0);
21411 op1 = copy_to_mode_reg (SImode, op1);
21412 emit_insn (gen_sse3_mwait (op0, op1));
/* MMX vec_init/vec_ext/vec_set wrappers (see comments on the helpers).  */
21415 case IX86_BUILTIN_VEC_INIT_V2SI:
21416 case IX86_BUILTIN_VEC_INIT_V4HI:
21417 case IX86_BUILTIN_VEC_INIT_V8QI:
21418 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21420 case IX86_BUILTIN_VEC_EXT_V2DF:
21421 case IX86_BUILTIN_VEC_EXT_V2DI:
21422 case IX86_BUILTIN_VEC_EXT_V4SF:
21423 case IX86_BUILTIN_VEC_EXT_V4SI:
21424 case IX86_BUILTIN_VEC_EXT_V8HI:
21425 case IX86_BUILTIN_VEC_EXT_V2SI:
21426 case IX86_BUILTIN_VEC_EXT_V4HI:
21427 case IX86_BUILTIN_VEC_EXT_V16QI:
21428 return ix86_expand_vec_ext_builtin (exp, target);
21430 case IX86_BUILTIN_VEC_SET_V2DI:
21431 case IX86_BUILTIN_VEC_SET_V4SF:
21432 case IX86_BUILTIN_VEC_SET_V4SI:
21433 case IX86_BUILTIN_VEC_SET_V8HI:
21434 case IX86_BUILTIN_VEC_SET_V4HI:
21435 case IX86_BUILTIN_VEC_SET_V16QI:
21436 return ix86_expand_vec_set_builtin (exp);
/* __builtin_infq: materialize a TFmode infinity from the const pool.  */
21438 case IX86_BUILTIN_INFQ:
21440 REAL_VALUE_TYPE inf;
21444 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21446 tmp = validize_mem (force_const_mem (mode, tmp));
21449 target = gen_reg_rtx (mode);
21451 emit_move_insn (target, tmp);
/* Fall through to the table-driven expanders.  */
21459 for (i = 0, d = bdesc_special_args;
21460 i < ARRAY_SIZE (bdesc_special_args);
21462 if (d->code == fcode)
21463 return ix86_expand_special_args_builtin (d, exp, target);
21465 for (i = 0, d = bdesc_args;
21466 i < ARRAY_SIZE (bdesc_args);
21468 if (d->code == fcode)
21469 return ix86_expand_args_builtin (d, exp, target);
21471 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21472 if (d->code == fcode)
21473 return ix86_expand_sse_comi (d, exp, target);
21475 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
21476 if (d->code == fcode)
21477 return ix86_expand_sse_ptest (d, exp, target);
21479 for (i = 0, d = bdesc_pcmpestr;
21480 i < ARRAY_SIZE (bdesc_pcmpestr);
21482 if (d->code == fcode)
21483 return ix86_expand_sse_pcmpestr (d, exp, target);
21485 for (i = 0, d = bdesc_pcmpistr;
21486 i < ARRAY_SIZE (bdesc_pcmpistr);
21488 if (d->code == fcode)
21489 return ix86_expand_sse_pcmpistr (d, exp, target);
21491 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21492 if (d->code == fcode)
21493 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21494 (enum multi_arg_type)d->flag,
/* Every registered builtin must be handled above.  */
21497 gcc_unreachable ();
21500 /* Returns a function decl for a vectorized version of the builtin function
21501 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21502 if it is not available. */
/* NOTE(review): elided listing -- interior lines are missing between the
   embedded original line numbers.  */
21505 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21508 enum machine_mode in_mode, out_mode;
/* Both the input and output types must be vectors.  */
21511 if (TREE_CODE (type_out) != VECTOR_TYPE
21512 || TREE_CODE (type_in) != VECTOR_TYPE)
21515 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21516 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21517 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21518 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Map scalar math builtins to their SSE vector counterparts when the
   element mode and lane count line up.  */
21522 case BUILT_IN_SQRT:
21523 if (out_mode == DFmode && out_n == 2
21524 && in_mode == DFmode && in_n == 2)
21525 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21528 case BUILT_IN_SQRTF:
21529 if (out_mode == SFmode && out_n == 4
21530 && in_mode == SFmode && in_n == 4)
21531 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21534 case BUILT_IN_LRINT:
21535 if (out_mode == SImode && out_n == 4
21536 && in_mode == DFmode && in_n == 2)
21537 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21540 case BUILT_IN_LRINTF:
21541 if (out_mode == SImode && out_n == 4
21542 && in_mode == SFmode && in_n == 4)
21543 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21550 /* Dispatch to a handler for a vectorization library. */
21551 if (ix86_veclib_handler)
21552 return (*ix86_veclib_handler)(fn, type_out, type_in);
21557 /* Handler for an SVML-style interface to
21558 a library with vectorized intrinsics. */
/* NOTE(review): elided listing -- interior lines are missing between the
   embedded original line numbers.  Builds a FUNCTION_DECL naming the
   Intel SVML routine that vectorizes scalar math builtin FN.  */
21561 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
21564 tree fntype, new_fndecl, args;
21567 enum machine_mode el_mode, in_mode;
21570 /* The SVML is suitable for unsafe math only. */
21571 if (!flag_unsafe_math_optimizations)
21574 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21575 n = TYPE_VECTOR_SUBPARTS (type_out);
21576 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21577 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21578 if (el_mode != in_mode
/* Double-precision group: only the 2 x DFmode form is provided.  */
21586 case BUILT_IN_LOG10:
21588 case BUILT_IN_TANH:
21590 case BUILT_IN_ATAN:
21591 case BUILT_IN_ATAN2:
21592 case BUILT_IN_ATANH:
21593 case BUILT_IN_CBRT:
21594 case BUILT_IN_SINH:
21596 case BUILT_IN_ASINH:
21597 case BUILT_IN_ASIN:
21598 case BUILT_IN_COSH:
21600 case BUILT_IN_ACOSH:
21601 case BUILT_IN_ACOS:
21602 if (el_mode != DFmode || n != 2)
/* Single-precision group: only the 4 x SFmode form is provided.  */
21606 case BUILT_IN_EXPF:
21607 case BUILT_IN_LOGF:
21608 case BUILT_IN_LOG10F:
21609 case BUILT_IN_POWF:
21610 case BUILT_IN_TANHF:
21611 case BUILT_IN_TANF:
21612 case BUILT_IN_ATANF:
21613 case BUILT_IN_ATAN2F:
21614 case BUILT_IN_ATANHF:
21615 case BUILT_IN_CBRTF:
21616 case BUILT_IN_SINHF:
21617 case BUILT_IN_SINF:
21618 case BUILT_IN_ASINHF:
21619 case BUILT_IN_ASINF:
21620 case BUILT_IN_COSHF:
21621 case BUILT_IN_COSF:
21622 case BUILT_IN_ACOSHF:
21623 case BUILT_IN_ACOSF:
21624 if (el_mode != SFmode || n != 4)
/* Derive the SVML routine name from the builtin's decl name.
   BNAME is "__builtin_<fn>", so BNAME+10 skips that prefix; log gets
   special-cased because SVML spells it "Ln".  */
21632 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21634 if (fn == BUILT_IN_LOGF)
21635 strcpy (name, "vmlsLn4");
21636 else if (fn == BUILT_IN_LOG)
21637 strcpy (name, "vmldLn2");
21640 sprintf (name, "vmls%s", bname+10);
21641 name[strlen (name)-1] = '4';
21644 sprintf (name, "vmld%s2", bname+10);
21646 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a 1- or 2-arg fntype.  */
21650 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21651 args = TREE_CHAIN (args))
21655 fntype = build_function_type_list (type_out, type_in, NULL);
21657 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21659 /* Build a function declaration for the vectorized function. */
21660 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21661 TREE_PUBLIC (new_fndecl) = 1;
21662 DECL_EXTERNAL (new_fndecl) = 1;
21663 DECL_IS_NOVOPS (new_fndecl) = 1;
21664 TREE_READONLY (new_fndecl) = 1;
21669 /* Handler for an ACML-style interface to
21670 a library with vectorized intrinsics. */
/* NOTE(review): elided listing -- interior lines are missing between the
   embedded original line numbers.  Analogue of ix86_veclibabi_svml for
   AMD's ACML library; names follow the "__vrd2_*" / "__vrs4_*" scheme
   filled into the "__vr.._" template below.  */
21673 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21675 char name[20] = "__vr.._";
21676 tree fntype, new_fndecl, args;
21679 enum machine_mode el_mode, in_mode;
21682 /* The ACML is 64bits only and suitable for unsafe math only as
21683 it does not correctly support parts of IEEE with the required
21684 precision such as denormals. */
21686 || !flag_unsafe_math_optimizations)
21689 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21690 n = TYPE_VECTOR_SUBPARTS (type_out);
21691 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21692 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21693 if (el_mode != in_mode
/* Double-precision entry points: require 2 x DFmode lanes.  */
21703 case BUILT_IN_LOG2:
21704 case BUILT_IN_LOG10:
21707 if (el_mode != DFmode
/* Single-precision entry points: require 4 x SFmode lanes.  */
21712 case BUILT_IN_SINF:
21713 case BUILT_IN_COSF:
21714 case BUILT_IN_EXPF:
21715 case BUILT_IN_POWF:
21716 case BUILT_IN_LOGF:
21717 case BUILT_IN_LOG2F:
21718 case BUILT_IN_LOG10F:
21721 if (el_mode != SFmode
/* BNAME is "__builtin_<fn>"; +10 skips the prefix, and the suffix is
   written over the "_" tail of the "__vr.._" template (offset 7).  */
21730 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21731 sprintf (name + 7, "%s", bname+10);
/* Count the scalar builtin's arguments to pick a 1- or 2-arg fntype.  */
21734 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21735 args = TREE_CHAIN (args))
21739 fntype = build_function_type_list (type_out, type_in, NULL);
21741 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21743 /* Build a function declaration for the vectorized function. */
21744 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21745 TREE_PUBLIC (new_fndecl) = 1;
21746 DECL_EXTERNAL (new_fndecl) = 1;
21747 DECL_IS_NOVOPS (new_fndecl) = 1;
21748 TREE_READONLY (new_fndecl) = 1;
21754 /* Returns a decl of a function that implements conversion of the
21755 input vector of type TYPE, or NULL_TREE if it is not available. */
/* NOTE(review): elided listing -- the switch on CODE and several case
   labels are missing between the embedded line numbers.  */
21758 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21760 if (TREE_CODE (type) != VECTOR_TYPE)
/* int->float conversion: CVTDQ2PS handles the V4SI case.  */
21766 switch (TYPE_MODE (type))
21769 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float->int truncation: CVTTPS2DQ handles the V4SF case.  */
21774 case FIX_TRUNC_EXPR:
21775 switch (TYPE_MODE (type))
21778 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21788 /* Returns a code for a target-specific builtin that implements
21789 reciprocal of the function, or NULL_TREE if not available. */
/* NOTE(review): elided listing -- interior lines are missing between the
   embedded original line numbers.  */
21792 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21793 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only valid under -mrecip with
   SSE math and the relaxed-FP flags all enabled.  */
21795 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21796 && flag_finite_math_only && !flag_trapping_math
21797 && flag_unsafe_math_optimizations))
21801 /* Machine dependent builtins. */
21804 /* Vectorized version of sqrt to rsqrt conversion. */
21805 case IX86_BUILTIN_SQRTPS_NR:
21806 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21812 /* Normal builtins. */
21815 /* Sqrt to rsqrt conversion. */
21816 case BUILT_IN_SQRTF:
21817 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21824 /* Store OPERAND to the memory after reload is completed. This means
21825 that we can't easily use assign_stack_local. */
/* NOTE(review): elided listing -- interior lines (emit_insn wrappers,
   several switch labels, the value source of each SET) are missing
   between the embedded original line numbers.  */
21827 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21831 gcc_assert (reload_completed);
/* With a red zone we can store below the stack pointer without
   adjusting it.  */
21832 if (TARGET_RED_ZONE)
21834 result = gen_rtx_MEM (mode,
21835 gen_rtx_PLUS (Pmode,
21837 GEN_INT (-RED_ZONE_SIZE)));
21838 emit_move_insn (result, operand);
/* 64-bit without a red zone: push the value (PRE_DEC of %rsp).  */
21840 else if (!TARGET_RED_ZONE && TARGET_64BIT)
21846 operand = gen_lowpart (DImode, operand);
21850 gen_rtx_SET (VOIDmode,
21851 gen_rtx_MEM (DImode,
21852 gen_rtx_PRE_DEC (DImode,
21853 stack_pointer_rtx)),
21857 gcc_unreachable ();
21859 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit: DImode values are pushed as two SImode halves.  */
21868 split_di (&operand, 1, operands, operands + 1);
21870 gen_rtx_SET (VOIDmode,
21871 gen_rtx_MEM (SImode,
21872 gen_rtx_PRE_DEC (Pmode,
21873 stack_pointer_rtx)),
21876 gen_rtx_SET (VOIDmode,
21877 gen_rtx_MEM (SImode,
21878 gen_rtx_PRE_DEC (Pmode,
21879 stack_pointer_rtx)),
21884 /* Store HImodes as SImodes. */
21885 operand = gen_lowpart (SImode, operand);
21889 gen_rtx_SET (VOIDmode,
21890 gen_rtx_MEM (GET_MODE (operand),
21891 gen_rtx_PRE_DEC (SImode,
21892 stack_pointer_rtx)),
21896 gcc_unreachable ();
21898 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21903 /* Free operand from the memory. */
/* NOTE(review): elided listing -- the size computation between the two
   visible fragments is missing.  Undoes ix86_force_to_memory: with a
   red zone nothing was pushed, so nothing is popped.  */
21905 ix86_free_from_memory (enum machine_mode mode)
21907 if (!TARGET_RED_ZONE)
21911 if (mode == DImode || TARGET_64BIT)
21915 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21916 to pop or add instruction if registers are available. */
21917 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21918 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21923 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21924 QImode must go into class Q_REGS.
21925 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21926 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): elided listing -- interior lines (several return
   statements and condition tails) are missing between the embedded
   original line numbers.  */
21928 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21930 enum machine_mode mode = GET_MODE (x);
21932 /* We're only allowed to return a subclass of CLASS. Many of the
21933 following checks fail for NO_REGS, so eliminate that early. */
21934 if (regclass == NO_REGS)
21937 /* All classes can load zeros. */
21938 if (x == CONST0_RTX (mode))
21941 /* Force constants into memory if we are loading a (nonzero) constant into
21942 an MMX or SSE register. This is because there are no MMX/SSE instructions
21943 to load from a constant. */
21945 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21948 /* Prefer SSE regs only, if we can use them for math. */
21949 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21950 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21952 /* Floating-point constants need more complex checks. */
21953 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21955 /* General regs can load everything. */
21956 if (reg_class_subset_p (regclass, GENERAL_REGS))
21959 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21960 zero above. We only want to wind up preferring 80387 registers if
21961 we plan on doing computation with them. */
21963 && standard_80387_constant_p (x))
21965 /* Limit class to non-sse. */
21966 if (regclass == FLOAT_SSE_REGS)
21968 if (regclass == FP_TOP_SSE_REGS)
21970 if (regclass == FP_SECOND_SSE_REGS)
21971 return FP_SECOND_REG;
21972 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21979 /* Generally when we see PLUS here, it's the function invariant
21980 (plus soft-fp const_int). Which can only be computed into general
21982 if (GET_CODE (x) == PLUS)
21983 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
21985 /* QImode constants are easy to load, but non-constant QImode data
21986 must go into Q_REGS. */
21987 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21989 if (reg_class_subset_p (regclass, Q_REGS))
21991 if (reg_class_subset_p (Q_REGS, regclass))
21999 /* Discourage putting floating-point values in SSE registers unless
22000 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): elided listing -- interior lines are missing between the
   embedded original line numbers.  */
22002 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
22004 enum machine_mode mode = GET_MODE (x);
22006 /* Restrict the output reload class to the register bank that we are doing
22007 math on. If we would like not to return a subset of CLASS, reject this
22008 alternative: if reload cannot do this, it will still use its choice. */
22009 mode = GET_MODE (x);
22010 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22011 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* x87 math: narrow mixed classes down to pure FP stack classes.  */
22013 if (X87_FLOAT_MODE_P (mode))
22015 if (regclass == FP_TOP_SSE_REGS)
22017 else if (regclass == FP_SECOND_SSE_REGS)
22018 return FP_SECOND_REG;
22020 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
22026 /* If we are copying between general and FP registers, we need a memory
22027 location. The same is true for SSE and MMX registers.
22029 To optimize register_move_cost performance, allow inline variant.
22031 The macro can't work reliably when one of the CLASSES is class containing
22032 registers from multiple units (SSE, MMX, integer). We avoid this by never
22033 combining those units in single alternative in the machine description.
22034 Ensure that this constraint holds to avoid unexpected surprises.
22036 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
22037 enforce these sanity checks. */
/* NOTE(review): elided listing -- interior lines (return statements of
   several branches) are missing between the embedded line numbers.  */
22040 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22041 enum machine_mode mode, int strict)
/* Mixed-unit classes violate the invariant described above; under
   STRICT this is a hard assertion failure.  */
22043 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
22044 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
22045 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
22046 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
22047 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
22048 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
22050 gcc_assert (!strict);
22054 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
22057 /* ??? This is a lie. We do have moves between mmx/general, and for
22058 mmx/sse2. But by saying we need secondary memory we discourage the
22059 register allocator from using the mmx registers unless needed. */
22060 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
22063 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22065 /* SSE1 doesn't have any direct moves from other classes. */
22069 /* If the target says that inter-unit moves are more expensive
22070 than moving through memory, then don't generate them. */
22071 if (!TARGET_INTER_UNIT_MOVES)
22074 /* Between SSE and general, we have moves no larger than word size. */
22075 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed; this is the
   entry point used by the SECONDARY_MEMORY_NEEDED target macro.  */
22083 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22084 enum machine_mode mode, int strict)
22086 return inline_secondary_memory_needed (class1, class2, mode, strict);
22089 /* Return true if the registers in CLASS cannot represent the change from
22090 modes FROM to TO. */
/* Implements CANNOT_CHANGE_MODE_CLASS.  NOTE(review): the return-type line
   and several interior lines (including the return statements) are missing
   from this extraction.  */
22093 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
22094 enum reg_class regclass)
22099 /* x87 registers can't do subreg at all, as all values are reformatted
22100 to extended precision. */
22101 if (MAYBE_FLOAT_CLASS_P (regclass))
22104 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
22106 /* Vector registers do not support QI or HImode loads. If we don't
22107 disallow a change to these modes, reload will assume it's ok to
22108 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
22109 the vec_dupv4hi pattern. */
22110 if (GET_MODE_SIZE (from) < 4)
22113 /* Vector registers do not support subreg with nonzero offsets, which
22114 are otherwise valid for integer registers. Since we can't see
22115 whether we have a nonzero offset from here, prohibit all
22116 nonparadoxical subregs changing size. */
22117 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
22124 /* Return the cost of moving data of mode M between a
22125 register and memory. A value of 2 is the default; this cost is
22126 relative to those in `REGISTER_MOVE_COST'.
22128 This function is used extensively by register_move_cost that is used to
22129 build tables at startup. Make it inline in this case.
22130 When IN is 2, return maximum of in and out move cost.
22132 If moving between registers and memory is more expensive than
22133 between two registers, you should define this macro to express the
22136 Model also increased moving costs of QImode registers in non
22140 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* NOTE(review): the IN parameter line, the `index' computations and the
   switch-case labels are missing from this extraction; the cost-table
   lookups below are left byte-identical.  */
/* x87: cost from fp_load/fp_store tables (indexed by mode).  */
22144 if (FLOAT_CLASS_P (regclass))
22162 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
22163 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE: cost from sse_load/sse_store tables, indexed by operand size.  */
22165 if (SSE_CLASS_P (regclass))
22168 switch (GET_MODE_SIZE (mode))
22183 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
22184 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX: likewise from the mmx_load/mmx_store tables.  */
22186 if (MMX_CLASS_P (regclass))
22189 switch (GET_MODE_SIZE (mode))
22201 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
22202 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers: QImode is special-cased for partial-register
   dependency penalties on non-Q registers.  */
22204 switch (GET_MODE_SIZE (mode))
22207 if (Q_CLASS_P (regclass) || TARGET_64BIT)
22210 return ix86_cost->int_store[0];
22211 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
22212 cost = ix86_cost->movzbl_load;
22214 cost = ix86_cost->int_load[0];
22216 return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q-reg byte access: extra 4 accounts for the partial-register
   stall penalty on the store side.  */
22222 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
22224 return ix86_cost->movzbl_load;
22226 return ix86_cost->int_store[0] + 4;
22231 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
22232 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
22234 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
22235 if (mode == TFmode)
22238 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
22240 cost = ix86_cost->int_load[2];
22242 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words the mode occupies.  */
22243 return (cost * (((int) GET_MODE_SIZE (mode)
22244 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper: entry point for the MEMORY_MOVE_COST target macro,
   delegating to the inline variant above.  */
22249 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
22251 return inline_memory_move_cost (mode, regclass, in);
22255 /* Return the cost of moving data from a register in class CLASS1 to
22256 one in class CLASS2.
22258 It is not required that the cost always equal 2 when FROM is the same as TO;
22259 on some machines it is expensive to move between registers if they are not
22260 general registers. */
22263 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
22264 enum reg_class class2)
22266 /* In case we require secondary memory, compute cost of the store followed
22267 by load. In order to avoid bad register allocation choices, we need
22268 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* IN==2 below asks inline_memory_move_cost for max(load, store).  */
22270 if (inline_secondary_memory_needed (class1, class2, mode, 0))
22274 cost += inline_memory_move_cost (mode, class1, 2);
22275 cost += inline_memory_move_cost (mode, class2, 2);
22277 /* In case of copying from general_purpose_register we may emit multiple
22278 stores followed by single load causing memory size mismatch stall.
22279 Count this as arbitrarily high cost of 20. */
22280 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
22283 /* In the case of FP/MMX moves, the registers actually overlap, and we
22284 have to switch modes in order to treat them differently. */
22285 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
22286 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22292 /* Moves between SSE/MMX and integer unit are expensive. */
22293 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22294 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22296 /* ??? By keeping returned value relatively high, we limit the number
22297 of moves between integer and MMX/SSE registers for all targets.
22298 Additionally, high value prevents problem with x86_modes_tieable_p(),
22299 where integer modes in MMX/SSE registers are not tieable
22300 because of missing QImode and HImode moves to, from or between
22301 MMX/SSE registers. */
22302 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Same-unit moves: use the per-unit move cost from the tuning tables.  */
22304 if (MAYBE_FLOAT_CLASS_P (class1))
22305 return ix86_cost->fp_move;
22306 if (MAYBE_SSE_CLASS_P (class1))
22307 return ix86_cost->sse_move;
22308 if (MAYBE_MMX_CLASS_P (class1))
22309 return ix86_cost->mmx_move;
22313 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): interior return statements are missing from this
   extraction; code left byte-identical.  */
22316 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
22318 /* Flags and only flags can only hold CCmode values. */
22319 if (CC_REGNO_P (regno))
22320 return GET_MODE_CLASS (mode) == MODE_CC;
22321 if (GET_MODE_CLASS (mode) == MODE_CC
22322 || GET_MODE_CLASS (mode) == MODE_RANDOM
22323 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22325 if (FP_REGNO_P (regno))
22326 return VALID_FP_MODE_P (mode);
22327 if (SSE_REGNO_P (regno))
22329 /* We implement the move patterns for all vector modes into and
22330 out of SSE registers, even when no operation instructions
22332 return (VALID_SSE_REG_MODE (mode)
22333 || VALID_SSE2_REG_MODE (mode)
22334 || VALID_MMX_REG_MODE (mode)
22335 || VALID_MMX_REG_MODE_3DNOW (mode));
22337 if (MMX_REGNO_P (regno))
22339 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22340 so if the register is available at all, then we can move data of
22341 the given mode into or out of it. */
22342 return (VALID_MMX_REG_MODE (mode)
22343 || VALID_MMX_REG_MODE_3DNOW (mode));
/* General-purpose registers from here on.  */
22346 if (mode == QImode)
22348 /* Take care for QImode values - they can be in non-QI regs,
22349 but then they do cause partial register stalls. */
22350 if (regno < 4 || TARGET_64BIT)
22352 if (!TARGET_PARTIAL_REG_STALL)
/* Late in compilation (during/after reload) we tolerate QImode in
   non-Q registers rather than fail.  */
22354 return reload_in_progress || reload_completed;
22356 /* We handle both integer and floats in the general purpose registers. */
22357 else if (VALID_INT_MODE_P (mode))
22359 else if (VALID_FP_MODE_P (mode))
22361 else if (VALID_DFP_MODE_P (mode))
22363 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22364 on to use that value in smaller contexts, this can easily force a
22365 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22366 supporting DImode, allow it. */
22367 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22373 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22374 tieable integer mode. */
/* NOTE(review): the switch over MODE is elided here; only two of its
   return expressions are visible.  */
22377 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* QI/HImode-sized cases: tieable unless partial-register stalls apply.  */
22386 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
/* 8-byte case: only tieable with 64-bit general registers.  */
22389 return TARGET_64BIT;
22396 /* Return true if MODE1 is accessible in a register that can hold MODE2
22397 without copying. That is, all register classes that can hold MODE2
22398 can also hold MODE1. */
22401 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22403 if (mode1 == mode2)
22406 if (ix86_tieable_integer_mode_p (mode1)
22407 && ix86_tieable_integer_mode_p (mode2))
22410 /* MODE2 being XFmode implies fp stack or general regs, which means we
22411 can tie any smaller floating point modes to it. Note that we do not
22412 tie this with TFmode. */
22413 if (mode2 == XFmode)
22414 return mode1 == SFmode || mode1 == DFmode;
22416 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22417 that we can tie it with SFmode. */
22418 if (mode2 == DFmode)
22419 return mode1 == SFmode;
22421 /* If MODE2 is only appropriate for an SSE register, then tie with
22422 any other mode acceptable to SSE registers. */
22423 if (GET_MODE_SIZE (mode2) == 16
22424 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22425 return (GET_MODE_SIZE (mode1) == 16
22426 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22428 /* If MODE2 is appropriate for an MMX register, then tie
22429 with any other mode acceptable to MMX registers. */
22430 if (GET_MODE_SIZE (mode2) == 8
22431 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22432 return (GET_MODE_SIZE (mode1) == 8
22433 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22438 /* Compute a (partial) cost for rtx X. Return true if the complete
22439 cost has been computed, and false if subexpressions should be
22440 scanned. In either case, *TOTAL contains the cost result. */
/* Target hook for TARGET_RTX_COSTS.  NOTE(review): the big switch over
   CODE is heavily elided in this extraction — the case labels, braces and
   many break/return lines are missing; code left byte-identical.  */
22443 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22445 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22446 enum machine_mode mode = GET_MODE (x);
/* Constant / symbolic-operand costs.  */
22454 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22456 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22458 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): BUG — `!' binds tighter than `!=', so this compares the
   boolean !GET_CODE (x) (0 or 1) against the enum value LABEL_REF, which
   is almost always true.  Almost certainly intended:
       GET_CODE (x) != LABEL_REF
   i.e. "not a label, and not a local symbol".  Fix upstream; the full
   condition cannot be safely rewritten here because surrounding lines
   are missing from this extraction.  */
22460 || (!GET_CODE (x) != LABEL_REF
22461 && (GET_CODE (x) != SYMBOL_REF
22462 || !SYMBOL_REF_LOCAL_P (x)))))
22469 if (mode == VOIDmode)
/* FP constants: cheap if loadable via fld0/fld1 etc., else a memory load.  */
22472 switch (standard_80387_constant_p (x))
22477 default: /* Other constants */
22482 /* Start with (MEM (SYMBOL_REF)), since that's where
22483 it'll probably end up. Add a penalty for size. */
22484 *total = (COSTS_N_INSNS (1)
22485 + (flag_pic != 0 && !TARGET_64BIT)
22486 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22492 /* The zero extensions is often completely free on x86_64, so make
22493 it as cheap as possible. */
22494 if (TARGET_64BIT && mode == DImode
22495 && GET_MODE (XEXP (x, 0)) == SImode)
22497 else if (TARGET_ZERO_EXTEND_WITH_AND)
22498 *total = ix86_cost->add;
22500 *total = ix86_cost->movzx;
22504 *total = ix86_cost->movsx;
/* Shift costs: shifts by small constants may be done with lea.  */
22508 if (CONST_INT_P (XEXP (x, 1))
22509 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22511 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22514 *total = ix86_cost->add;
22517 if ((value == 2 || value == 3)
22518 && ix86_cost->lea <= ix86_cost->shift_const)
22520 *total = ix86_cost->lea;
/* DImode shifts on 32-bit targets need a two-insn (or worse) sequence.  */
22530 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22532 if (CONST_INT_P (XEXP (x, 1)))
22534 if (INTVAL (XEXP (x, 1)) > 32)
22535 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22537 *total = ix86_cost->shift_const * 2;
22541 if (GET_CODE (XEXP (x, 1)) == AND)
22542 *total = ix86_cost->shift_var * 2;
22544 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22549 if (CONST_INT_P (XEXP (x, 1)))
22550 *total = ix86_cost->shift_const;
22552 *total = ix86_cost->shift_var;
/* MULT: FP multiply cost, or integer multiply modeled per set bit of a
   constant multiplier.  */
22557 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22559 /* ??? SSE scalar cost should be used here. */
22560 *total = ix86_cost->fmul;
22563 else if (X87_FLOAT_MODE_P (mode))
22565 *total = ix86_cost->fmul;
22568 else if (FLOAT_MODE_P (mode))
22570 /* ??? SSE vector cost should be used here. */
22571 *total = ix86_cost->fmul;
22576 rtx op0 = XEXP (x, 0);
22577 rtx op1 = XEXP (x, 1);
22579 if (CONST_INT_P (XEXP (x, 1)))
22581 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
/* Population count of the multiplier (Kernighan's bit trick).  */
22582 for (nbits = 0; value != 0; value &= value - 1)
22586 /* This is arbitrary. */
22589 /* Compute costs correctly for widening multiplication. */
22590 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22591 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22592 == GET_MODE_SIZE (mode))
22594 int is_mulwiden = 0;
22595 enum machine_mode inner_mode = GET_MODE (op0);
22597 if (GET_CODE (op0) == GET_CODE (op1))
22598 is_mulwiden = 1, op1 = XEXP (op1, 0);
22599 else if (CONST_INT_P (op1))
22601 if (GET_CODE (op0) == SIGN_EXTEND)
22602 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22605 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
/* For a widening multiply, cost the narrower inner mode instead.  */
22609 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22612 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22613 + nbits * ix86_cost->mult_bit
22614 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
/* DIV/MOD.  */
22623 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22624 /* ??? SSE cost should be used here. */
22625 *total = ix86_cost->fdiv;
22626 else if (X87_FLOAT_MODE_P (mode))
22627 *total = ix86_cost->fdiv;
22628 else if (FLOAT_MODE_P (mode))
22629 /* ??? SSE vector cost should be used here. */
22630 *total = ix86_cost->fdiv;
22632 *total = ix86_cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize lea-encodable forms (base + index*scale + disp).  */
22636 if (GET_MODE_CLASS (mode) == MODE_INT
22637 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22639 if (GET_CODE (XEXP (x, 0)) == PLUS
22640 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22641 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22642 && CONSTANT_P (XEXP (x, 1)))
22644 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22645 if (val == 2 || val == 4 || val == 8)
22647 *total = ix86_cost->lea;
22648 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22649 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22651 *total += rtx_cost (XEXP (x, 1), outer_code);
22655 else if (GET_CODE (XEXP (x, 0)) == MULT
22656 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22658 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22659 if (val == 2 || val == 4 || val == 8)
22661 *total = ix86_cost->lea;
22662 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22663 *total += rtx_cost (XEXP (x, 1), outer_code);
22667 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22669 *total = ix86_cost->lea;
22670 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22671 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22672 *total += rtx_cost (XEXP (x, 1), outer_code);
/* PLUS/MINUS fall-through: FP add cost.  */
22679 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22681 /* ??? SSE cost should be used here. */
22682 *total = ix86_cost->fadd;
22685 else if (X87_FLOAT_MODE_P (mode))
22687 *total = ix86_cost->fadd;
22690 else if (FLOAT_MODE_P (mode))
22692 /* ??? SSE vector cost should be used here. */
22693 *total = ix86_cost->fadd;
/* Logical ops: DImode on 32-bit costs a register pair's worth.  */
22701 if (!TARGET_64BIT && mode == DImode)
22703 *total = (ix86_cost->add * 2
22704 + (rtx_cost (XEXP (x, 0), outer_code)
22705 << (GET_MODE (XEXP (x, 0)) != DImode))
22706 + (rtx_cost (XEXP (x, 1), outer_code)
22707 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG.  */
22713 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22715 /* ??? SSE cost should be used here. */
22716 *total = ix86_cost->fchs;
22719 else if (X87_FLOAT_MODE_P (mode))
22721 *total = ix86_cost->fchs;
22724 else if (FLOAT_MODE_P (mode))
22726 /* ??? SSE vector cost should be used here. */
22727 *total = ix86_cost->fchs;
/* NOT.  */
22733 if (!TARGET_64BIT && mode == DImode)
22734 *total = ix86_cost->add * 2;
22736 *total = ix86_cost->add;
/* COMPARE of a single extracted bit against zero -> test[bwl].  */
22740 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22741 && XEXP (XEXP (x, 0), 1) == const1_rtx
22742 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22743 && XEXP (x, 1) == const0_rtx)
22745 /* This kind of construct is implemented using test[bwl].
22746 Treat it as if we had an AND. */
22747 *total = (ix86_cost->add
22748 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22749 + rtx_cost (const1_rtx, outer_code));
22755 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS.  */
22760 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22761 /* ??? SSE cost should be used here. */
22762 *total = ix86_cost->fabs;
22763 else if (X87_FLOAT_MODE_P (mode))
22764 *total = ix86_cost->fabs;
22765 else if (FLOAT_MODE_P (mode))
22766 /* ??? SSE vector cost should be used here. */
22767 *total = ix86_cost->fabs;
/* SQRT.  */
22771 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22772 /* ??? SSE cost should be used here. */
22773 *total = ix86_cost->fsqrt;
22774 else if (X87_FLOAT_MODE_P (mode))
22775 *total = ix86_cost->fsqrt;
22776 else if (FLOAT_MODE_P (mode))
22777 /* ??? SSE vector cost should be used here. */
22778 *total = ix86_cost->fsqrt;
/* UNSPEC: thread-pointer reads are essentially free.  */
22782 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels for Mach-O stubs.  */
22793 static int current_machopic_label_num;
22795 /* Given a symbol name and its associated stub, write out the
22796 definition of the stub. */
/* Darwin/Mach-O only (32-bit; gcc_assert rejects 64-bit below).  Emits the
   lazy-binding stub, the binder fragment and the lazy pointer for SYMB.
   NOTE(review): the PIC/non-PIC branch structure is partially elided here;
   both variants' fprintf lines are visible back to back.  */
22799 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22801 unsigned int length;
22802 char *binder_name, *symbol_name, lazy_ptr_name[32];
22803 int label = ++current_machopic_label_num;
22805 /* For 64-bit we shouldn't get here. */
22806 gcc_assert (!TARGET_64BIT);
22808 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22809 symb = (*targetm.strip_name_encoding) (symb);
22811 length = strlen (stub);
22812 binder_name = alloca (length + 32);
22813 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22815 length = strlen (symb);
22816 symbol_name = alloca (length + 32);
22817 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
/* lazy_ptr_name[32] is ample for "L%d$lz" with any int label.  */
22819 sprintf (lazy_ptr_name, "L%d$lz", label);
22822 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22824 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22826 fprintf (file, "%s:\n", stub);
22827 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC variant: materialize PC in %eax, load target through the lazy ptr.  */
22831 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22832 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22833 fprintf (file, "\tjmp\t*%%edx\n");
22836 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22838 fprintf (file, "%s:\n", binder_name);
22842 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22843 fprintf (file, "\tpushl\t%%eax\n");
22846 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22848 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* The lazy pointer initially points at the binder; dyld patches it.  */
22850 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22851 fprintf (file, "%s:\n", lazy_ptr_name);
22852 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22853 fprintf (file, "\t.long %s\n", binder_name);
/* Darwin end-of-file hook: delegates to the generic darwin handler.
   NOTE(review): return type line and any extra statements are elided.  */
22857 darwin_x86_file_end (void)
22859 darwin_file_end ();
22862 #endif /* TARGET_MACHO */
22864 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: caller-saved GPRs first, then callee-saved
   GPRs, then x87/SSE in an order depending on -mfpmath, then MMX.  */
22867 x86_order_regs_for_local_alloc (void)
22872 /* First allocate the local general purpose registers. */
22873 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22874 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22875 reg_alloc_order [pos++] = i;
22877 /* Global general purpose registers. */
22878 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22879 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22880 reg_alloc_order [pos++] = i;
22882 /* x87 registers come first in case we are doing FP math
22884 if (!TARGET_SSE_MATH)
22885 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22886 reg_alloc_order [pos++] = i;
22888 /* SSE registers. */
22889 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22890 reg_alloc_order [pos++] = i;
22891 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22892 reg_alloc_order [pos++] = i;
22894 /* x87 registers. */
22895 if (TARGET_SSE_MATH)
22896 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22897 reg_alloc_order [pos++] = i;
22899 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22900 reg_alloc_order [pos++] = i;
22902 /* Initialize the rest of array as we do not allocate some registers
22904 while (pos < FIRST_PSEUDO_REGISTER)
22905 reg_alloc_order [pos++] = 0;
22908 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22909 struct attribute_spec.handler. */
/* Warns and drops the attribute when applied to a non-struct/union, or
   when it conflicts with the opposite layout attribute already present.  */
22911 ix86_handle_struct_attribute (tree *node, tree name,
22912 tree args ATTRIBUTE_UNUSED,
22913 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
22916 if (DECL_P (*node))
22918 if (TREE_CODE (*node) == TYPE_DECL)
22919 type = &TREE_TYPE (*node);
22924 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22925 || TREE_CODE (*type) == UNION_TYPE)))
22927 warning (OPT_Wattributes, "%qs attribute ignored",
22928 IDENTIFIER_POINTER (name));
22929 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on one type.  */
22932 else if ((is_attribute_p ("ms_struct", name)
22933 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22934 || ((is_attribute_p ("gcc_struct", name)
22935 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22937 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22938 IDENTIFIER_POINTER (name));
22939 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use MS bitfield layout: either the
   target default requests it (and "gcc_struct" does not override), or the
   type carries an explicit "ms_struct" attribute.  */
22946 ix86_ms_bitfield_layout_p (const_tree record_type)
22948 return (TARGET_MS_BITFIELD_LAYOUT &&
22949 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22950 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22953 /* Returns an expression indicating where the this parameter is
22954 located on entry to the FUNCTION. */
/* Used by the thunk emitter: returns either a hard REG (when `this' is
   passed in a register) or a MEM off the stack pointer.  AGGR accounts for
   a hidden aggregate-return pointer preceding `this'.  */
22957 x86_this_parameter (tree function)
22959 tree type = TREE_TYPE (function);
22960 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
/* 64-bit: `this' is in the first (or second, if AGGR) integer parm reg;
   the register list differs between the MS and SysV ABIs.  */
22965 const int *parm_regs;
22967 if (TARGET_64BIT_MS_ABI)
22968 parm_regs = x86_64_ms_abi_int_parameter_registers;
22970 parm_regs = x86_64_int_parameter_registers;
22971 return gen_rtx_REG (DImode, parm_regs[aggr]);
/* 32-bit: depends on regparm/fastcall conventions.  */
22974 nregs = ix86_function_regparm (type, function);
22976 if (nregs > 0 && !stdarg_p (type))
22980 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
22981 regno = aggr ? DX_REG : CX_REG;
22989 return gen_rtx_MEM (SImode,
22990 plus_constant (stack_pointer_rtx, 4));
22993 return gen_rtx_REG (SImode, regno);
/* Default: `this' is on the stack, past the return address (and past the
   hidden aggregate pointer when AGGR).  */
22996 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
22999 /* Determine whether x86_output_mi_thunk can succeed. */
23002 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
23003 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
23004 HOST_WIDE_INT vcall_offset, const_tree function)
23006 /* 64-bit can handle anything. */
23010 /* For 32-bit, everything's fine if we have one free register. */
23011 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23014 /* Need a free register for vcall_offset. */
23018 /* Need a free register for GOT references. */
23019 if (flag_pic && !(*targetm.binds_local_p) (function))
23022 /* Otherwise ok. */
23026 /* Output the assembler code for a thunk function. THUNK_DECL is the
23027 declaration for the thunk function itself, FUNCTION is the decl for
23028 the target function. DELTA is an immediate constant offset to be
23029 added to THIS. If VCALL_OFFSET is nonzero, the word at
23030 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): the TARGET_64BIT/32-bit branch structure is partially
   elided in this extraction — matching mov{q}/mov{l} pairs below are the
   two arms of if (TARGET_64BIT) tests whose lines are missing.  */
23033 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
23034 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
23035 HOST_WIDE_INT vcall_offset, tree function)
23038 rtx this_param = x86_this_parameter (function);
23041 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23042 pull it in now and let DELTA benefit. */
23043 if (REG_P (this_param))
23044 this_reg = this_param;
23045 else if (vcall_offset)
23047 /* Put the this parameter into %eax. */
23048 xops[0] = this_param;
23049 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
23051 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23053 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23056 this_reg = NULL_RTX;
23058 /* Adjust the this parameter by a fixed constant. */
23061 xops[0] = GEN_INT (delta);
23062 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit: DELTA may not fit an immediate; stage it through %r10.  */
23065 if (!x86_64_general_operand (xops[0], DImode))
23067 tmp = gen_rtx_REG (DImode, R10_REG);
23069 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
23071 xops[1] = this_param;
23073 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23076 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23079 /* Adjust the this parameter by a value stored in the vtable. */
23083 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit: pick a scratch reg that is not carrying `this' (fastcall
   passes `this' in %ecx, so use %eax there).  */
23086 int tmp_regno = CX_REG;
23087 if (lookup_attribute ("fastcall",
23088 TYPE_ATTRIBUTES (TREE_TYPE (function))))
23089 tmp_regno = AX_REG;
23090 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer: *this.  */
23093 xops[0] = gen_rtx_MEM (Pmode, this_reg);
23096 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23098 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23100 /* Adjust the this parameter. */
23101 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
23102 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
23104 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
23105 xops[0] = GEN_INT (vcall_offset);
23107 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23108 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
23110 xops[1] = this_reg;
23112 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23114 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23117 /* If necessary, drop THIS back to its stack slot. */
23118 if (this_reg && this_reg != this_param)
23120 xops[0] = this_reg;
23121 xops[1] = this_param;
23123 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23125 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the real target function.  */
23128 xops[0] = XEXP (DECL_RTL (function), 0);
23131 if (!flag_pic || (*targetm.binds_local_p) (function))
23132 output_asm_insn ("jmp\t%P0", xops);
23133 /* All thunks should be in the same object as their target,
23134 and thus binds_local_p should be true. */
23135 else if (TARGET_64BIT_MS_ABI)
23136 gcc_unreachable ();
/* 64-bit PIC non-local: jump through the GOT entry.  */
23139 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
23140 tmp = gen_rtx_CONST (Pmode, tmp);
23141 tmp = gen_rtx_MEM (QImode, tmp);
23143 output_asm_insn ("jmp\t%A0", xops);
23148 if (!flag_pic || (*targetm.binds_local_p) (function))
23149 output_asm_insn ("jmp\t%P0", xops);
/* Darwin: jump via the Mach-O stub for the target symbol.  */
23154 rtx sym_ref = XEXP (DECL_RTL (function), 0);
23155 tmp = (gen_rtx_SYMBOL_REF
23157 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
23158 tmp = gen_rtx_MEM (QImode, tmp);
23160 output_asm_insn ("jmp\t%0", xops);
23163 #endif /* TARGET_MACHO */
/* 32-bit ELF PIC: set up the GOT pointer in %ecx and jump through GOT.  */
23165 tmp = gen_rtx_REG (SImode, CX_REG);
23166 output_set_got (tmp, NULL_RTX);
23169 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
23170 output_asm_insn ("jmp\t{*}%1", xops);
/* ASM_FILE_START hook: emit per-file assembler prologue directives
   (.version, __fltused, Intel-syntax selection) as configured.  */
23176 x86_file_start (void)
23178 default_file_start ();
23180 darwin_file_start ();
23182 if (X86_FILE_START_VERSION_DIRECTIVE)
23183 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23184 if (X86_FILE_START_FLTUSED)
23185 fputs ("\t.global\t__fltused\n", asm_out_file);
23186 if (ix86_asm_dialect == ASM_INTEL)
23187 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN hook: on 32-bit without -malign-double, cap the
   alignment of double/integer-class fields at 32 bits (i386 psABI).  */
23191 x86_field_alignment (tree field, int computed)
23193 enum machine_mode mode;
23194 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the natural (computed) alignment.  */
23196 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, the element type determines the alignment class.  */
23198 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
23199 ? get_inner_array_type (type) : type);
23200 if (mode == DFmode || mode == DCmode
23201 || GET_MODE_CLASS (mode) == MODE_INT
23202 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
23203 return MIN (32, computed);
23207 /* Output assembler code to FILE to increment profiler label # LABELNO
23208 for profiling a function entry. */
/* Emits the mcount call sequence; the three variants below correspond to
   64-bit, 32-bit PIC and 32-bit non-PIC (branch lines elided here).  */
23210 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
23214 #ifndef NO_PROFILE_COUNTERS
23215 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
23218 if (!TARGET_64BIT_MS_ABI && flag_pic)
23219 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
23221 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23225 #ifndef NO_PROFILE_COUNTERS
23226 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
23227 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
23229 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
23233 #ifndef NO_PROFILE_COUNTERS
23234 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
23235 PROFILE_COUNT_REGISTER);
23237 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23241 /* We don't have exact information about the insn sizes, but we may assume
23242 quite safely that we are informed about all 1 byte insns and memory
23243 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound (in bytes) on INSN's encoded size,
   used by ix86_avoid_jump_misspredicts below.  NOTE(review): the actual
   return statements are elided in this extraction.  */
23247 min_insn_size (rtx insn)
23251 if (!INSN_P (insn) || !active_insn_p (insn))
23254 /* Discard alignments we've emit and jump instructions. */
23255 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23256 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23259 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
23260 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
23263 /* Important case - calls are always 5 bytes.
23264 It is common to have many calls in the row. */
23266 && symbolic_reference_mentioned_p (PATTERN (insn))
23267 && !SIBLING_CALL_P (insn))
23269 if (get_attr_length (insn) <= 1)
23272 /* For normal instructions we may rely on the sizes of addresses
23273 and the presence of symbol to require 4 bytes of encoding.
23274 This is not the case for jumps where references are PC relative. */
23275 if (!JUMP_P (insn))
23277 l = get_attr_length_address (insn);
23278 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23287 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Machine-reorg subpass: scans the insn stream keeping a sliding window
   [START, INSN] and inserts align padding when 4 jumps would otherwise
   fall in one 16-byte window.  */
23291 ix86_avoid_jump_misspredicts (void)
23293 rtx insn, start = get_insns ();
23294 int nbytes = 0, njumps = 0;
23297 /* Look for all minimal intervals of instructions containing 4 jumps.
23298 The intervals are bounded by START and INSN. NBYTES is the total
23299 size of instructions in the interval including INSN and not including
23300 START. When the NBYTES is smaller than 16 bytes, it is possible
23301 that the end of START and INSN ends up in the same 16byte page.
23303 The smallest offset in the page INSN can start is the case where START
23304 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
23305 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
23307 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23310 nbytes += min_insn_size (insn);
23312 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
23313 INSN_UID (insn), min_insn_size (insn));
/* Count real jumps only — jump tables (ADDR_VEC et al.) excluded.  */
23315 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23316 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front while it holds 4 jumps.  */
23324 start = NEXT_INSN (start);
23325 if ((JUMP_P (start)
23326 && GET_CODE (PATTERN (start)) != ADDR_VEC
23327 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
23329 njumps--, isjump = 1;
23332 nbytes -= min_insn_size (start);
23334 gcc_assert (njumps >= 0);
23336 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23337 INSN_UID (start), INSN_UID (insn), nbytes);
23339 if (njumps == 3 && isjump && nbytes < 16)
23341 int padsize = 15 - nbytes + min_insn_size (insn);
23344 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23345 INSN_UID (insn), padsize);
23346 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23351 /* AMD Athlon works faster
23352 when RET is not destination of conditional jump or directly preceded
23353 by other jump instruction. We avoid the penalty by inserting NOP just
23354 before the RET instructions in such cases. */
/* ix86_pad_returns: walks predecessors of the exit block; for each hot
   basic block ending in a RETURN that is reached by a (conditional) jump
   or fallthru-from-jump, replaces the return with
   gen_return_internal_long (visible at the bottom).  Several decision
   lines are elided in this listing -- TODO confirm exact conditions.  */
23356 ix86_pad_returns (void)
23361 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23363 basic_block bb = e->src;
23364 rtx ret = BB_END (bb);
23366 bool replace = false;
23368 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23369 || !maybe_hot_bb_p (bb))
23371 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23372 if (active_insn_p (prev) || LABEL_P (prev))
23374 if (prev && LABEL_P (prev))
23379 FOR_EACH_EDGE (e, ei, bb->preds)
23380 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23381 && !(e->flags & EDGE_FALLTHRU))
23386 prev = prev_active_insn (ret);
23388 && ((JUMP_P (prev) && any_condjump_p (prev))
23391 /* Empty functions get branch mispredict even when the jump destination
23392 is not visible to us. */
23393 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23398 emit_insn_before (gen_return_internal_long (), ret);
23404 /* Implement machine specific optimizations. We implement padding of returns
23405 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function header line is elided here; presumably this
   is the machine-reorg pass entry (ix86_reorg) -- confirm against full
   source.  Both sub-passes run only when optimizing and not for size.  */
23409 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23410 ix86_pad_returns ();
23411 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23412 ix86_avoid_jump_misspredicts ();
23415 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the cached recog operands of INSN for a register with REGNO >= 4
   (QImode regs above bl require a REX prefix in 64-bit mode).  The return
   statements are elided in this listing.  */
23418 x86_extended_QIreg_mentioned_p (rtx insn)
23421 extract_insn_cached (insn);
23422 for (i = 0; i < recog_data.n_operands; i++)
23423 if (REG_P (recog_data.operand[i])
23424 && REGNO (recog_data.operand[i]) >= 4)
23429 /* Return nonzero when P points to register encoded via REX prefix.
23430 Called via for_each_rtx. */
/* Helper predicate: true when *P is a REX-encoded integer or SSE
   register (r8-r15 / xmm8-xmm15).  */
23432 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23434 unsigned int regno;
23437 regno = REGNO (*p);
23438 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23441 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the whole pattern of INSN with the predicate above.  */
23444 x86_extended_reg_mentioned_p (rtx insn)
23446 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23449 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23450 optabs would emit if we didn't have TFmode patterns. */
/* Standard unsigned->float idiom: if the input is non-negative, a signed
   conversion suffices; otherwise halve the value (shift right, OR in the
   low bit for rounding), convert, and double the result.  */
23453 x86_emit_floatuns (rtx operands[2])
23455 rtx neglab, donelab, i0, i1, f0, in, out;
23456 enum machine_mode mode, inmode;
23458 inmode = GET_MODE (operands[1]);
23459 gcc_assert (inmode == SImode || inmode == DImode);
23462 in = force_reg (inmode, operands[1]);
23463 mode = GET_MODE (out);
23464 neglab = gen_label_rtx ();
23465 donelab = gen_label_rtx ();
23466 f0 = gen_reg_rtx (mode);
23468 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23470 expand_float (out, in, 0);
23472 emit_jump_insn (gen_jump (donelab));
23475 emit_label (neglab);
23477 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23479 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23481 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23483 expand_float (f0, i0, 0);
23485 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23487 emit_label (donelab);
23490 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23491 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): the mode switch labels are elided in this listing; the
   visible cases appear to be: direct VEC_DUPLICATE, SSE/3DNow! HImode
   broadcast via TRUNCATE, SSE2 V8HImode/V16QImode broadcasts via
   punpckl + pshufd, and a generic widen-and-recurse fallback.  */
23494 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23495 rtx target, rtx val)
23497 enum machine_mode smode, wsmode, wvmode;
23512 val = force_reg (GET_MODE_INNER (mode), val);
23513 x = gen_rtx_VEC_DUPLICATE (mode, val);
23514 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23520 if (TARGET_SSE || TARGET_3DNOW_A)
23522 val = gen_lowpart (SImode, val);
23523 x = gen_rtx_TRUNCATE (HImode, val);
23524 x = gen_rtx_VEC_DUPLICATE (mode, x);
23525 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23547 /* Extend HImode to SImode using a paradoxical SUBREG. */
23548 tmp1 = gen_reg_rtx (SImode);
23549 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23550 /* Insert the SImode value as low element of V4SImode vector. */
23551 tmp2 = gen_reg_rtx (V4SImode);
23552 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23553 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23554 CONST0_RTX (V4SImode),
23556 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23557 /* Cast the V4SImode vector back to a V8HImode vector. */
23558 tmp1 = gen_reg_rtx (V8HImode);
23559 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23560 /* Duplicate the low short through the whole low SImode word. */
23561 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23562 /* Cast the V8HImode vector back to a V4SImode vector. */
23563 tmp2 = gen_reg_rtx (V4SImode);
23564 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23565 /* Replicate the low element of the V4SImode vector. */
23566 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23567 /* Cast the V2SImode back to V8HImode, and store in target. */
23568 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23579 /* Extend QImode to SImode using a paradoxical SUBREG. */
23580 tmp1 = gen_reg_rtx (SImode);
23581 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23582 /* Insert the SImode value as low element of V4SImode vector. */
23583 tmp2 = gen_reg_rtx (V4SImode);
23584 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23585 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23586 CONST0_RTX (V4SImode),
23588 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23589 /* Cast the V4SImode vector back to a V16QImode vector. */
23590 tmp1 = gen_reg_rtx (V16QImode);
23591 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23592 /* Duplicate the low byte through the whole low SImode word. */
23593 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23594 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23595 /* Cast the V16QImode vector back to a V4SImode vector. */
23596 tmp2 = gen_reg_rtx (V4SImode);
23597 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23598 /* Replicate the low element of the V4SImode vector. */
23599 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23600 /* Cast the V2SImode back to V16QImode, and store in target. */
23601 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23609 /* Replicate the value once into the next wider mode and recurse. */
23610 val = convert_modes (wsmode, smode, val, true);
23611 x = expand_simple_binop (wsmode, ASHIFT, val,
23612 GEN_INT (GET_MODE_BITSIZE (smode)),
23613 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23614 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23616 x = gen_reg_rtx (wvmode);
23617 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23618 gcc_unreachable ();
23619 emit_move_insn (target, gen_lowpart (mode, x));
23627 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23628 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* Visible strategy: build {var, 0, ...} in element 0 via VEC_CONCAT or
   VEC_MERGE, then shuffle it into position ONE_VAR with pshufd (SSE2) or
   shufps (SSE1); narrow element modes zero-extend to SImode and recurse.
   Mode case labels are elided in this listing.  */
23632 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23633 rtx target, rtx var, int one_var)
23635 enum machine_mode vsimode;
23651 var = force_reg (GET_MODE_INNER (mode), var);
23652 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23653 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Shuffles below clobber their operand, so work in a fresh pseudo when
   TARGET is a hard register.  */
23658 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23659 new_target = gen_reg_rtx (mode);
23661 new_target = target;
23662 var = force_reg (GET_MODE_INNER (mode), var);
23663 x = gen_rtx_VEC_DUPLICATE (mode, var);
23664 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23665 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23668 /* We need to shuffle the value to the correct position, so
23669 create a new pseudo to store the intermediate result. */
23671 /* With SSE2, we can use the integer shuffle insns. */
23672 if (mode != V4SFmode && TARGET_SSE2)
23674 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23676 GEN_INT (one_var == 1 ? 0 : 1),
23677 GEN_INT (one_var == 2 ? 0 : 1),
23678 GEN_INT (one_var == 3 ? 0 : 1)));
23679 if (target != new_target)
23680 emit_move_insn (target, new_target);
23684 /* Otherwise convert the intermediate result to V4SFmode and
23685 use the SSE1 shuffle instructions. */
23686 if (mode != V4SFmode)
23688 tmp = gen_reg_rtx (V4SFmode);
23689 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23694 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23696 GEN_INT (one_var == 1 ? 0 : 1),
23697 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23698 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23700 if (mode != V4SFmode)
23701 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23702 else if (tmp != target)
23703 emit_move_insn (target, tmp);
23705 else if (target != new_target)
23706 emit_move_insn (target, new_target);
23711 vsimode = V4SImode;
23717 vsimode = V2SImode;
23723 /* Zero extend the variable element to SImode and recurse. */
23724 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23726 x = gen_reg_rtx (vsimode);
23727 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23729 gcc_unreachable ();
23731 emit_move_insn (target, gen_lowpart (mode, x));
23739 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23740 consisting of the values in VALS. It is known that all elements
23741 except ONE_VAR are constants. Return true if successful. */
/* Visible strategy: load the constant vector (with the variable slot
   zeroed) from the pool, then overwrite slot ONE_VAR via
   ix86_expand_vector_set; QImode pairs the variable byte with its
   neighbor and promotes to an HImode set.  Case labels are elided.  */
23744 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23745 rtx target, rtx vals, int one_var)
23747 rtx var = XVECEXP (vals, 0, one_var);
23748 enum machine_mode wmode;
23751 const_vec = copy_rtx (vals);
23752 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23753 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23761 /* For the two element vectors, it's just as easy to use
23762 the general case. */
23778 /* There's no way to set one QImode entry easily. Combine
23779 the variable value with its adjacent constant value, and
23780 promote to an HImode set. */
23781 x = XVECEXP (vals, 0, one_var ^ 1);
23784 var = convert_modes (HImode, QImode, var, true);
23785 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23786 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23787 x = GEN_INT (INTVAL (x) & 0xff);
23791 var = convert_modes (HImode, QImode, var, true);
23792 x = gen_int_mode (INTVAL (x) << 8, HImode);
23794 if (x != const0_rtx)
23795 var = expand_simple_binop (HImode, IOR, var, x, var,
23796 1, OPTAB_LIB_WIDEN);
23798 x = gen_reg_rtx (wmode);
23799 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23800 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23802 emit_move_insn (target, gen_lowpart (mode, x));
23809 emit_move_insn (target, const_vec);
23810 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23814 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23815 all values variable, and none identical. */
/* Visible strategy: 2-element vectors use VEC_CONCAT directly; 4-element
   vectors build two half-vectors recursively and concat; everything else
   packs the elements into word-sized integers with shift/IOR and moves
   the words in.  Mode case labels are elided in this listing.  */
23818 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
23819 rtx target, rtx vals)
23821 enum machine_mode half_mode = GET_MODE_INNER (mode);
23822 rtx op0 = NULL, op1 = NULL;
23823 bool use_vec_concat = false;
23829 if (!mmx_ok && !TARGET_SSE)
23835 /* For the two element vectors, we always implement VEC_CONCAT. */
23836 op0 = XVECEXP (vals, 0, 0);
23837 op1 = XVECEXP (vals, 0, 1);
23838 use_vec_concat = true;
23842 half_mode = V2SFmode;
23845 half_mode = V2SImode;
23851 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23852 Recurse to load the two halves. */
23854 op0 = gen_reg_rtx (half_mode);
23855 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
23856 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
23858 op1 = gen_reg_rtx (half_mode);
23859 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
23860 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
23862 use_vec_concat = true;
23873 gcc_unreachable ();
23876 if (use_vec_concat)
23878 if (!register_operand (op0, half_mode))
23879 op0 = force_reg (half_mode, op0);
23880 if (!register_operand (op1, half_mode))
23881 op1 = force_reg (half_mode, op1);
23883 emit_insn (gen_rtx_SET (VOIDmode, target,
23884 gen_rtx_VEC_CONCAT (mode, op0, op1)));
23888 int i, j, n_elts, n_words, n_elt_per_word;
23889 enum machine_mode inner_mode;
23890 rtx words[4], shift;
23892 inner_mode = GET_MODE_INNER (mode);
23893 n_elts = GET_MODE_NUNITS (mode);
23894 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23895 n_elt_per_word = n_elts / n_words;
23896 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
23898 for (i = 0; i < n_words; ++i)
23900 rtx word = NULL_RTX;
23902 for (j = 0; j < n_elt_per_word; ++j)
/* Elements are packed high-to-low within each word.  */
23904 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
23905 elt = convert_modes (word_mode, inner_mode, elt, true);
23911 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
23912 word, 1, OPTAB_LIB_WIDEN);
23913 word = expand_simple_binop (word_mode, IOR, word, elt,
23914 word, 1, OPTAB_LIB_WIDEN);
23922 emit_move_insn (target, gen_lowpart (mode, words[0]));
23923 else if (n_words == 2)
23925 rtx tmp = gen_reg_rtx (mode);
23926 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
23927 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
23928 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
23929 emit_move_insn (target, tmp);
23931 else if (n_words == 4)
23933 rtx tmp = gen_reg_rtx (V4SImode);
23934 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
23935 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
23936 emit_move_insn (target, gen_lowpart (mode, tmp));
23939 gcc_unreachable ();
23943 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23944 instructions unless MMX_OK is true. */
/* Dispatcher: classifies VALS (all-constant, all-identical, exactly one
   variable element) and delegates to the specialized expanders above,
   falling back to the general routine.  */
23947 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
23949 enum machine_mode mode = GET_MODE (target);
23950 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23951 int n_elts = GET_MODE_NUNITS (mode);
23952 int n_var = 0, one_var = -1;
23953 bool all_same = true, all_const_zero = true;
23957 for (i = 0; i < n_elts; ++i)
23959 x = XVECEXP (vals, 0, i);
23960 if (!(CONST_INT_P (x)
23961 || GET_CODE (x) == CONST_DOUBLE
23962 || GET_CODE (x) == CONST_FIXED))
23963 n_var++, one_var = i;
23964 else if (x != CONST0_RTX (inner_mode))
23965 all_const_zero = false;
23966 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
23970 /* Constants are best loaded from the constant pool. */
23973 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
23977 /* If all values are identical, broadcast the value. */
23979 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
23980 XVECEXP (vals, 0, 0)))
23983 /* Values where only one field is non-constant are best loaded from
23984 the pool and overwritten via move later. */
23988 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
23989 XVECEXP (vals, 0, one_var),
23993 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
23997 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  Per-mode
   strategies visible below: VEC_CONCAT for 2-element vectors,
   SSE4.1 insert (use_vec_merge), shufps/pshufd shuffle dances for
   V4SF/V4SI, and a memory round-trip as the last-resort fallback.
   Mode case labels are elided in this listing.  */
24001 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
24003 enum machine_mode mode = GET_MODE (target);
24004 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24005 bool use_vec_merge = false;
24014 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
24015 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
24017 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
24019 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
24020 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24026 use_vec_merge = TARGET_SSE4_1;
24034 /* For the two element vectors, we implement a VEC_CONCAT with
24035 the extraction of the other element. */
24037 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
24038 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
24041 op0 = val, op1 = tmp;
24043 op0 = tmp, op1 = val;
24045 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
24046 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24051 use_vec_merge = TARGET_SSE4_1;
24058 use_vec_merge = true;
24062 /* tmp = target = A B C D */
24063 tmp = copy_to_reg (target);
24064 /* target = A A B B */
24065 emit_insn (gen_sse_unpcklps (target, target, target));
24066 /* target = X A B B */
24067 ix86_expand_vector_set (false, target, val, 0);
24068 /* target = A X C D */
24069 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24070 GEN_INT (1), GEN_INT (0),
24071 GEN_INT (2+4), GEN_INT (3+4)));
24075 /* tmp = target = A B C D */
24076 tmp = copy_to_reg (target);
24077 /* tmp = X B C D */
24078 ix86_expand_vector_set (false, tmp, val, 0);
24079 /* target = A B X D */
24080 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24081 GEN_INT (0), GEN_INT (1),
24082 GEN_INT (0+4), GEN_INT (3+4)));
24086 /* tmp = target = A B C D */
24087 tmp = copy_to_reg (target);
24088 /* tmp = X B C D */
24089 ix86_expand_vector_set (false, tmp, val, 0);
24090 /* target = A B X D */
24091 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24092 GEN_INT (0), GEN_INT (1),
24093 GEN_INT (2+4), GEN_INT (0+4)));
24097 gcc_unreachable ();
24102 use_vec_merge = TARGET_SSE4_1;
24106 /* Element 0 handled by vec_merge below. */
24109 use_vec_merge = true;
24115 /* With SSE2, use integer shuffles to swap element 0 and ELT,
24116 store into element 0, then shuffle them back. */
24120 order[0] = GEN_INT (elt);
24121 order[1] = const1_rtx;
24122 order[2] = const2_rtx;
24123 order[3] = GEN_INT (3);
24124 order[elt] = const0_rtx;
24126 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24127 order[1], order[2], order[3]));
24129 ix86_expand_vector_set (false, target, val, 0);
24131 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24132 order[1], order[2], order[3]));
24136 /* For SSE1, we have to reuse the V4SF code. */
24137 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
24138 gen_lowpart (SFmode, val), elt);
24143 use_vec_merge = TARGET_SSE2;
24146 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24150 use_vec_merge = TARGET_SSE4_1;
24160 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
24161 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
24162 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill to a stack slot, store the element, reload.  */
24166 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24168 emit_move_insn (mem, target);
24170 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24171 emit_move_insn (tmp, val);
24173 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  Mirrors
   ix86_expand_vector_set: VEC_SELECT where a pattern exists
   (use_vec_extr), shuffle-to-lane-0 for V4SF/V4SI, SSE1 V4SF reuse, and
   a stack round-trip fallback.  Case labels are elided in this
   listing.  */
24178 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
24180 enum machine_mode mode = GET_MODE (vec);
24181 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24182 bool use_vec_extr = false;
24195 use_vec_extr = true;
24199 use_vec_extr = TARGET_SSE4_1;
24211 tmp = gen_reg_rtx (mode);
24212 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
24213 GEN_INT (elt), GEN_INT (elt),
24214 GEN_INT (elt+4), GEN_INT (elt+4)));
24218 tmp = gen_reg_rtx (mode);
24219 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
24223 gcc_unreachable ();
24226 use_vec_extr = true;
24231 use_vec_extr = TARGET_SSE4_1;
24245 tmp = gen_reg_rtx (mode);
24246 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
24247 GEN_INT (elt), GEN_INT (elt),
24248 GEN_INT (elt), GEN_INT (elt)));
24252 tmp = gen_reg_rtx (mode);
24253 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
24257 gcc_unreachable ();
24260 use_vec_extr = true;
24265 /* For SSE1, we have to reuse the V4SF code. */
24266 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
24267 gen_lowpart (V4SFmode, vec), elt);
24273 use_vec_extr = TARGET_SSE2;
24276 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24280 use_vec_extr = TARGET_SSE4_1;
24284 /* ??? Could extract the appropriate HImode element and shift. */
24291 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
24292 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
24294 /* Let the rtl optimizers know about the zero extension performed. */
24295 if (inner_mode == QImode || inner_mode == HImode)
24297 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
24298 target = gen_lowpart (SImode, target);
24301 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill the vector to a stack slot and load one element.  */
24305 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24307 emit_move_insn (mem, vec);
24309 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24310 emit_move_insn (target, tmp);
24314 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
24315 pattern to reduce; DEST is the destination; IN is the input vector. */
/* Classic log2 reduction: movhlps folds the high pair onto the low pair,
   FN combines them, shufps brings element 1 down, FN combines again.  */
24318 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
24320 rtx tmp1, tmp2, tmp3;
24322 tmp1 = gen_reg_rtx (V4SFmode);
24323 tmp2 = gen_reg_rtx (V4SFmode);
24324 tmp3 = gen_reg_rtx (V4SFmode);
24326 emit_insn (gen_sse_movhlps (tmp1, in, in));
24327 emit_insn (fn (tmp2, tmp1, in));
24329 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
24330 GEN_INT (1), GEN_INT (1),
24331 GEN_INT (1+4), GEN_INT (1+4)));
24332 emit_insn (fn (dest, tmp2, tmp3));
24335 /* Target hook for scalar_mode_supported_p. */
/* TFmode (__float128) only in 64-bit mode; decimal-FP handling between
   these lines is elided in this listing.  */
24337 ix86_scalar_mode_supported_p (enum machine_mode mode)
24339 if (DECIMAL_FLOAT_MODE_P (mode))
24341 else if (mode == TFmode)
24342 return TARGET_64BIT;
24344 return default_scalar_mode_supported_p (mode);
24347 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when some enabled ISA level covers it;
   the `return true` lines are elided in this listing.  */
24349 ix86_vector_mode_supported_p (enum machine_mode mode)
24351 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24353 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24355 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24357 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24362 /* Target hook for c_mode_for_suffix. */
/* Maps literal suffixes to machine modes ('q' in 64-bit mode, 'w' with
   MMX); the returned mode names are elided in this listing.  */
24363 static enum machine_mode
24364 ix86_c_mode_for_suffix (char suffix)
24366 if (TARGET_64BIT && suffix == 'q')
24368 if (TARGET_MMX && suffix == 'w')
24374 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24376 We do this in the new i386 backend to maintain source compatibility
24377 with the old cc0-based compiler. */
/* Adds implicit "flags" and "fpsr" clobbers to every asm statement.  */
24380 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24381 tree inputs ATTRIBUTE_UNUSED,
24384 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24386 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24391 /* Implements target vector targetm.asm.encode_section_info. This
24392 is not used by netware. */
/* After the default encoding, marks static/external variables living in
   the large data section with SYMBOL_FLAG_FAR_ADDR.  */
24394 static void ATTRIBUTE_UNUSED
24395 ix86_encode_section_info (tree decl, rtx rtl, int first)
24397 default_encode_section_info (decl, rtl, first);
24399 if (TREE_CODE (decl) == VAR_DECL
24400 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24401 && ix86_in_large_data_p (decl))
24402 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24405 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes must use the unordered-aware reversal so NaN
   semantics are preserved.  */
24408 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24410 return (mode != CCFPmode && mode != CCFPUmode
24411 ? reverse_condition (code)
24412 : reverse_condition_maybe_unordered (code));
24415 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Chooses between fst/fstp/fld/ffreep templates based on whether the
   source register dies (REG_DEAD note) and whether the destination is a
   register, the stack top, or memory.  XFmode has no non-popping memory
   store, hence the fstp+fld pair.  Some return paths are elided in this
   listing.  */
24419 output_387_reg_move (rtx insn, rtx *operands)
24421 if (REG_P (operands[0]))
24423 if (REG_P (operands[1])
24424 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24426 if (REGNO (operands[0]) == FIRST_STACK_REG)
24427 return output_387_ffreep (operands, 0);
24428 return "fstp\t%y0";
24430 if (STACK_TOP_P (operands[0]))
24431 return "fld%z1\t%y1";
24434 else if (MEM_P (operands[0]))
24436 gcc_assert (REG_P (operands[1]));
24437 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24438 return "fstp%z0\t%y0";
24441 /* There is no non-popping store to memory for XFmode.
24442 So if we need one, follow the store with a load. */
24443 if (GET_MODE (operands[0]) == XFmode)
24444 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24446 return "fst%z0\t%y0";
24453 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24454 FP status register is set. */
/* Reads the FP status word with fnstsw; then either sahf + UNORDERED
   test, or a testb of bit 0x04 (C2) + NE test; finally emits the
   conditional jump and marks it unlikely (10%).  */
24457 ix86_emit_fp_unordered_jump (rtx label)
24459 rtx reg = gen_reg_rtx (HImode);
24462 emit_insn (gen_x86_fnstsw_1 (reg));
24464 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24466 emit_insn (gen_x86_sahf_1 (reg));
24468 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24469 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24473 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24475 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24476 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24479 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24480 gen_rtx_LABEL_REF (VOIDmode, label),
24482 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24484 emit_jump_insn (temp);
24485 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24488 /* Output code to perform a log1p XFmode calculation. */
/* For |op1| < 1-sqrt(2)/2 (~0.2929) uses fyl2xp1 directly (accurate near
   zero); otherwise computes log via fyl2x on 1+op1.  Both paths multiply
   by ln(2) via the fldln2 constant.  */
24490 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24492 rtx label1 = gen_label_rtx ();
24493 rtx label2 = gen_label_rtx ();
24495 rtx tmp = gen_reg_rtx (XFmode);
24496 rtx tmp2 = gen_reg_rtx (XFmode);
24498 emit_insn (gen_absxf2 (tmp, op1));
24499 emit_insn (gen_cmpxf (tmp,
24500 CONST_DOUBLE_FROM_REAL_VALUE (
24501 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24503 emit_jump_insn (gen_bge (label1));
24505 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24506 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24507 emit_jump (label2);
24509 emit_label (label1);
24510 emit_move_insn (tmp, CONST1_RTX (XFmode));
24511 emit_insn (gen_addxf3 (tmp, op1, tmp));
24512 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24513 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24515 emit_label (label2);
24518 /* Output code to perform a Newton-Rhapson approximation of a single precision
24519 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* One Newton-Raphson step on the rcpss estimate:
   a/b ~= a * x0 * (2 - b*x0), where x0 = rcp(b).  MODE may be scalar SF
   or a vector float mode (the constant 2.0 is broadcast for vectors).  */
24521 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24523 rtx x0, x1, e0, e1, two;
24525 x0 = gen_reg_rtx (mode);
24526 e0 = gen_reg_rtx (mode);
24527 e1 = gen_reg_rtx (mode);
24528 x1 = gen_reg_rtx (mode);
24530 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24532 if (VECTOR_MODE_P (mode))
24533 two = ix86_build_const_vector (SFmode, true, two);
24535 two = force_reg (mode, two);
24537 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24539 /* x0 = rcp(b) estimate */
24540 emit_insn (gen_rtx_SET (VOIDmode, x0,
24541 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 */
24544 emit_insn (gen_rtx_SET (VOIDmode, e0,
24545 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
24547 emit_insn (gen_rtx_SET (VOIDmode, e1,
24548 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1 (refined reciprocal) */
24550 emit_insn (gen_rtx_SET (VOIDmode, x1,
24551 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
24553 emit_insn (gen_rtx_SET (VOIDmode, res,
24554 gen_rtx_MULT (mode, a, x1)));
24557 /* Output code to perform a Newton-Rhapson approximation of a
24558 single precision floating point [reciprocal] square root. */
/* One NR step on the rsqrtss estimate, using constants -3 and -0.5:
   sqrt(a)  ~= -0.5 * (a*x0) * (a*x0*x0 - 3)
   rsqrt(a) ~= -0.5 *  x0    * (a*x0*x0 - 3),  x0 = rsqrt(a) estimate.
   A zero-input mask step protects sqrt(0) from the Inf estimate.  The
   recip/sqrt mode selector parameter name is elided in this listing.  */
24560 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24563 rtx x0, e0, e1, e2, e3, mthree, mhalf;
24566 x0 = gen_reg_rtx (mode);
24567 e0 = gen_reg_rtx (mode);
24568 e1 = gen_reg_rtx (mode);
24569 e2 = gen_reg_rtx (mode);
24570 e3 = gen_reg_rtx (mode);
24572 real_from_integer (&r, VOIDmode, -3, -1, 0);
24573 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24575 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24576 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24578 if (VECTOR_MODE_P (mode))
24580 mthree = ix86_build_const_vector (SFmode, true, mthree);
24581 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
24584 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24585 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
24587 /* x0 = rsqrt(a) estimate */
24588 emit_insn (gen_rtx_SET (VOIDmode, x0,
24589 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24592 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
24597 zero = gen_reg_rtx (mode);
24598 mask = gen_reg_rtx (mode);
24600 zero = force_reg (mode, CONST0_RTX(mode));
24601 emit_insn (gen_rtx_SET (VOIDmode, mask,
24602 gen_rtx_NE (mode, zero, a)));
24604 emit_insn (gen_rtx_SET (VOIDmode, x0,
24605 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a;  e1 = e0 * x0;  e2 = e1 - 3.  */
24609 emit_insn (gen_rtx_SET (VOIDmode, e0,
24610 gen_rtx_MULT (mode, x0, a)));
24612 emit_insn (gen_rtx_SET (VOIDmode, e1,
24613 gen_rtx_MULT (mode, e0, x0)));
24616 mthree = force_reg (mode, mthree);
24617 emit_insn (gen_rtx_SET (VOIDmode, e2,
24618 gen_rtx_PLUS (mode, e1, mthree)));
24620 mhalf = force_reg (mode, mhalf);
24622 /* e3 = -.5 * x0 */
24623 emit_insn (gen_rtx_SET (VOIDmode, e3,
24624 gen_rtx_MULT (mode, x0, mhalf)));
24626 /* e3 = -.5 * e0 */
24627 emit_insn (gen_rtx_SET (VOIDmode, e3,
24628 gen_rtx_MULT (mode, e0, mhalf)));
24629 /* ret = e2 * e3 */
24630 emit_insn (gen_rtx_SET (VOIDmode, res,
24631 gen_rtx_MULT (mode, e2, e3)));
24634 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Emits the "@unwind" section marker for every .eh_frame occurrence
   (Binutils 2.15 requirement); otherwise defers to the ELF default.  */
24636 static void ATTRIBUTE_UNUSED
24637 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24640 /* With Binutils 2.15, the "@unwind" marker must be specified on
24641 every occurrence of the ".eh_frame" section, not just the first
24644 && strcmp (name, ".eh_frame") == 0)
24646 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24647 flags & SECTION_WRITE ? "aw" : "a");
24650 default_elf_asm_named_section (name, flags, decl);
24653 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* The `return "g"` / `return "e"` lines are elided in this listing.  */
24655 static const char *
24656 ix86_mangle_type (const_tree type)
24658 type = TYPE_MAIN_VARIANT (type);
24660 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24661 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24664 switch (TYPE_MODE (type))
24667 /* __float128 is "g". */
24670 /* "long double" or __float80 is "e". */
24677 /* For 32-bit code we can save PIC register setup by using
24678 __stack_chk_fail_local hidden function instead of calling
24679 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
24680 register, so it is better to call __stack_chk_fail directly. */
/* Returns the call tree for the stack-protector failure handler.  */
24683 ix86_stack_protect_fail (void)
24685 return TARGET_64BIT
24686 ? default_external_stack_protect_fail ()
24687 : default_hidden_stack_protect_fail ();
24690 /* Select a format to encode pointers in exception handling data. CODE
24691 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24692 true if the symbol may be affected by dynamic relocations.
24694 ??? All x86 object file formats are capable of representing this.
24695 After all, the relocation needed is the same as for the call insn.
24696 Whether or not a particular assembler allows us to enter such, I
24697 guess we'll have to see. */
/* PIC / 64-bit paths prefer pc-relative sdata (4- or 8-byte depending on
   code model), optionally indirect for global symbols; non-PIC small/
   medium models use udata4, everything else an absolute pointer.  The
   guarding condition line before the sdata8 default is elided.  */
24699 asm_preferred_eh_data_format (int code, int global)
24703 int type = DW_EH_PE_sdata8;
24705 || ix86_cmodel == CM_SMALL_PIC
24706 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24707 type = DW_EH_PE_sdata4;
24708 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24710 if (ix86_cmodel == CM_SMALL
24711 || (ix86_cmodel == CM_MEDIUM && code))
24712 return DW_EH_PE_udata4;
24713 return DW_EH_PE_absptr;
24716 /* Expand copysign from SIGN to the positive value ABS_VALUE
24717 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* result = abs_value | (sign & ~signbit-mask complement); builds the
   sign-bit mask on demand, extracting a scalar lane from the vector
   constant when MODE is scalar.  */
24720 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24722 enum machine_mode mode = GET_MODE (sign);
24723 rtx sgn = gen_reg_rtx (mode);
24724 if (mask == NULL_RTX)
24726 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24727 if (!VECTOR_MODE_P (mode))
24729 /* We need to generate a scalar mode mask in this case. */
24730 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24731 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24732 mask = gen_reg_rtx (mode);
24733 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24737 mask = gen_rtx_NOT (mode, mask);
24738 emit_insn (gen_rtx_SET (VOIDmode, sgn,
24739 gen_rtx_AND (mode, mask, sign)));
24740 emit_insn (gen_rtx_SET (VOIDmode, result,
24741 gen_rtx_IOR (mode, abs_value, sgn)));
24744 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24745 mask for masking out the sign-bit is stored in *SMASK, if that is
/* non-null (tail of the comment is elided in this view).  */
24748 ix86_expand_sse_fabs (rtx op0, rtx *smask)
24750 enum machine_mode mode = GET_MODE (op0);
24753 xa = gen_reg_rtx (mode);
/* Build an all-ones-except-sign-bit mask (invert == true).  */
24754 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
24755 if (!VECTOR_MODE_P (mode))
24757 /* We need to generate a scalar mode mask in this case. */
24758 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24759 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24760 mask = gen_reg_rtx (mode);
24761 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 & mask — clears the sign bit, i.e. fabs.  */
24762
24763 emit_insn (gen_rtx_SET (VOIDmode, xa,
24764 gen_rtx_AND (mode, op0, mask)))
24772 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24773 swapping the operands if SWAP_OPERANDS is true. The expanded
24774 code is a forward jump to a newly created label in case the
24775 comparison is true. The generated label rtx is returned. */
24777 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
24778 bool swap_operands)
24789 label = gen_label_rtx ();
/* Emit an unordered FP compare setting the flags register (CCFPUmode
   is used so qNaN operands do not raise exceptions).  */
24790 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
24791 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24792 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
/* Conditional jump to LABEL when CODE holds on the flags.  */
24793 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
24794 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24795 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
24796 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24797 JUMP_LABEL (tmp) = label;
24802 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24803 using comparison code CODE. Operands are swapped for the comparison if
24804 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24806 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
24807 bool swap_operands)
24809 enum machine_mode mode = GET_MODE (op0);
24810 rtx mask = gen_reg_rtx (mode);
/* cmpsd for double, cmpss for float: produce an all-ones/all-zeros
   mask in MASK according to CODE.  */
24819 if (mode == DFmode)
24820 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
24821 gen_rtx_fmt_ee (code, mode, op0, op1)));
24823 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
24824 gen_rtx_fmt_ee (code, mode, op0, op1)));
24829 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24830 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
24832 ix86_gen_TWO52 (enum machine_mode mode)
24834 REAL_VALUE_TYPE TWO52r;
/* 2^52 for double, 2^23 for float: the magnitude above which every
   representable value is already an integer.  */
24837 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
24838 TWO52 = const_double_from_real_value (TWO52r, mode);
24839 TWO52 = force_reg (mode, TWO52);
24844 /* Expand SSE sequence for computing lround from OP1 storing
/* into OP0 (remainder of comment elided in this view).  */
24847 ix86_expand_lround (rtx op0, rtx op1)
24849 /* C code for the stuff we're doing below:
24850 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
/* return (long)tmp — presumably; the tail of this model is elided.  */
24853 enum machine_mode mode = GET_MODE (op1);
24854 const struct real_format *fmt;
24855 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24858 /* load nextafter (0.5, 0.0) — largest value strictly below 0.5,
      so that exact halfway cases round away from zero while values
      just under .5 are not pushed over.  */
24859 fmt = REAL_MODE_FORMAT (mode);
24860 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24861 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24863 /* adj = copysign (0.5, op1) */
24864 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
24865 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
24867 /* adj = op1 + adj */
24868 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
24870 /* op0 = (imode)adj — truncating FP->integer conversion.  */
24871 expand_fix (op0, adj, 0);
24874 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* into OPERAND0 — NOTE(review): despite "lround" above, the body
   implements lfloor/lceil selected by DO_FLOOR.  */
24877 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
24879 /* C code for the stuff we're doing below (for do_floor):
/* xi = (long)op1;  */
24881 xi -= (double)xi > op1 ? 1 : 0;
/* return xi;  (ceil: += 1 when (double)xi < op1 instead).  */
24884 enum machine_mode fmode = GET_MODE (op1);
24885 enum machine_mode imode = GET_MODE (op0);
24886 rtx ireg, freg, label, tmp;
24888 /* reg = (long)op1 */
24889 ireg = gen_reg_rtx (imode);
24890 expand_fix (ireg, op1, 0);
24892 /* freg = (double)reg */
24893 freg = gen_reg_rtx (fmode);
24894 expand_float (freg, ireg, 0);
24896 /* ireg = (freg > op1) ? ireg - 1 : ireg — skip the adjustment when
      UNLE holds (operands swapped for the ceil direction).  */
24897 label = ix86_expand_sse_compare_and_jump (UNLE,
24898 freg, op1, !do_floor)
24899 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
24900 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
24901 emit_move_insn (ireg, tmp);
24903 emit_label (label);
24904 LABEL_NUSES (label) = 1;
24906 emit_move_insn (op0, ireg);
24909 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24910 result in OPERAND0. */
24912 ix86_expand_rint (rtx operand0, rtx operand1)
24914 /* C code for the stuff we're doing below:
24915 xa = fabs (operand1);
24916 if (!isless (xa, 2**52))
/* return operand1;  — already integral (or NaN/inf).  */
24918 xa = xa + 2**52 - 2**52;
24919 return copysign (xa, operand1);
24921 enum machine_mode mode = GET_MODE (operand0);
24922 rtx res, xa, label, TWO52, mask;
24924 res = gen_reg_rtx (mode);
24925 emit_move_insn (res, operand1);
24927 /* xa = abs (operand1) */
24928 xa = ix86_expand_sse_fabs (res, &mask);
24930 /* if (!isless (xa, TWO52)) goto label; */
24931 TWO52 = ix86_gen_TWO52 (mode);
24932 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding and subtracting 2^52 forces rounding to integer in the
   current (round-to-nearest) FP mode.  */
24934 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24935 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign so -0.0 and negative inputs are correct.  */
24937 ix86_sse_copysign_to_positive (res, xa, res, mask);
24939 emit_label (label);
24940 LABEL_NUSES (label) = 1;
24942 emit_move_insn (operand0, res);
24945 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* into OPERAND0.  32-bit-safe variant: avoids DImode fix/float, using
   the 2^52 add/sub trick plus a +-1 compensation instead.  */
24948 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
24950 /* C code for the stuff we expand below.
24951 double xa = fabs (x), x2;
24952 if (!isless (xa, TWO52))
/* return x;  — already integral.  */
24954 xa = xa + TWO52 - TWO52;
24955 x2 = copysign (xa, x);
/* then compensate by +-1 and return x2 (elided in this view).  */
24964 enum machine_mode mode = GET_MODE (operand0);
24965 rtx xa, TWO52, tmp, label, one, res, mask;
24967 TWO52 = ix86_gen_TWO52 (mode);
24969 /* Temporary for holding the result, initialized to the input
24970 operand to ease control flow. */
24971 res = gen_reg_rtx (mode);
24972 emit_move_insn (res, operand1);
24974 /* xa = abs (operand1) */
24975 xa = ix86_expand_sse_fabs (res, &mask);
24977 /* if (!isless (xa, TWO52)) goto label; */
24978 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24980 /* xa = xa + TWO52 - TWO52; */
24981 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24982 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24984 /* xa = copysign (xa, operand1) */
24985 ix86_sse_copysign_to_positive (xa, xa, res, mask);
24987 /* generate 1.0 or -1.0 */
24988 one = force_reg (mode,
24989 const_double_from_real_value (do_floor
24990 ? dconst1 : dconstm1, mode));
24992 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24993 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24994 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24995 gen_rtx_AND (mode, one, tmp)));
24996 /* We always need to subtract here to preserve signed zero. */
24997 tmp = expand_simple_binop (mode, MINUS,
24998 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24999 emit_move_insn (res, tmp);
25001 emit_label (label);
25002 LABEL_NUSES (label) = 1;
25004 emit_move_insn (operand0, res);
25007 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* into OPERAND0.  Uses fix/float via an integer register (DImode needs
   64-bit cvttsd2siq), plus a +-1 compensation.  */
25010 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
25012 /* C code for the stuff we expand below.
25013 double xa = fabs (x), x2;
25014 if (!isless (xa, TWO52))
/* return x;  — already integral.  */
25016 x2 = (double)(long)x;
/* floor: x2 -= (x2 > x); ceil: x2 += (x2 < x) — elided here.  */
25023 if (HONOR_SIGNED_ZEROS (mode))
25024 return copysign (x2, x);
/* return x2;  */
25027 enum machine_mode mode = GET_MODE (operand0);
25028 rtx xa, xi, TWO52, tmp, label, one, res, mask;
25030 TWO52 = ix86_gen_TWO52 (mode);
25032 /* Temporary for holding the result, initialized to the input
25033 operand to ease control flow. */
25034 res = gen_reg_rtx (mode);
25035 emit_move_insn (res, operand1);
25037 /* xa = abs (operand1) */
25038 xa = ix86_expand_sse_fabs (res, &mask);
25040 /* if (!isless (xa, TWO52)) goto label; */
25041 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25043 /* xa = (double)(long)x — truncate toward zero, then convert back.  */
25044 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25045 expand_fix (xi, res, 0);
25046 expand_float (xa, xi, 0);
25049 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25051 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25052 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25053 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25054 gen_rtx_AND (mode, one, tmp)));
25055 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
25056 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25057 emit_move_insn (res, tmp);
/* Restore the sign so floor(-0.0) == -0.0 when signed zeros matter.  */
25059 if (HONOR_SIGNED_ZEROS (mode))
25060 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25062 emit_label (label);
25063 LABEL_NUSES (label) = 1;
25065 emit_move_insn (operand0, res);
25068 /* Expand SSE sequence for computing round from OPERAND1 storing
25069 into OPERAND0. Sequence that works without relying on DImode truncation
25070 via cvttsd2siq that is only available on 64bit targets. */
25072 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
25074 /* C code for the stuff we expand below.
25075 double xa = fabs (x), xa2, x2;
25076 if (!isless (xa, TWO52))
/* return x;  — already integral.  */
25078 Using the absolute value and copying back sign makes
25079 -0.0 -> -0.0 correct.
25080 xa2 = xa + TWO52 - TWO52;
/* dxa = xa2 - xa;  then correct round-to-even toward round-half-away:  */
25085 else if (dxa > 0.5)
/* xa2 -= 1;  (and xa2 += 1 when dxa <= -0.5) — partly elided.  */
25087 x2 = copysign (xa2, x);
/* return x2;  */
25090 enum machine_mode mode = GET_MODE (operand0);
25091 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
25093 TWO52 = ix86_gen_TWO52 (mode);
25095 /* Temporary for holding the result, initialized to the input
25096 operand to ease control flow. */
25097 res = gen_reg_rtx (mode);
25098 emit_move_insn (res, operand1);
25100 /* xa = abs (operand1) */
25101 xa = ix86_expand_sse_fabs (res, &mask);
25103 /* if (!isless (xa, TWO52)) goto label; */
25104 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25106 /* xa2 = xa + TWO52 - TWO52; */
25107 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25108 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
25110 /* dxa = xa2 - xa; */
25111 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
25113 /* generate 0.5, 1.0 and -0.5 */
25114 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
25115 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
25116 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* (continuation of the mhalf binop is elided in this view).  */
25120 tmp = gen_reg_rtx (mode);
25121 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
25122 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
25123 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25124 gen_rtx_AND (mode, one, tmp)));
25125 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25126 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
25127 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
25128 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25129 gen_rtx_AND (mode, one, tmp)));
25130 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25132 /* res = copysign (xa2, operand1) */
25133 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
25135 emit_label (label);
25136 LABEL_NUSES (label) = 1;
25138 emit_move_insn (operand0, res);
25141 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* into OPERAND0, via truncating fix/float (needs DImode fix for
   DFmode, i.e. 64-bit cvttsd2siq).  */
25144 ix86_expand_trunc (rtx operand0, rtx operand1)
25146 /* C code for SSE variant we expand below.
25147 double xa = fabs (x), x2;
25148 if (!isless (xa, TWO52))
/* return x;  — already integral.  */
25150 x2 = (double)(long)x;
25151 if (HONOR_SIGNED_ZEROS (mode))
25152 return copysign (x2, x);
/* return x2;  */
25155 enum machine_mode mode = GET_MODE (operand0);
25156 rtx xa, xi, TWO52, label, res, mask;
25158 TWO52 = ix86_gen_TWO52 (mode);
25160 /* Temporary for holding the result, initialized to the input
25161 operand to ease control flow. */
25162 res = gen_reg_rtx (mode);
25163 emit_move_insn (res, operand1);
25165 /* xa = abs (operand1) */
25166 xa = ix86_expand_sse_fabs (res, &mask);
25168 /* if (!isless (xa, TWO52)) goto label; */
25169 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25171 /* x = (double)(long)x — fix truncates toward zero, which is trunc.  */
25172 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25173 expand_fix (xi, res, 0);
25174 expand_float (res, xi, 0);
25176 if (HONOR_SIGNED_ZEROS (mode))
25177 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25179 emit_label (label);
25180 LABEL_NUSES (label) = 1;
25182 emit_move_insn (operand0, res);
25185 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* into OPERAND0.  32-bit-safe DFmode variant: avoids DImode fix by
   using the 2^52 add/sub trick on the absolute value, then fixing up
   the possible round-up with a -1 compensation.  */
25188 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
25190 enum machine_mode mode = GET_MODE (operand0);
25191 rtx xa, mask, TWO52, label, one, res, smask, tmp;
25193 /* C code for SSE variant we expand below.
25194 double xa = fabs (x), x2;
25195 if (!isless (xa, TWO52))
/* return x;  — already integral.  */
25197 xa2 = xa + TWO52 - TWO52;
/* if (xa2 > xa) xa2 -= 1;  — undo round-to-nearest's round-up.  */
25201 x2 = copysign (xa2, x);
/* return x2;  */
25205 TWO52 = ix86_gen_TWO52 (mode);
25207 /* Temporary for holding the result, initialized to the input
25208 operand to ease control flow. */
25209 res = gen_reg_rtx (mode);
25210 emit_move_insn (res, operand1);
25212 /* xa = abs (operand1) */
25213 xa = ix86_expand_sse_fabs (res, &smask);
25215 /* if (!isless (xa, TWO52)) goto label; */
25216 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25218 /* res = xa + TWO52 - TWO52; */
25219 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25220 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
25221 emit_move_insn (res, tmp);
25224 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25226 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
25227 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
25228 emit_insn (gen_rtx_SET (VOIDmode, mask,
25229 gen_rtx_AND (mode, mask, one)));
25230 tmp = expand_simple_binop (mode, MINUS,
25231 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
25232 emit_move_insn (res, tmp);
25234 /* res = copysign (res, operand1) */
25235 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
25237 emit_label (label);
25238 LABEL_NUSES (label) = 1;
25240 emit_move_insn (operand0, res);
25243 /* Expand SSE sequence for computing round from OPERAND1 storing
/* into OPERAND0: round half away from zero, via fix/float of
   x + nextafter(0.5, 0.0).  */
25246 ix86_expand_round (rtx operand0, rtx operand1)
25248 /* C code for the stuff we're doing below:
25249 double xa = fabs (x);
25250 if (!isless (xa, TWO52))
/* return x;  — already integral.  */
25252 xa = (double)(long)(xa + nextafter (0.5, 0.0));
25253 return copysign (xa, x);
25255 enum machine_mode mode = GET_MODE (operand0);
25256 rtx res, TWO52, xa, label, xi, half, mask;
25257 const struct real_format *fmt;
25258 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25260 /* Temporary for holding the result, initialized to the input
25261 operand to ease control flow. */
25262 res = gen_reg_rtx (mode);
25263 emit_move_insn (res, operand1);
25265 TWO52 = ix86_gen_TWO52 (mode);
25266 xa = ix86_expand_sse_fabs (res, &mask);
25267 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25269 /* load nextafter (0.5, 0.0) — just below 0.5, so values slightly
      under a half are not incorrectly rounded up.  */
25270 fmt = REAL_MODE_FORMAT (mode);
25271 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25272 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25274 /* xa = xa + 0.5 */
25275 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
25276 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
25278 /* xa = (double)(int64_t)xa — truncating fix, then float back.  */
25279 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25280 expand_fix (xi, xa, 0);
25281 expand_float (xa, xi, 0);
25283 /* res = copysign (xa, operand1) */
25284 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
25286 emit_label (label);
25287 LABEL_NUSES (label) = 1;
25289 emit_move_insn (operand0, res);
25293 /* Validate whether a SSE5 instruction is valid or not.
25294 OPERANDS is the array of operands.
25295 NUM is the number of operands.
25296 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
25297 NUM_MEMORY is the maximum number of memory operands to accept. */
25299 ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
25305 /* Count the number of memory arguments */
/* mem_mask collects a bit per operand index that is a MEM; mem_count
   is the total — both presumably declared in elided lines.  */
25308 for (i = 0; i < num; i++)
25310 enum machine_mode mode = GET_MODE (operands[i]);
25311 if (register_operand (operands[i], mode))
25314 else if (memory_operand (operands[i], mode))
25316 mem_mask |= (1 << i);
/* Operand that is neither REG nor MEM: only a literal zero inside a
   conditional-move pattern is tolerated.  */
25322 rtx pattern = PATTERN (insn);
25324 /* allow 0 for pcmov */
25325 if (GET_CODE (pattern) != SET
25326 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
25328 || operands[i] != CONST0_RTX (mode))
25333 /* If there were no memory operations, allow the insn */
25337 /* Do not allow the destination register to be a memory operand. */
25338 else if (mem_mask & (1 << 0))
25341 /* If there are too many memory operations, disallow the instruction. While
25342 the hardware only allows 1 memory reference, before register allocation
25343 for some insns, we allow two memory operations sometimes in order to allow
25344 code like the following to be optimized:
25346 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
25348 or similar cases that are vectorized into using the fmaddss
25350 else if (mem_count > num_memory)
25353 /* Don't allow more than one memory operation if not optimizing. */
25354 else if (mem_count > 1 && !optimize)
25357 else if (num == 4 && mem_count == 1)
25359 /* formats (destination is the first argument), example fmaddss:
25360 xmm1, xmm1, xmm2, xmm3/mem
25361 xmm1, xmm1, xmm2/mem, xmm3
25362 xmm1, xmm2, xmm3/mem, xmm1
25363 xmm1, xmm2/mem, xmm3, xmm1 */
/* With OC0 any single input may be the memory operand.  */
25365 return ((mem_mask == (1 << 1))
25366 || (mem_mask == (1 << 2))
25367 || (mem_mask == (1 << 3)));
25369 /* format, example pmacsdd:
25370 xmm1, xmm2, xmm3/mem, xmm1 */
/* Without OC0 only operand 2 may come from memory.  */
25372 return (mem_mask == (1 << 2));
25375 else if (num == 4 && num_memory == 2)
25377 /* If there are two memory operations, we can load one of the memory ops
25378 into the destination register. This is for optimizing the
25379 multiply/add ops, which the combiner has optimized both the multiply
25380 and the add insns to have a memory operation. We have to be careful
25381 that the destination doesn't overlap with the inputs. */
25382 rtx op0 = operands[0];
25384 if (reg_mentioned_p (op0, operands[1])
25385 || reg_mentioned_p (op0, operands[2])
25386 || reg_mentioned_p (op0, operands[3]))
25389 /* formats (destination is the first argument), example fmaddss:
25390 xmm1, xmm1, xmm2, xmm3/mem
25391 xmm1, xmm1, xmm2/mem, xmm3
25392 xmm1, xmm2, xmm3/mem, xmm1
25393 xmm1, xmm2/mem, xmm3, xmm1
25395 For the oc0 case, we will load either operands[1] or operands[3] into
25396 operands[0], so any combination of 2 memory operands is ok. */
25400 /* format, example pmacsdd:
25401 xmm1, xmm2, xmm3/mem, xmm1
25403 For the integer multiply/add instructions be more restrictive and
25404 require operands[2] and operands[3] to be the memory operands. */
25406 return (mem_mask == ((1 << 2) | (1 << 3)));
25409 else if (num == 3 && num_memory == 1)
25411 /* formats, example protb:
25412 xmm1, xmm2, xmm3/mem
25413 xmm1, xmm2/mem, xmm3 */
25415 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25417 /* format, example comeq:
25418 xmm1, xmm2, xmm3/mem */
25420 return (mem_mask == (1 << 2));
/* Any NUM/NUM_MEMORY combination not handled above is a caller bug.  */
25424 gcc_unreachable ();
25430 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
25431 hardware will allow by using the destination register to load one of the
25432 memory operations. Presently this is used by the multiply/add routines to
25433 allow 2 memory references. */
25436 ix86_expand_sse5_multiple_memory (rtx operands[],
25438 enum machine_mode mode)
25440 rtx op0 = operands[0];
/* Precondition (first disjunct elided): the destination must be a
   register that does not overlap any input, or we cannot use it as a
   scratch.  */
25442 || memory_operand (op0, mode)
25443 || reg_mentioned_p (op0, operands[1])
25444 || reg_mentioned_p (op0, operands[2])
25445 || reg_mentioned_p (op0, operands[3]))
25446 gcc_unreachable ();
25448 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25449 the destination register. */
25450 if (memory_operand (operands[1], mode))
25452 emit_move_insn (op0, operands[1]);
/* operands[1] = op0; — presumably rewritten in an elided line.  */
25455 else if (memory_operand (operands[3], mode))
25457 emit_move_insn (op0, operands[3]);
/* Neither candidate was a MEM: caller violated the contract.  */
25461 gcc_unreachable ();
25467 /* Table of valid machine attributes. */
25468 static const struct attribute_spec ix86_attribute_table[] =
25470 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25471 /* Stdcall attribute says callee is responsible for popping arguments
25472 if they are not variable. */
25473 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25474 /* Fastcall attribute says callee is responsible for popping arguments
25475 if they are not variable. */
25476 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25477 /* Cdecl attribute says the callee is a normal C declaration */
25478 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25479 /* Regparm attribute specifies how many integer arguments are to be
25480 passed in registers. */
25481 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25482 /* Sseregparm attribute says we are using x86_64 calling conventions
25483 for FP arguments. */
25484 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25485 /* force_align_arg_pointer says this function realigns the stack at entry. */
25486 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25487 false, true, true, ix86_handle_cconv_attribute },
/* PE/COFF targets also accept the DLL import/export attributes.  */
25488 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25489 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25490 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25491 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* ms_struct/gcc_struct select the structure-layout convention.  */
25493 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25494 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25495 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25496 SUBTARGET_ATTRIBUTE_TABLE,
/* Sentinel entry terminating the table.  */
25498 { NULL, 0, 0, false, false, false, NULL }
25501 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25503 x86_builtin_vectorization_cost (bool runtime_test)
25505 /* If the branch of the runtime test is taken - i.e. - the vectorized
25506 version is skipped - this incurs a misprediction cost (because the
25507 vectorized version is expected to be the fall-through). So we subtract
25508 the latency of a mispredicted branch from the costs that are incurred
25509 when the vectorized version is executed.
25511 TODO: The values in individual target tables have to be tuned or new
25512 fields may be needed. For eg. on K8, the default branch path is the
25513 not-taken path. If the taken path is predicted correctly, the minimum
25514 penalty of going down the taken-path is 1 cycle. If the taken-path is
25515 not predicted correctly, then the minimum penalty is 10 cycles. */
/* Negative cost: a credit against the vectorizer's runtime-test
   overhead estimate.  Non-runtime_test path is elided in this view.  */
25519 return (-(ix86_cost->cond_taken_branch_cost));
25525 /* Initialize the GCC target structure. */
/* Each hook is #undef'd first so these definitions override the
   defaults from target-def.h; TARGET_INITIALIZER below then expands
   to an aggregate initializer built from all TARGET_* macros.  */
25526 #undef TARGET_ATTRIBUTE_TABLE
25527 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25528 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25529 # undef TARGET_MERGE_DECL_ATTRIBUTES
25530 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25533 #undef TARGET_COMP_TYPE_ATTRIBUTES
25534 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25536 #undef TARGET_INIT_BUILTINS
25537 #define TARGET_INIT_BUILTINS ix86_init_builtins
25538 #undef TARGET_EXPAND_BUILTIN
25539 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25541 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25542 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25543 ix86_builtin_vectorized_function
25545 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25546 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25548 #undef TARGET_BUILTIN_RECIPROCAL
25549 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25551 #undef TARGET_ASM_FUNCTION_EPILOGUE
25552 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
/* Subtargets (e.g. Darwin, PE) may override section-info encoding.  */
25554 #undef TARGET_ENCODE_SECTION_INFO
25555 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25556 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25558 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25561 #undef TARGET_ASM_OPEN_PAREN
25562 #define TARGET_ASM_OPEN_PAREN ""
25563 #undef TARGET_ASM_CLOSE_PAREN
25564 #define TARGET_ASM_CLOSE_PAREN ""
25566 #undef TARGET_ASM_ALIGNED_HI_OP
25567 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25568 #undef TARGET_ASM_ALIGNED_SI_OP
25569 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25571 #undef TARGET_ASM_ALIGNED_DI_OP
25572 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 tolerates unaligned data accesses, so the unaligned directives
   are just the aligned ones.  */
25575 #undef TARGET_ASM_UNALIGNED_HI_OP
25576 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25577 #undef TARGET_ASM_UNALIGNED_SI_OP
25578 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25579 #undef TARGET_ASM_UNALIGNED_DI_OP
25580 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25582 #undef TARGET_SCHED_ADJUST_COST
25583 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25584 #undef TARGET_SCHED_ISSUE_RATE
25585 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25586 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25587 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25588 ia32_multipass_dfa_lookahead
25590 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25591 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25594 #undef TARGET_HAVE_TLS
25595 #define TARGET_HAVE_TLS true
25597 #undef TARGET_CANNOT_FORCE_CONST_MEM
25598 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25599 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25600 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25602 #undef TARGET_DELEGITIMIZE_ADDRESS
25603 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25605 #undef TARGET_MS_BITFIELD_LAYOUT_P
25606 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* Darwin and PE override symbol-binds-locally logic (guards partly
   elided in this view).  */
25609 #undef TARGET_BINDS_LOCAL_P
25610 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25612 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25613 #undef TARGET_BINDS_LOCAL_P
25614 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25617 #undef TARGET_ASM_OUTPUT_MI_THUNK
25618 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25619 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25620 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25622 #undef TARGET_ASM_FILE_START
25623 #define TARGET_ASM_FILE_START x86_file_start
25625 #undef TARGET_DEFAULT_TARGET_FLAGS
25626 #define TARGET_DEFAULT_TARGET_FLAGS \
25628 | TARGET_SUBTARGET_DEFAULT \
25629 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25631 #undef TARGET_HANDLE_OPTION
25632 #define TARGET_HANDLE_OPTION ix86_handle_option
25634 #undef TARGET_RTX_COSTS
25635 #define TARGET_RTX_COSTS ix86_rtx_costs
25636 #undef TARGET_ADDRESS_COST
25637 #define TARGET_ADDRESS_COST ix86_address_cost
25639 #undef TARGET_FIXED_CONDITION_CODE_REGS
25640 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25641 #undef TARGET_CC_MODES_COMPATIBLE
25642 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25644 #undef TARGET_MACHINE_DEPENDENT_REORG
25645 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25647 #undef TARGET_BUILD_BUILTIN_VA_LIST
25648 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25650 #undef TARGET_EXPAND_BUILTIN_VA_START
25651 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25653 #undef TARGET_MD_ASM_CLOBBERS
25654 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25656 #undef TARGET_PROMOTE_PROTOTYPES
25657 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25658 #undef TARGET_STRUCT_VALUE_RTX
25659 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25660 #undef TARGET_SETUP_INCOMING_VARARGS
25661 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25662 #undef TARGET_MUST_PASS_IN_STACK
25663 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25664 #undef TARGET_PASS_BY_REFERENCE
25665 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25666 #undef TARGET_INTERNAL_ARG_POINTER
25667 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25668 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25669 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25670 #undef TARGET_STRICT_ARGUMENT_NAMING
25671 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25673 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25674 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25676 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25677 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25679 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25680 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25682 #undef TARGET_C_MODE_FOR_SUFFIX
25683 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25686 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25687 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25690 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25691 #undef TARGET_INSERT_ATTRIBUTES
25692 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25695 #undef TARGET_MANGLE_TYPE
25696 #define TARGET_MANGLE_TYPE ix86_mangle_type
25698 #undef TARGET_STACK_PROTECT_FAIL
25699 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25701 #undef TARGET_FUNCTION_VALUE
25702 #define TARGET_FUNCTION_VALUE ix86_function_value
25704 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25705 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
/* The single global target-hook vtable consumed by the middle end.  */
25707 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collector roots generated by gengtype for this file.  */
25709 #include "gt-i386.h"