1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
51 #include "tree-gimple.h"
54 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
60 #ifndef CHECK_STACK_LIMIT
61 #define CHECK_STACK_LIMIT (-1)
64 /* Return index of given mode in mult and division cost tables. */
65 #define MODE_INDEX(mode) \
66 ((mode) == QImode ? 0 \
67 : (mode) == HImode ? 1 \
68 : (mode) == SImode ? 2 \
69 : (mode) == DImode ? 3 \
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
74 #define COSTS_N_BYTES(N) ((N) * 2)
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs size_cost = { /* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of loading integer registers */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Generic64 should produce code tuned for Nocona and K8. */
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea is 2 cycles and more. With
1042 this cost however our current implementation of synth_mult results in
1043 use of unnecessary temporary registers causing regression on several
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1089 is increased to perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
1185 const struct processor_costs *ix86_cost = &pentium_cost;
1187 /* Processor feature/optimization bitmasks. */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1208 /* Generic instruction choice should be common subset of supported CPUs
1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1212 /* Feature tests against the various tunings. */
1213 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1214 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1215 negatively, so enabling for Generic64 seems like good code size
1216 tradeoff. We can't enable it for 32bit generic because it does not
1217 work well with PPro base chips. */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1220 /* X86_TUNE_PUSH_MEMORY */
1221 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1222 | m_NOCONA | m_CORE2 | m_GENERIC,
1224 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1227 /* X86_TUNE_USE_BIT_TEST */
1230 /* X86_TUNE_UNROLL_STRLEN */
1231 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1233 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1234 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1236 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1237 on simulation result. But after P4 was made, no performance benefit
1238 was observed with branch hints. It also increases the code size.
1239 As a result, icc never generates branch hints. */
1242 /* X86_TUNE_DOUBLE_WITH_ADD */
1245 /* X86_TUNE_USE_SAHF */
1246 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1247 | m_NOCONA | m_CORE2 | m_GENERIC,
1249 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1250 partial dependencies. */
1251 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1252 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1254 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1255 register stalls on Generic32 compilation setting as well. However
1256 in current implementation the partial register stalls are not eliminated
1257 very well - they can be introduced via subregs synthesized by combine
1258 and can happen in caller/callee saving sequences. Because this option
1259 pays back little on PPro based chips and is in conflict with partial reg
1260 dependencies used by Athlon/P4 based chips, it is better to leave it off
1261 for generic32 for now. */
1264 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1265 m_CORE2 | m_GENERIC,
1267 /* X86_TUNE_USE_HIMODE_FIOP */
1268 m_386 | m_486 | m_K6_GEODE,
1270 /* X86_TUNE_USE_SIMODE_FIOP */
1271 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1273 /* X86_TUNE_USE_MOV0 */
1276 /* X86_TUNE_USE_CLTD */
1277 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1279 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1282 /* X86_TUNE_SPLIT_LONG_MOVES */
1285 /* X86_TUNE_READ_MODIFY_WRITE */
1288 /* X86_TUNE_READ_MODIFY */
1291 /* X86_TUNE_PROMOTE_QIMODE */
1292 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1293 | m_GENERIC /* | m_PENT4 ? */,
1295 /* X86_TUNE_FAST_PREFIX */
1296 ~(m_PENT | m_486 | m_386),
1298 /* X86_TUNE_SINGLE_STRINGOP */
1299 m_386 | m_PENT4 | m_NOCONA,
1301 /* X86_TUNE_QIMODE_MATH */
1304 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1305 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1306 might be considered for Generic32 if our scheme for avoiding partial
1307 stalls was more effective. */
1310 /* X86_TUNE_PROMOTE_QI_REGS */
1313 /* X86_TUNE_PROMOTE_HI_REGS */
1316 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1317 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1319 /* X86_TUNE_ADD_ESP_8 */
1320 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1321 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_SUB_ESP_4 */
1324 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_SUB_ESP_8 */
1327 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1328 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1330 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1331 for DFmode copies */
1332 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1333 | m_GENERIC | m_GEODE),
1335 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1336 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1338 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1339 conflict here in between PPro/Pentium4 based chips that thread 128bit
1340 SSE registers as single units versus K8 based chips that divide SSE
1341 registers to two 64bit halves. This knob promotes all store destinations
1342 to be 128bit to allow register renaming on 128bit SSE units, but usually
1343 results in one extra microop on 64bit SSE units. Experimental results
1344 shows that disabling this option on P4 brings over 20% SPECfp regression,
1345 while enabling it on K8 brings roughly 2.4% regression that can be partly
1346 masked by careful scheduling of moves. */
1347 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1349 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1352 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1353 are resolved on SSE register parts instead of whole registers, so we may
1354 maintain just lower part of scalar values in proper format leaving the
1355 upper part undefined. */
1358 /* X86_TUNE_SSE_TYPELESS_STORES */
1361 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1362 m_PPRO | m_PENT4 | m_NOCONA,
1364 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1365 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1367 /* X86_TUNE_PROLOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_EPILOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373 /* X86_TUNE_SHIFT1 */
1376 /* X86_TUNE_USE_FFREEP */
1379 /* X86_TUNE_INTER_UNIT_MOVES */
1380 ~(m_AMD_MULTIPLE | m_GENERIC),
1382 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1385 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1386 than 4 branch instructions in the 16 byte window. */
1387 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1389 /* X86_TUNE_SCHEDULE */
1390 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1392 /* X86_TUNE_USE_BT */
1395 /* X86_TUNE_USE_INCDEC */
1396 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1398 /* X86_TUNE_PAD_RETURNS */
1399 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_EXT_80387_CONSTANTS */
1402 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1404 /* X86_TUNE_SHORTEN_X87_SSE */
1407 /* X86_TUNE_AVOID_VECTOR_DECODE */
1410 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1411 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1414 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1415 vector path on AMD machines. */
1416 m_K8 | m_GENERIC64 | m_AMDFAM10,
1418 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1422 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1426 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1427 but one byte longer. */
1430 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1431 operand that cannot be represented using a modRM byte. The XOR
1432 replacement is long decoded, so this split helps here as well. */
1435 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1436 from integer to FP. */
1440 /* Feature tests against the various architecture variations. */
1441 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1442 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1443 ~(m_386 | m_486 | m_PENT | m_K6),
1445 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1448 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1451 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1454 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1458 static const unsigned int x86_accumulate_outgoing_args
1459 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1461 static const unsigned int x86_arch_always_fancy_math_387
1462 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1463 | m_NOCONA | m_CORE2 | m_GENERIC;
1465 static enum stringop_alg stringop_alg = no_stringop;
1467 /* In case the average insn count for single function invocation is
1468 lower than this constant, emit fast (but longer) prologue and
1470 #define FAST_PROLOGUE_INSN_COUNT 20
1472 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1473 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1474 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1475 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1477 /* Array of the smallest class containing reg number REGNO, indexed by
1478 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1480 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1482 /* ax, dx, cx, bx */
1483 AREG, DREG, CREG, BREG,
1484 /* si, di, bp, sp */
1485 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1487 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1488 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1491 /* flags, fpsr, fpcr, frame */
1492 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1494 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1497 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1500 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1501 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1502 /* SSE REX registers */
1503 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1507 /* The "default" register map used in 32bit mode. */
1509 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1511 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1512 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1513 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1514 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1515 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1516 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1517 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1520 static int const x86_64_int_parameter_registers[6] =
1522 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1523 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1526 static int const x86_64_ms_abi_int_parameter_registers[4] =
1528 2 /*RCX*/, 1 /*RDX*/,
1529 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1532 static int const x86_64_int_return_registers[4] =
1534 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1537 /* The "default" register map used in 64bit mode. */
1538 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1540 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1541 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1542 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1543 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1544 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1545 8,9,10,11,12,13,14,15, /* extended integer registers */
1546 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1549 /* Define the register numbers to be used in Dwarf debugging information.
1550 The SVR4 reference port C compiler uses the following register numbers
1551 in its Dwarf output code:
1552 0 for %eax (gcc regno = 0)
1553 1 for %ecx (gcc regno = 2)
1554 2 for %edx (gcc regno = 1)
1555 3 for %ebx (gcc regno = 3)
1556 4 for %esp (gcc regno = 7)
1557 5 for %ebp (gcc regno = 6)
1558 6 for %esi (gcc regno = 4)
1559 7 for %edi (gcc regno = 5)
1560 The following three DWARF register numbers are never generated by
1561 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1562 believes these numbers have these meanings.
1563 8 for %eip (no gcc equivalent)
1564 9 for %eflags (gcc regno = 17)
1565 10 for %trapno (no gcc equivalent)
1566 It is not at all clear how we should number the FP stack registers
1567 for the x86 architecture. If the version of SDB on x86/svr4 were
1568 a bit less brain dead with respect to floating-point then we would
1569 have a precedent to follow with respect to DWARF register numbers
1570 for x86 FP registers, but the SDB on x86/svr4 is so completely
1571 broken with respect to FP registers that it is hardly worth thinking
1572 of it as something to strive for compatibility with.
1573 The version of x86/svr4 SDB I have at the moment does (partially)
1574 seem to believe that DWARF register number 11 is associated with
1575 the x86 register %st(0), but that's about all. Higher DWARF
1576 register numbers don't seem to be associated with anything in
1577 particular, and even for DWARF regno 11, SDB only seems to under-
1578 stand that it should say that a variable lives in %st(0) (when
1579 asked via an `=' command) if we said it was in DWARF regno 11,
1580 but SDB still prints garbage when asked for the value of the
1581 variable in question (via a `/' command).
1582 (Also note that the labels SDB prints for various FP stack regs
1583 when doing an `x' command are all wrong.)
1584 Note that these problems generally don't affect the native SVR4
1585 C compiler because it doesn't allow the use of -O with -g and
1586 because when it is *not* optimizing, it allocates a memory
1587 location for each floating-point variable, and the memory
1588 location is what gets described in the DWARF AT_location
1589 attribute for the variable in question.
1590 Regardless of the severe mental illness of the x86/svr4 SDB, we
1591 do something sensible here and we use the following DWARF
1592 register numbers. Note that these are all stack-top-relative
1594 11 for %st(0) (gcc regno = 8)
1595 12 for %st(1) (gcc regno = 9)
1596 13 for %st(2) (gcc regno = 10)
1597 14 for %st(3) (gcc regno = 11)
1598 15 for %st(4) (gcc regno = 12)
1599 16 for %st(5) (gcc regno = 13)
1600 17 for %st(6) (gcc regno = 14)
1601 18 for %st(7) (gcc regno = 15)
1603 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1605 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1606 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1607 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1608 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1609 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1610 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1611 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1614 /* Test and compare insns in i386.md store the information needed to
1615 generate branch and scc insns here. */
1617 rtx ix86_compare_op0 = NULL_RTX;
1618 rtx ix86_compare_op1 = NULL_RTX;
1619 rtx ix86_compare_emitted = NULL_RTX;
1621 /* Size of the register save area. */
1622 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1624 /* Define the structure for the machine field in struct function. */
1626 struct stack_local_entry GTY(())
1628 unsigned short mode;
1631 struct stack_local_entry *next;
1634 /* Structure describing stack frame layout.
1635 Stack grows downward:
1641 saved frame pointer if frame_pointer_needed
1642 <- HARD_FRAME_POINTER
1647 [va_arg registers] (
1648 > to_allocate <- FRAME_POINTER
1658 HOST_WIDE_INT frame;
1660 int outgoing_arguments_size;
1663 HOST_WIDE_INT to_allocate;
1664 /* The offsets relative to ARG_POINTER. */
1665 HOST_WIDE_INT frame_pointer_offset;
1666 HOST_WIDE_INT hard_frame_pointer_offset;
1667 HOST_WIDE_INT stack_pointer_offset;
1669 /* When save_regs_using_mov is set, emit prologue using
1670 move instead of push instructions. */
1671 bool save_regs_using_mov;
1674 /* Code model option. */
1675 enum cmodel ix86_cmodel;
1677 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1679 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1681 /* Which unit we are generating floating point math for. */
1682 enum fpmath_unit ix86_fpmath;
1684 /* Which cpu are we scheduling for. */
1685 enum processor_type ix86_tune;
1687 /* Which instruction set architecture to use. */
1688 enum processor_type ix86_arch;
1690 /* true if sse prefetch instruction is not NOOP. */
1691 int x86_prefetch_sse;
1693 /* ix86_regparm_string as a number */
1694 static int ix86_regparm;
1696 /* -mstackrealign option */
1697 extern int ix86_force_align_arg_pointer;
1698 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1700 /* Preferred alignment for stack boundary in bits. */
1701 unsigned int ix86_preferred_stack_boundary;
1703 /* Values 1-5: see jump.c */
1704 int ix86_branch_cost;
1706 /* Variables which are this size or smaller are put in the data/bss
1707 or ldata/lbss sections. */
1709 int ix86_section_threshold = 65536;
1711 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1712 char internal_label_prefix[16];
1713 int internal_label_prefix_len;
1715 /* Fence to use after loop using movnt. */
1718 /* Register class used for passing given 64bit part of the argument.
1719 These represent classes as documented by the PS ABI, with the exception
1720 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1721 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1723 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1724 whenever possible (upper half does contain padding). */
1725 enum x86_64_reg_class
1728 X86_64_INTEGER_CLASS,
1729 X86_64_INTEGERSI_CLASS,
1736 X86_64_COMPLEX_X87_CLASS,
1739 static const char * const x86_64_reg_class_name[] =
1741 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1742 "sseup", "x87", "x87up", "cplx87", "no"
1745 #define MAX_CLASSES 4
1747 /* Table of constants used by fldpi, fldln2, etc.... */
1748 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1749 static bool ext_80387_constants_init = 0;
1752 static struct machine_function * ix86_init_machine_status (void);
1753 static rtx ix86_function_value (const_tree, const_tree, bool);
1754 static int ix86_function_regparm (const_tree, const_tree);
1755 static void ix86_compute_frame_layout (struct ix86_frame *);
1756 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1760 /* The svr4 ABI for the i386 says that records and unions are returned
1762 #ifndef DEFAULT_PCC_STRUCT_RETURN
1763 #define DEFAULT_PCC_STRUCT_RETURN 1
1766 /* Bit flags that specify the ISA we are compiling for. */
1767 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1769 /* A mask of ix86_isa_flags that includes bit X if X
1770 was set or cleared on the command line. */
1771 static int ix86_isa_flags_explicit;
1773 /* Define a set of ISAs which are available when a given ISA is
1774 enabled. MMX and SSE ISAs are handled separately. */
1776 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1777 #define OPTION_MASK_ISA_3DNOW_SET \
1778 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1780 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1781 #define OPTION_MASK_ISA_SSE2_SET \
1782 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1783 #define OPTION_MASK_ISA_SSE3_SET \
1784 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1785 #define OPTION_MASK_ISA_SSSE3_SET \
1786 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1787 #define OPTION_MASK_ISA_SSE4_1_SET \
1788 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1789 #define OPTION_MASK_ISA_SSE4_2_SET \
1790 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1792 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1794 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1796 #define OPTION_MASK_ISA_SSE4A_SET \
1797 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1798 #define OPTION_MASK_ISA_SSE5_SET \
1799 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1801 /* Define a set of ISAs which aren't available when a given ISA is
1802 disabled. MMX and SSE ISAs are handled separately. */
1804 #define OPTION_MASK_ISA_MMX_UNSET \
1805 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1806 #define OPTION_MASK_ISA_3DNOW_UNSET \
1807 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1808 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1810 #define OPTION_MASK_ISA_SSE_UNSET \
1811 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1812 #define OPTION_MASK_ISA_SSE2_UNSET \
1813 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1814 #define OPTION_MASK_ISA_SSE3_UNSET \
1815 (OPTION_MASK_ISA_SSE3 \
1816 | OPTION_MASK_ISA_SSSE3_UNSET \
1817 | OPTION_MASK_ISA_SSE4A_UNSET )
1818 #define OPTION_MASK_ISA_SSSE3_UNSET \
1819 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1820 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1821 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1822 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1824 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
1826 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1828 #define OPTION_MASK_ISA_SSE4A_UNSET \
1829 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1831 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1833 /* Vectorization library interface and handlers. */
1834 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1835 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1836 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1838 /* Implement TARGET_HANDLE_OPTION. */
1841 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1848 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1849 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1853 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1854 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1861 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1862 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1866 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1867 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1877 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1878 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1882 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1883 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1890 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1891 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1895 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1896 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1903 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1904 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1908 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1909 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1916 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1917 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1921 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1922 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1929 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1930 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1934 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1935 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1942 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1943 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1947 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1948 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1953 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1954 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
1958 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1959 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1965 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1966 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1970 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1971 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1978 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1979 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
1983 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1984 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1993 /* Sometimes certain combinations of command options do not make
1994 sense on a particular target machine. You can define a macro
1995 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1996 defined, is executed once just after all the command options have
1999 Don't use this macro to turn on various extra optimizations for
2000 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2003 override_options (void)
2006 int ix86_tune_defaulted = 0;
2007 int ix86_arch_specified = 0;
2008 unsigned int ix86_arch_mask, ix86_tune_mask;
2010 /* Comes from final.c -- no real reason to change it. */
2011 #define MAX_CODE_ALIGN 16
2015 const struct processor_costs *cost; /* Processor costs */
2016 const int align_loop; /* Default alignments. */
2017 const int align_loop_max_skip;
2018 const int align_jump;
2019 const int align_jump_max_skip;
2020 const int align_func;
2022 const processor_target_table[PROCESSOR_max] =
2024 {&i386_cost, 4, 3, 4, 3, 4},
2025 {&i486_cost, 16, 15, 16, 15, 16},
2026 {&pentium_cost, 16, 7, 16, 7, 16},
2027 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2028 {&geode_cost, 0, 0, 0, 0, 0},
2029 {&k6_cost, 32, 7, 32, 7, 32},
2030 {&athlon_cost, 16, 7, 16, 7, 16},
2031 {&pentium4_cost, 0, 0, 0, 0, 0},
2032 {&k8_cost, 16, 7, 16, 7, 16},
2033 {&nocona_cost, 0, 0, 0, 0, 0},
2034 {&core2_cost, 16, 10, 16, 10, 16},
2035 {&generic32_cost, 16, 7, 16, 7, 16},
2036 {&generic64_cost, 16, 10, 16, 10, 16},
2037 {&amdfam10_cost, 32, 24, 32, 7, 32}
2040 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2071 PTA_PREFETCH_SSE = 1 << 4,
2073 PTA_3DNOW_A = 1 << 6,
2077 PTA_POPCNT = 1 << 10,
2079 PTA_SSE4A = 1 << 12,
2080 PTA_NO_SAHF = 1 << 13,
2081 PTA_SSE4_1 = 1 << 14,
2082 PTA_SSE4_2 = 1 << 15,
2085 PTA_PCLMUL = 1 << 18
2090 const char *const name; /* processor name or nickname. */
2091 const enum processor_type processor;
2092 const unsigned /*enum pta_flags*/ flags;
2094 const processor_alias_table[] =
2096 {"i386", PROCESSOR_I386, 0},
2097 {"i486", PROCESSOR_I486, 0},
2098 {"i586", PROCESSOR_PENTIUM, 0},
2099 {"pentium", PROCESSOR_PENTIUM, 0},
2100 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2101 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2102 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2103 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2104 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2105 {"i686", PROCESSOR_PENTIUMPRO, 0},
2106 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2107 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2108 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2109 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2110 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2111 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2112 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2113 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2114 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2115 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2116 | PTA_CX16 | PTA_NO_SAHF)},
2117 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2118 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2121 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2122 |PTA_PREFETCH_SSE)},
2123 {"k6", PROCESSOR_K6, PTA_MMX},
2124 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2125 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2126 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2127 | PTA_PREFETCH_SSE)},
2128 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2129 | PTA_PREFETCH_SSE)},
2130 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2132 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2134 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2136 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2137 | PTA_MMX | PTA_SSE | PTA_SSE2
2139 {"k8", PROCESSOR_K8, (PTA_64BIT
2140 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2141 | PTA_SSE | PTA_SSE2
2143 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2144 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2145 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2147 {"opteron", PROCESSOR_K8, (PTA_64BIT
2148 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2149 | PTA_SSE | PTA_SSE2
2151 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2152 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2153 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2155 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2156 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2157 | PTA_SSE | PTA_SSE2
2159 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2160 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2161 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2163 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2164 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2165 | PTA_SSE | PTA_SSE2
2167 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2168 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2169 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2171 | PTA_CX16 | PTA_ABM)},
2172 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2173 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2174 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2176 | PTA_CX16 | PTA_ABM)},
2177 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2178 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2181 int const pta_size = ARRAY_SIZE (processor_alias_table);
2183 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2184 SUBTARGET_OVERRIDE_OPTIONS;
2187 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2188 SUBSUBTARGET_OVERRIDE_OPTIONS;
2191 /* -fPIC is the default for x86_64. */
2192 if (TARGET_MACHO && TARGET_64BIT)
2195 /* Set the default values for switches whose default depends on TARGET_64BIT
2196 in case they weren't overwritten by command line options. */
2199 /* Mach-O doesn't support omitting the frame pointer for now. */
2200 if (flag_omit_frame_pointer == 2)
2201 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2202 if (flag_asynchronous_unwind_tables == 2)
2203 flag_asynchronous_unwind_tables = 1;
2204 if (flag_pcc_struct_return == 2)
2205 flag_pcc_struct_return = 0;
2209 if (flag_omit_frame_pointer == 2)
2210 flag_omit_frame_pointer = 0;
2211 if (flag_asynchronous_unwind_tables == 2)
2212 flag_asynchronous_unwind_tables = 0;
2213 if (flag_pcc_struct_return == 2)
2214 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2217 /* Need to check -mtune=generic first. */
2218 if (ix86_tune_string)
2220 if (!strcmp (ix86_tune_string, "generic")
2221 || !strcmp (ix86_tune_string, "i686")
2222 /* As special support for cross compilers we read -mtune=native
2223 as -mtune=generic. With native compilers we won't see the
2224 -mtune=native, as it was changed by the driver. */
2225 || !strcmp (ix86_tune_string, "native"))
2228 ix86_tune_string = "generic64";
2230 ix86_tune_string = "generic32";
2232 else if (!strncmp (ix86_tune_string, "generic", 7))
2233 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2237 if (ix86_arch_string)
2238 ix86_tune_string = ix86_arch_string;
2239 if (!ix86_tune_string)
2241 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2242 ix86_tune_defaulted = 1;
2245 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2246 need to use a sensible tune option. */
2247 if (!strcmp (ix86_tune_string, "generic")
2248 || !strcmp (ix86_tune_string, "x86-64")
2249 || !strcmp (ix86_tune_string, "i686"))
2252 ix86_tune_string = "generic64";
2254 ix86_tune_string = "generic32";
2257 if (ix86_stringop_string)
2259 if (!strcmp (ix86_stringop_string, "rep_byte"))
2260 stringop_alg = rep_prefix_1_byte;
2261 else if (!strcmp (ix86_stringop_string, "libcall"))
2262 stringop_alg = libcall;
2263 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2264 stringop_alg = rep_prefix_4_byte;
2265 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2266 stringop_alg = rep_prefix_8_byte;
2267 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2268 stringop_alg = loop_1_byte;
2269 else if (!strcmp (ix86_stringop_string, "loop"))
2270 stringop_alg = loop;
2271 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2272 stringop_alg = unrolled_loop;
2274 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2276 if (!strcmp (ix86_tune_string, "x86-64"))
2277 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2278 "-mtune=generic instead as appropriate.");
2280 if (!ix86_arch_string)
2281 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2283 ix86_arch_specified = 1;
2285 if (!strcmp (ix86_arch_string, "generic"))
2286 error ("generic CPU can be used only for -mtune= switch");
2287 if (!strncmp (ix86_arch_string, "generic", 7))
2288 error ("bad value (%s) for -march= switch", ix86_arch_string);
2290 if (ix86_cmodel_string != 0)
2292 if (!strcmp (ix86_cmodel_string, "small"))
2293 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2294 else if (!strcmp (ix86_cmodel_string, "medium"))
2295 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2296 else if (!strcmp (ix86_cmodel_string, "large"))
2297 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2299 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2300 else if (!strcmp (ix86_cmodel_string, "32"))
2301 ix86_cmodel = CM_32;
2302 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2303 ix86_cmodel = CM_KERNEL;
2305 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2309 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2310 use of rip-relative addressing. This eliminates fixups that
2311 would otherwise be needed if this object is to be placed in a
2312 DLL, and is essentially just as efficient as direct addressing. */
2313 if (TARGET_64BIT_MS_ABI)
2314 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2315 else if (TARGET_64BIT)
2316 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2318 ix86_cmodel = CM_32;
2320 if (ix86_asm_string != 0)
2323 && !strcmp (ix86_asm_string, "intel"))
2324 ix86_asm_dialect = ASM_INTEL;
2325 else if (!strcmp (ix86_asm_string, "att"))
2326 ix86_asm_dialect = ASM_ATT;
2328 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2330 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2331 error ("code model %qs not supported in the %s bit mode",
2332 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2333 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2334 sorry ("%i-bit mode not compiled in",
2335 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2337 for (i = 0; i < pta_size; i++)
2338 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2340 ix86_arch = processor_alias_table[i].processor;
2341 /* Default cpu tuning to the architecture. */
2342 ix86_tune = ix86_arch;
2344 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2345 error ("CPU you selected does not support x86-64 "
2348 if (processor_alias_table[i].flags & PTA_MMX
2349 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2350 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2351 if (processor_alias_table[i].flags & PTA_3DNOW
2352 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2353 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2354 if (processor_alias_table[i].flags & PTA_3DNOW_A
2355 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2356 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2357 if (processor_alias_table[i].flags & PTA_SSE
2358 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2359 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2360 if (processor_alias_table[i].flags & PTA_SSE2
2361 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2362 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2363 if (processor_alias_table[i].flags & PTA_SSE3
2364 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2365 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2366 if (processor_alias_table[i].flags & PTA_SSSE3
2367 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2368 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2369 if (processor_alias_table[i].flags & PTA_SSE4_1
2370 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2371 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2372 if (processor_alias_table[i].flags & PTA_SSE4_2
2373 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2374 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2375 if (processor_alias_table[i].flags & PTA_SSE4A
2376 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2377 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2378 if (processor_alias_table[i].flags & PTA_SSE5
2379 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2380 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2382 if (processor_alias_table[i].flags & PTA_ABM)
2384 if (processor_alias_table[i].flags & PTA_CX16)
2385 x86_cmpxchg16b = true;
2386 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2388 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2389 x86_prefetch_sse = true;
2390 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2392 if (processor_alias_table[i].flags & PTA_AES)
2394 if (processor_alias_table[i].flags & PTA_PCLMUL)
2401 error ("bad value (%s) for -march= switch", ix86_arch_string);
2403 ix86_arch_mask = 1u << ix86_arch;
2404 for (i = 0; i < X86_ARCH_LAST; ++i)
2405 ix86_arch_features[i] &= ix86_arch_mask;
2407 for (i = 0; i < pta_size; i++)
2408 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2410 ix86_tune = processor_alias_table[i].processor;
2411 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2413 if (ix86_tune_defaulted)
2415 ix86_tune_string = "x86-64";
2416 for (i = 0; i < pta_size; i++)
2417 if (! strcmp (ix86_tune_string,
2418 processor_alias_table[i].name))
2420 ix86_tune = processor_alias_table[i].processor;
2423 error ("CPU you selected does not support x86-64 "
2426 /* Intel CPUs have always interpreted SSE prefetch instructions as
2427 NOPs; so, we can enable SSE prefetch instructions even when
2428 -mtune (rather than -march) points us to a processor that has them.
2429 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2430 higher processors. */
2432 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2433 x86_prefetch_sse = true;
2437 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2439 /* Enable SSE2 if AES or PCLMUL is enabled. */
2440 if ((x86_aes || x86_pclmul)
2441 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2443 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2444 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2447 ix86_tune_mask = 1u << ix86_tune;
2448 for (i = 0; i < X86_TUNE_LAST; ++i)
2449 ix86_tune_features[i] &= ix86_tune_mask;
2452 ix86_cost = &size_cost;
2454 ix86_cost = processor_target_table[ix86_tune].cost;
2456 /* Arrange to set up i386_stack_locals for all functions. */
2457 init_machine_status = ix86_init_machine_status;
2459 /* Validate -mregparm= value. */
2460 if (ix86_regparm_string)
2463 warning (0, "-mregparm is ignored in 64-bit mode");
2464 i = atoi (ix86_regparm_string);
2465 if (i < 0 || i > REGPARM_MAX)
2466 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2471 ix86_regparm = REGPARM_MAX;
2473 /* If the user has provided any of the -malign-* options,
2474 warn and use that value only if -falign-* is not set.
2475 Remove this code in GCC 3.2 or later. */
2476 if (ix86_align_loops_string)
2478 warning (0, "-malign-loops is obsolete, use -falign-loops");
2479 if (align_loops == 0)
2481 i = atoi (ix86_align_loops_string);
2482 if (i < 0 || i > MAX_CODE_ALIGN)
2483 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2485 align_loops = 1 << i;
2489 if (ix86_align_jumps_string)
2491 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2492 if (align_jumps == 0)
2494 i = atoi (ix86_align_jumps_string);
2495 if (i < 0 || i > MAX_CODE_ALIGN)
2496 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2498 align_jumps = 1 << i;
2502 if (ix86_align_funcs_string)
2504 warning (0, "-malign-functions is obsolete, use -falign-functions");
2505 if (align_functions == 0)
2507 i = atoi (ix86_align_funcs_string);
2508 if (i < 0 || i > MAX_CODE_ALIGN)
2509 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2511 align_functions = 1 << i;
2515 /* Default align_* from the processor table. */
2516 if (align_loops == 0)
2518 align_loops = processor_target_table[ix86_tune].align_loop;
2519 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2521 if (align_jumps == 0)
2523 align_jumps = processor_target_table[ix86_tune].align_jump;
2524 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2526 if (align_functions == 0)
2528 align_functions = processor_target_table[ix86_tune].align_func;
2531 /* Validate -mbranch-cost= value, or provide default. */
2532 ix86_branch_cost = ix86_cost->branch_cost;
2533 if (ix86_branch_cost_string)
2535 i = atoi (ix86_branch_cost_string);
2537 error ("-mbranch-cost=%d is not between 0 and 5", i);
2539 ix86_branch_cost = i;
2541 if (ix86_section_threshold_string)
2543 i = atoi (ix86_section_threshold_string);
2545 error ("-mlarge-data-threshold=%d is negative", i);
2547 ix86_section_threshold = i;
2550 if (ix86_tls_dialect_string)
2552 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2553 ix86_tls_dialect = TLS_DIALECT_GNU;
2554 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2555 ix86_tls_dialect = TLS_DIALECT_GNU2;
2556 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2557 ix86_tls_dialect = TLS_DIALECT_SUN;
2559 error ("bad value (%s) for -mtls-dialect= switch",
2560 ix86_tls_dialect_string);
2563 if (ix87_precision_string)
2565 i = atoi (ix87_precision_string);
2566 if (i != 32 && i != 64 && i != 80)
2567 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2572 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2574 /* Enable by default the SSE and MMX builtins. Do allow the user to
2575 explicitly disable any of these. In particular, disabling SSE and
2576 MMX for kernel code is extremely useful. */
2577 if (!ix86_arch_specified)
2579 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2580 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2583 warning (0, "-mrtd is ignored in 64bit mode");
2587 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2589 if (!ix86_arch_specified)
2591 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2593 /* i386 ABI does not specify red zone. It still makes sense to use it
2594 when programmer takes care to stack from being destroyed. */
2595 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2596 target_flags |= MASK_NO_RED_ZONE;
2599 /* Keep nonleaf frame pointers. */
2600 if (flag_omit_frame_pointer)
2601 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2602 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2603 flag_omit_frame_pointer = 1;
2605 /* If we're doing fast math, we don't care about comparison order
2606 wrt NaNs. This lets us use a shorter comparison sequence. */
2607 if (flag_finite_math_only)
2608 target_flags &= ~MASK_IEEE_FP;
2610 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2611 since the insns won't need emulation. */
2612 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2613 target_flags &= ~MASK_NO_FANCY_MATH_387;
2615 /* Likewise, if the target doesn't have a 387, or we've specified
2616 software floating point, don't use 387 inline intrinsics. */
2618 target_flags |= MASK_NO_FANCY_MATH_387;
2620 /* Turn on MMX builtins for -msse. */
2623 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2624 x86_prefetch_sse = true;
2627 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2628 if (TARGET_SSE4_2 || TARGET_ABM)
2631 /* Validate -mpreferred-stack-boundary= value, or provide default.
2632 The default of 128 bits is for Pentium III's SSE __m128. We can't
2633 change it because of optimize_size. Otherwise, we can't mix object
2634 files compiled with -Os and -On. */
2635 ix86_preferred_stack_boundary = 128;
2636 if (ix86_preferred_stack_boundary_string)
2638 i = atoi (ix86_preferred_stack_boundary_string);
2639 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2640 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2641 TARGET_64BIT ? 4 : 2);
2643 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2646 /* Accept -msseregparm only if at least SSE support is enabled. */
2647 if (TARGET_SSEREGPARM
2649 error ("-msseregparm used without SSE enabled");
2651 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2652 if (ix86_fpmath_string != 0)
2654 if (! strcmp (ix86_fpmath_string, "387"))
2655 ix86_fpmath = FPMATH_387;
2656 else if (! strcmp (ix86_fpmath_string, "sse"))
2660 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2661 ix86_fpmath = FPMATH_387;
2664 ix86_fpmath = FPMATH_SSE;
2666 else if (! strcmp (ix86_fpmath_string, "387,sse")
2667 || ! strcmp (ix86_fpmath_string, "sse,387"))
2671 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2672 ix86_fpmath = FPMATH_387;
2674 else if (!TARGET_80387)
2676 warning (0, "387 instruction set disabled, using SSE arithmetics");
2677 ix86_fpmath = FPMATH_SSE;
2680 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2683 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2686 /* If the i387 is disabled, then do not return values in it. */
2688 target_flags &= ~MASK_FLOAT_RETURNS;
2690 /* Use external vectorized library in vectorizing intrinsics. */
2691 if (ix86_veclibabi_string)
2693 if (strcmp (ix86_veclibabi_string, "svml") == 0)
2694 ix86_veclib_handler = ix86_veclibabi_svml;
2695 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
2696 ix86_veclib_handler = ix86_veclibabi_acml;
2698 error ("unknown vectorization library ABI type (%s) for "
2699 "-mveclibabi= switch", ix86_veclibabi_string);
2702 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2703 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2705 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2707 /* ??? Unwind info is not correct around the CFG unless either a frame
2708 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2709 unwind info generation to be aware of the CFG and propagating states
2711 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2712 || flag_exceptions || flag_non_call_exceptions)
2713 && flag_omit_frame_pointer
2714 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2716 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2717 warning (0, "unwind tables currently require either a frame pointer "
2718 "or -maccumulate-outgoing-args for correctness");
2719 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2722 /* If stack probes are required, the space used for large function
2723 arguments on the stack must also be probed, so enable
2724 -maccumulate-outgoing-args so this happens in the prologue. */
2725 if (TARGET_STACK_PROBE
2726 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2728 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2729 warning (0, "stack probing requires -maccumulate-outgoing-args "
2731 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2734 /* For sane SSE instruction set generation we need fcomi instruction.
2735 It is safe to enable all CMOVE instructions. */
2739 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2742 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2743 p = strchr (internal_label_prefix, 'X');
2744 internal_label_prefix_len = p - internal_label_prefix;
2748 /* When scheduling description is not available, disable scheduler pass
2749 so it won't slow down the compilation and make x87 code slower. */
2750 if (!TARGET_SCHEDULE)
2751 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2753 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2754 set_param_value ("simultaneous-prefetches",
2755 ix86_cost->simultaneous_prefetches);
2756 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2757 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2758 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2759 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2760 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2761 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2763 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2764 can be optimized to ap = __builtin_next_arg (0). */
2765 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
2766 targetm.expand_builtin_va_start = NULL;
2769 /* Return true if this goes in large data/bss. */
2772 ix86_in_large_data_p (tree exp)
2774 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2777 /* Functions are never large data. */
2778 if (TREE_CODE (exp) == FUNCTION_DECL)
2781 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2783 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2784 if (strcmp (section, ".ldata") == 0
2785 || strcmp (section, ".lbss") == 0)
2791 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2793 /* If this is an incomplete type with size 0, then we can't put it
2794 in data because it might be too big when completed. */
2795 if (!size || size > ix86_section_threshold)
2802 /* Switch to the appropriate section for output of DECL.
2803 DECL is either a `VAR_DECL' node or a constant of some sort.
2804 RELOC indicates whether forming the initial value of DECL requires
2805 link-time relocations. */
2807 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2811 x86_64_elf_select_section (tree decl, int reloc,
2812 unsigned HOST_WIDE_INT align)
2814 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2815 && ix86_in_large_data_p (decl))
2817 const char *sname = NULL;
2818 unsigned int flags = SECTION_WRITE;
2819 switch (categorize_decl_for_section (decl, reloc))
2824 case SECCAT_DATA_REL:
2825 sname = ".ldata.rel";
2827 case SECCAT_DATA_REL_LOCAL:
2828 sname = ".ldata.rel.local";
2830 case SECCAT_DATA_REL_RO:
2831 sname = ".ldata.rel.ro";
2833 case SECCAT_DATA_REL_RO_LOCAL:
2834 sname = ".ldata.rel.ro.local";
2838 flags |= SECTION_BSS;
2841 case SECCAT_RODATA_MERGE_STR:
2842 case SECCAT_RODATA_MERGE_STR_INIT:
2843 case SECCAT_RODATA_MERGE_CONST:
2847 case SECCAT_SRODATA:
2854 /* We don't split these for medium model. Place them into
2855 default sections and hope for best. */
2857 case SECCAT_EMUTLS_VAR:
2858 case SECCAT_EMUTLS_TMPL:
2863 /* We might get called with string constants, but get_named_section
2864 doesn't like them as they are not DECLs. Also, we need to set
2865 flags in that case. */
2867 return get_section (sname, flags, NULL);
2868 return get_named_section (decl, sname, reloc);
2871 return default_elf_select_section (decl, reloc, align);
2874 /* Build up a unique section name, expressed as a
2875 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2876 RELOC indicates whether the initial value of EXP requires
2877 link-time relocations. */
2879 static void ATTRIBUTE_UNUSED
2880 x86_64_elf_unique_section (tree decl, int reloc)
2882 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2883 && ix86_in_large_data_p (decl))
2885 const char *prefix = NULL;
2886 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2887 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2889 switch (categorize_decl_for_section (decl, reloc))
2892 case SECCAT_DATA_REL:
2893 case SECCAT_DATA_REL_LOCAL:
2894 case SECCAT_DATA_REL_RO:
2895 case SECCAT_DATA_REL_RO_LOCAL:
2896 prefix = one_only ? ".ld" : ".ldata";
2899 prefix = one_only ? ".lb" : ".lbss";
2902 case SECCAT_RODATA_MERGE_STR:
2903 case SECCAT_RODATA_MERGE_STR_INIT:
2904 case SECCAT_RODATA_MERGE_CONST:
2905 prefix = one_only ? ".lr" : ".lrodata";
2907 case SECCAT_SRODATA:
2914 /* We don't split these for medium model. Place them into
2915 default sections and hope for best. */
2917 case SECCAT_EMUTLS_VAR:
2918 prefix = targetm.emutls.var_section;
2920 case SECCAT_EMUTLS_TMPL:
2921 prefix = targetm.emutls.tmpl_section;
2926 const char *name, *linkonce;
2929 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2930 name = targetm.strip_name_encoding (name);
2932 /* If we're using one_only, then there needs to be a .gnu.linkonce
2933 prefix to the section name. */
2934 linkonce = one_only ? ".gnu.linkonce" : "";
2936 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
2938 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
2942 default_unique_section (decl, reloc);
2945 #ifdef COMMON_ASM_OP
2946 /* This says how to output assembler code to declare an
2947 uninitialized external linkage data object.
2949 For medium model x86-64 we need to use .largecomm opcode for
2952 x86_elf_aligned_common (FILE *file,
2953 const char *name, unsigned HOST_WIDE_INT size,
2956 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2957 && size > (unsigned int)ix86_section_threshold)
2958 fprintf (file, ".largecomm\t");
2960 fprintf (file, "%s", COMMON_ASM_OP);
2961 assemble_name (file, name);
2962 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2963 size, align / BITS_PER_UNIT);
2967 /* Utility function for targets to use in implementing
2968 ASM_OUTPUT_ALIGNED_BSS. */
2971 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2972 const char *name, unsigned HOST_WIDE_INT size,
2975 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2976 && size > (unsigned int)ix86_section_threshold)
2977 switch_to_section (get_named_section (decl, ".lbss", 0));
2979 switch_to_section (bss_section);
2980 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2981 #ifdef ASM_DECLARE_OBJECT_NAME
2982 last_assemble_variable_decl = decl;
2983 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2985 /* Standard thing is just output label for the object. */
2986 ASM_OUTPUT_LABEL (file, name);
2987 #endif /* ASM_DECLARE_OBJECT_NAME */
2988 ASM_OUTPUT_SKIP (file, size ? size : 1);
2992 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2994 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2995 make the problem with not enough registers even worse. */
2996 #ifdef INSN_SCHEDULING
2998 flag_schedule_insns = 0;
3002 /* The Darwin libraries never set errno, so we might as well
3003 avoid calling them when that's the only reason we would. */
3004 flag_errno_math = 0;
3006 /* The default values of these switches depend on the TARGET_64BIT
3007 that is not known at this moment. Mark these values with 2 and
3008 let user the to override these. In case there is no command line option
3009 specifying them, we will set the defaults in override_options. */
3011 flag_omit_frame_pointer = 2;
3012 flag_pcc_struct_return = 2;
3013 flag_asynchronous_unwind_tables = 2;
3014 flag_vect_cost_model = 1;
3015 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3016 SUBTARGET_OPTIMIZATION_OPTIONS;
3020 /* Decide whether we can make a sibling call to a function. DECL is the
3021 declaration of the function being targeted by the call and EXP is the
3022 CALL_EXPR representing the call. */
3025 ix86_function_ok_for_sibcall (tree decl, tree exp)
3030 /* If we are generating position-independent code, we cannot sibcall
3031 optimize any indirect call, or a direct call to a global function,
3032 as the PLT requires %ebx be live. */
3033 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
3040 func = TREE_TYPE (CALL_EXPR_FN (exp));
3041 if (POINTER_TYPE_P (func))
3042 func = TREE_TYPE (func);
3045 /* Check that the return value locations are the same. Like
3046 if we are returning floats on the 80387 register stack, we cannot
3047 make a sibcall from a function that doesn't return a float to a
3048 function that does or, conversely, from a function that does return
3049 a float to a function that doesn't; the necessary stack adjustment
3050 would not be executed. This is also the place we notice
3051 differences in the return value ABI. Note that it is ok for one
3052 of the functions to have void return type as long as the return
3053 value of the other is passed in a register. */
3054 a = ix86_function_value (TREE_TYPE (exp), func, false);
3055 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3057 if (STACK_REG_P (a) || STACK_REG_P (b))
3059 if (!rtx_equal_p (a, b))
3062 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3064 else if (!rtx_equal_p (a, b))
3067 /* If this call is indirect, we'll need to be able to use a call-clobbered
3068 register for the address of the target function. Make sure that all
3069 such registers are not used for passing parameters. */
3070 if (!decl && !TARGET_64BIT)
3074 /* We're looking at the CALL_EXPR, we need the type of the function. */
3075 type = CALL_EXPR_FN (exp); /* pointer expression */
3076 type = TREE_TYPE (type); /* pointer type */
3077 type = TREE_TYPE (type); /* function type */
3079 if (ix86_function_regparm (type, NULL) >= 3)
3081 /* ??? Need to count the actual number of registers to be used,
3082 not the possible number of registers. Fix later. */
3087 /* Dllimport'd functions are also called indirectly. */
3088 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3089 && decl && DECL_DLLIMPORT_P (decl)
3090 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3093 /* If we forced aligned the stack, then sibcalling would unalign the
3094 stack, which may break the called function. */
3095 if (cfun->machine->force_align_arg_pointer)
3098 /* Otherwise okay. That also includes certain types of indirect calls. */
3102 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3103 calling convention attributes;
3104 arguments as in struct attribute_spec.handler. */
3107 ix86_handle_cconv_attribute (tree *node, tree name,
3109 int flags ATTRIBUTE_UNUSED,
3112 if (TREE_CODE (*node) != FUNCTION_TYPE
3113 && TREE_CODE (*node) != METHOD_TYPE
3114 && TREE_CODE (*node) != FIELD_DECL
3115 && TREE_CODE (*node) != TYPE_DECL)
3117 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3118 IDENTIFIER_POINTER (name));
3119 *no_add_attrs = true;
3123 /* Can combine regparm with all attributes but fastcall. */
3124 if (is_attribute_p ("regparm", name))
3128 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3130 error ("fastcall and regparm attributes are not compatible");
3133 cst = TREE_VALUE (args);
3134 if (TREE_CODE (cst) != INTEGER_CST)
3136 warning (OPT_Wattributes,
3137 "%qs attribute requires an integer constant argument",
3138 IDENTIFIER_POINTER (name));
3139 *no_add_attrs = true;
3141 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3143 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3144 IDENTIFIER_POINTER (name), REGPARM_MAX);
3145 *no_add_attrs = true;
3149 && lookup_attribute (ix86_force_align_arg_pointer_string,
3150 TYPE_ATTRIBUTES (*node))
3151 && compare_tree_int (cst, REGPARM_MAX-1))
3153 error ("%s functions limited to %d register parameters",
3154 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3162 /* Do not warn when emulating the MS ABI. */
3163 if (!TARGET_64BIT_MS_ABI)
3164 warning (OPT_Wattributes, "%qs attribute ignored",
3165 IDENTIFIER_POINTER (name));
3166 *no_add_attrs = true;
3170 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3171 if (is_attribute_p ("fastcall", name))
3173 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3175 error ("fastcall and cdecl attributes are not compatible");
3177 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3179 error ("fastcall and stdcall attributes are not compatible");
3181 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3183 error ("fastcall and regparm attributes are not compatible");
3187 /* Can combine stdcall with fastcall (redundant), regparm and
3189 else if (is_attribute_p ("stdcall", name))
3191 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3193 error ("stdcall and cdecl attributes are not compatible");
3195 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3197 error ("stdcall and fastcall attributes are not compatible");
3201 /* Can combine cdecl with regparm and sseregparm. */
3202 else if (is_attribute_p ("cdecl", name))
3204 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3206 error ("stdcall and cdecl attributes are not compatible");
3208 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3210 error ("fastcall and cdecl attributes are not compatible");
3214 /* Can combine sseregparm with all attributes. */
3219 /* Return 0 if the attributes for two types are incompatible, 1 if they
3220 are compatible, and 2 if they are nearly compatible (which causes a
3221 warning to be generated). */
3224 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3226 /* Check for mismatch of non-default calling convention. */
3227 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3229 if (TREE_CODE (type1) != FUNCTION_TYPE
3230 && TREE_CODE (type1) != METHOD_TYPE)
3233 /* Check for mismatched fastcall/regparm types. */
3234 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3235 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3236 || (ix86_function_regparm (type1, NULL)
3237 != ix86_function_regparm (type2, NULL)))
3240 /* Check for mismatched sseregparm types. */
3241 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3242 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3245 /* Check for mismatched return types (cdecl vs stdcall). */
3246 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3247 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3253 /* Return the regparm value for a function with the indicated TYPE and DECL.
3254 DECL may be NULL when calling function indirectly
3255 or considering a libcall. */
3258 ix86_function_regparm (const_tree type, const_tree decl)
3261 int regparm = ix86_regparm;
3263 static bool error_issued;
3268 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3272 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3274 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
3276 /* We can't use regparm(3) for nested functions because
3277 these pass static chain pointer in %ecx register. */
3278 if (!error_issued && regparm == 3
3279 && decl_function_context (decl)
3280 && !DECL_NO_STATIC_CHAIN (decl))
3282 error ("nested functions are limited to 2 register parameters");
3283 error_issued = true;
3291 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3294 /* Use register calling convention for local functions when possible. */
3295 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3296 && flag_unit_at_a_time && !profile_flag)
3298 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3299 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3302 int local_regparm, globals = 0, regno;
3305 /* Make sure no regparm register is taken by a
3306 fixed register variable. */
3307 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3308 if (fixed_regs[local_regparm])
3311 /* We can't use regparm(3) for nested functions as these use
3312 static chain pointer in third argument. */
3313 if (local_regparm == 3
3314 && (decl_function_context (decl)
3315 || ix86_force_align_arg_pointer)
3316 && !DECL_NO_STATIC_CHAIN (decl))
3319 /* If the function realigns its stackpointer, the prologue will
3320 clobber %ecx. If we've already generated code for the callee,
3321 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3322 scanning the attributes for the self-realigning property. */
3323 f = DECL_STRUCT_FUNCTION (decl);
3324 if (local_regparm == 3
3325 && (f ? !!f->machine->force_align_arg_pointer
3326 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3327 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3330 /* Each fixed register usage increases register pressure,
3331 so less registers should be used for argument passing.
3332 This functionality can be overriden by an explicit
3334 for (regno = 0; regno <= DI_REG; regno++)
3335 if (fixed_regs[regno])
3339 = globals < local_regparm ? local_regparm - globals : 0;
3341 if (local_regparm > regparm)
3342 regparm = local_regparm;
3349 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3350 DFmode (2) arguments in SSE registers for a function with the
3351 indicated TYPE and DECL. DECL may be NULL when calling function
3352 indirectly or considering a libcall. Otherwise return 0. */
3355 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
3357 gcc_assert (!TARGET_64BIT);
3359 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3360 by the sseregparm attribute. */
3361 if (TARGET_SSEREGPARM
3362 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3369 error ("Calling %qD with attribute sseregparm without "
3370 "SSE/SSE2 enabled", decl);
3372 error ("Calling %qT with attribute sseregparm without "
3373 "SSE/SSE2 enabled", type);
3381 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3382 (and DFmode for SSE2) arguments in SSE registers. */
3383 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3385 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3386 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3388 return TARGET_SSE2 ? 2 : 1;
3394 /* Return true if EAX is live at the start of the function. Used by
3395 ix86_expand_prologue to determine if we need special help before
3396 calling allocate_stack_worker. */
3399 ix86_eax_live_at_start_p (void)
3401 /* Cheat. Don't bother working forward from ix86_function_regparm
3402 to the function type to whether an actual argument is located in
3403 eax. Instead just look at cfg info, which is still close enough
3404 to correct at this point. This gives false positives for broken
3405 functions that might use uninitialized data that happens to be
3406 allocated in eax, but who cares? */
3407 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3410 /* Value is the number of bytes of arguments automatically
3411 popped when returning from a subroutine call.
3412 FUNDECL is the declaration node of the function (as a tree),
3413 FUNTYPE is the data type of the function (as a tree),
3414 or for a library call it is an identifier node for the subroutine name.
3415 SIZE is the number of bytes of arguments passed on the stack.
3417 On the 80386, the RTD insn may be used to pop them if the number
3418 of args is fixed, but if the number is variable then the caller
3419 must pop them all. RTD can't be used for library calls now
3420 because the library is compiled with the Unix compiler.
3421 Use of RTD is a selectable option, since it is incompatible with
3422 standard Unix calling sequences. If the option is not selected,
3423 the caller must always pop the args.
3425 The attribute stdcall is equivalent to RTD on a per module basis. */
3428 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3432 /* None of the 64-bit ABIs pop arguments. */
3436 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3438 /* Cdecl functions override -mrtd, and never pop the stack. */
3439 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3441 /* Stdcall and fastcall functions will pop the stack if not
3443 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3444 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3447 if (rtd && ! stdarg_p (funtype))
3451 /* Lose any fake structure return argument if it is passed on the stack. */
3452 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3453 && !KEEP_AGGREGATE_RETURN_POINTER)
3455 int nregs = ix86_function_regparm (funtype, fundecl);
3457 return GET_MODE_SIZE (Pmode);
3463 /* Argument support functions. */
3465 /* Return true when register may be used to pass function parameters. */
3467 ix86_function_arg_regno_p (int regno)
3470 const int *parm_regs;
3475 return (regno < REGPARM_MAX
3476 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3478 return (regno < REGPARM_MAX
3479 || (TARGET_MMX && MMX_REGNO_P (regno)
3480 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3481 || (TARGET_SSE && SSE_REGNO_P (regno)
3482 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3487 if (SSE_REGNO_P (regno) && TARGET_SSE)
3492 if (TARGET_SSE && SSE_REGNO_P (regno)
3493 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3497 /* RAX is used as hidden argument to va_arg functions. */
3498 if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
3501 if (TARGET_64BIT_MS_ABI)
3502 parm_regs = x86_64_ms_abi_int_parameter_registers;
3504 parm_regs = x86_64_int_parameter_registers;
3505 for (i = 0; i < REGPARM_MAX; i++)
3506 if (regno == parm_regs[i])
3511 /* Return if we do not know how to pass TYPE solely in registers. */
3514 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3516 if (must_pass_in_stack_var_size_or_pad (mode, type))
3519 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3520 The layout_type routine is crafty and tries to trick us into passing
3521 currently unsupported vector types on the stack by using TImode. */
3522 return (!TARGET_64BIT && mode == TImode
3523 && type && TREE_CODE (type) != VECTOR_TYPE);
3526 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3527 for a call to a function whose data type is FNTYPE.
3528 For a library call, FNTYPE is 0. */
3531 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3532 tree fntype, /* tree ptr for function decl */
3533 rtx libname, /* SYMBOL_REF of library name or 0 */
3536 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3537 memset (cum, 0, sizeof (*cum));
3539 /* Set up the number of registers to use for passing arguments. */
3540 cum->nregs = ix86_regparm;
3542 cum->sse_nregs = SSE_REGPARM_MAX;
3544 cum->mmx_nregs = MMX_REGPARM_MAX;
3545 cum->warn_sse = true;
3546 cum->warn_mmx = true;
3548 /* Because type might mismatch in between caller and callee, we need to
3549 use actual type of function for local calls.
3550 FIXME: cgraph_analyze can be told to actually record if function uses
3551 va_start so for local functions maybe_vaarg can be made aggressive
3553 FIXME: once typesytem is fixed, we won't need this code anymore. */
3555 fntype = TREE_TYPE (fndecl);
3556 cum->maybe_vaarg = (fntype
3557 ? (!prototype_p (fntype) || stdarg_p (fntype))
3562 /* If there are variable arguments, then we won't pass anything
3563 in registers in 32-bit mode. */
3564 if (stdarg_p (fntype))
3574 /* Use ecx and edx registers if function has fastcall attribute,
3575 else look for regparm information. */
3578 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3584 cum->nregs = ix86_function_regparm (fntype, fndecl);
3587 /* Set up the number of SSE registers used for passing SFmode
3588 and DFmode arguments. Warn for mismatching ABI. */
3589 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
3593 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3594 But in the case of vector types, it is some vector mode.
3596 When we have only some of our vector isa extensions enabled, then there
3597 are some modes for which vector_mode_supported_p is false. For these
3598 modes, the generic vector support in gcc will choose some non-vector mode
3599 in order to implement the type. By computing the natural mode, we'll
3600 select the proper ABI location for the operand and not depend on whatever
3601 the middle-end decides to do with these vector types. */
3603 static enum machine_mode
3604 type_natural_mode (const_tree type)
3606 enum machine_mode mode = TYPE_MODE (type);
3608 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3610 HOST_WIDE_INT size = int_size_in_bytes (type);
3611 if ((size == 8 || size == 16)
3612 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3613 && TYPE_VECTOR_SUBPARTS (type) > 1)
3615 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3617 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3618 mode = MIN_MODE_VECTOR_FLOAT;
3620 mode = MIN_MODE_VECTOR_INT;
3622 /* Get the mode which has this inner mode and number of units. */
3623 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3624 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3625 && GET_MODE_INNER (mode) == innermode)
3635 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3636 this may not agree with the mode that the type system has chosen for the
3637 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3638 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3641 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3646 if (orig_mode != BLKmode)
3647 tmp = gen_rtx_REG (orig_mode, regno);
3650 tmp = gen_rtx_REG (mode, regno);
3651 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3652 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3658 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3659 of this code is to classify each 8bytes of incoming argument by the register
3660 class and assign registers accordingly. */
3662 /* Return the union class of CLASS1 and CLASS2.
3663 See the x86-64 PS ABI for details. */
3665 static enum x86_64_reg_class
3666 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3668 /* Rule #1: If both classes are equal, this is the resulting class. */
3669 if (class1 == class2)
3672 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3674 if (class1 == X86_64_NO_CLASS)
3676 if (class2 == X86_64_NO_CLASS)
3679 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3680 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3681 return X86_64_MEMORY_CLASS;
3683 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3684 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3685 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3686 return X86_64_INTEGERSI_CLASS;
3687 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3688 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3689 return X86_64_INTEGER_CLASS;
3691 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3693 if (class1 == X86_64_X87_CLASS
3694 || class1 == X86_64_X87UP_CLASS
3695 || class1 == X86_64_COMPLEX_X87_CLASS
3696 || class2 == X86_64_X87_CLASS
3697 || class2 == X86_64_X87UP_CLASS
3698 || class2 == X86_64_COMPLEX_X87_CLASS)
3699 return X86_64_MEMORY_CLASS;
3701 /* Rule #6: Otherwise class SSE is used. */
3702 return X86_64_SSE_CLASS;
3705 /* Classify the argument of type TYPE and mode MODE.
3706 CLASSES will be filled by the register class used to pass each word
3707 of the operand. The number of words is returned. In case the parameter
3708 should be passed in memory, 0 is returned. As a special case for zero
3709 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3711 BIT_OFFSET is used internally for handling records and specifies offset
3712 of the offset in bits modulo 256 to avoid overflow cases.
3714 See the x86-64 PS ABI for details.
3718 classify_argument (enum machine_mode mode, const_tree type,
3719 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3721 HOST_WIDE_INT bytes =
3722 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3723 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3725 /* Variable sized entities are always passed/returned in memory. */
3729 if (mode != VOIDmode
3730 && targetm.calls.must_pass_in_stack (mode, type))
3733 if (type && AGGREGATE_TYPE_P (type))
3737 enum x86_64_reg_class subclasses[MAX_CLASSES];
3739 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3743 for (i = 0; i < words; i++)
3744 classes[i] = X86_64_NO_CLASS;
3746 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3747 signalize memory class, so handle it as special case. */
3750 classes[0] = X86_64_NO_CLASS;
3754 /* Classify each field of record and merge classes. */
3755 switch (TREE_CODE (type))
3758 /* And now merge the fields of structure. */
3759 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3761 if (TREE_CODE (field) == FIELD_DECL)
3765 if (TREE_TYPE (field) == error_mark_node)
3768 /* Bitfields are always classified as integer. Handle them
3769 early, since later code would consider them to be
3770 misaligned integers. */
3771 if (DECL_BIT_FIELD (field))
3773 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3774 i < ((int_bit_position (field) + (bit_offset % 64))
3775 + tree_low_cst (DECL_SIZE (field), 0)
3778 merge_classes (X86_64_INTEGER_CLASS,
3783 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3784 TREE_TYPE (field), subclasses,
3785 (int_bit_position (field)
3786 + bit_offset) % 256);
3789 for (i = 0; i < num; i++)
3792 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3794 merge_classes (subclasses[i], classes[i + pos]);
3802 /* Arrays are handled as small records. */
3805 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3806 TREE_TYPE (type), subclasses, bit_offset);
3810 /* The partial classes are now full classes. */
3811 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3812 subclasses[0] = X86_64_SSE_CLASS;
3813 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3814 subclasses[0] = X86_64_INTEGER_CLASS;
3816 for (i = 0; i < words; i++)
3817 classes[i] = subclasses[i % num];
3822 case QUAL_UNION_TYPE:
3823 /* Unions are similar to RECORD_TYPE but offset is always 0.
3825 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3827 if (TREE_CODE (field) == FIELD_DECL)
3831 if (TREE_TYPE (field) == error_mark_node)
3834 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3835 TREE_TYPE (field), subclasses,
3839 for (i = 0; i < num; i++)
3840 classes[i] = merge_classes (subclasses[i], classes[i]);
3849 /* Final merger cleanup. */
3850 for (i = 0; i < words; i++)
3852 /* If one class is MEMORY, everything should be passed in
3854 if (classes[i] == X86_64_MEMORY_CLASS)
3857 /* The X86_64_SSEUP_CLASS should be always preceded by
3858 X86_64_SSE_CLASS. */
3859 if (classes[i] == X86_64_SSEUP_CLASS
3860 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3861 classes[i] = X86_64_SSE_CLASS;
3863 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3864 if (classes[i] == X86_64_X87UP_CLASS
3865 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3866 classes[i] = X86_64_SSE_CLASS;
3871 /* Compute alignment needed. We align all types to natural boundaries with
3872 exception of XFmode that is aligned to 64bits. */
3873 if (mode != VOIDmode && mode != BLKmode)
3875 int mode_alignment = GET_MODE_BITSIZE (mode);
3878 mode_alignment = 128;
3879 else if (mode == XCmode)
3880 mode_alignment = 256;
3881 if (COMPLEX_MODE_P (mode))
3882 mode_alignment /= 2;
3883 /* Misaligned fields are always returned in memory. */
3884 if (bit_offset % mode_alignment)
3888 /* for V1xx modes, just use the base mode */
3889 if (VECTOR_MODE_P (mode) && mode != V1DImode
3890 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3891 mode = GET_MODE_INNER (mode);
3893 /* Classification of atomic types. */
3898 classes[0] = X86_64_SSE_CLASS;
3901 classes[0] = X86_64_SSE_CLASS;
3902 classes[1] = X86_64_SSEUP_CLASS;
3911 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3912 classes[0] = X86_64_INTEGERSI_CLASS;
3914 classes[0] = X86_64_INTEGER_CLASS;
3918 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3923 if (!(bit_offset % 64))
3924 classes[0] = X86_64_SSESF_CLASS;
3926 classes[0] = X86_64_SSE_CLASS;
3929 classes[0] = X86_64_SSEDF_CLASS;
3932 classes[0] = X86_64_X87_CLASS;
3933 classes[1] = X86_64_X87UP_CLASS;
3936 classes[0] = X86_64_SSE_CLASS;
3937 classes[1] = X86_64_SSEUP_CLASS;
3940 classes[0] = X86_64_SSE_CLASS;
3943 classes[0] = X86_64_SSEDF_CLASS;
3944 classes[1] = X86_64_SSEDF_CLASS;
3947 classes[0] = X86_64_COMPLEX_X87_CLASS;
3950 /* This modes is larger than 16 bytes. */
3958 classes[0] = X86_64_SSE_CLASS;
3959 classes[1] = X86_64_SSEUP_CLASS;
3966 classes[0] = X86_64_SSE_CLASS;
3972 gcc_assert (VECTOR_MODE_P (mode));
3977 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3979 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3980 classes[0] = X86_64_INTEGERSI_CLASS;
3982 classes[0] = X86_64_INTEGER_CLASS;
3983 classes[1] = X86_64_INTEGER_CLASS;
3984 return 1 + (bytes > 8);
3988 /* Examine the argument and return set number of register required in each
3989 class. Return 0 iff parameter should be passed in memory. */
3991 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3992 int *int_nregs, int *sse_nregs)
3994 enum x86_64_reg_class regclass[MAX_CLASSES];
3995 int n = classify_argument (mode, type, regclass, 0);
4001 for (n--; n >= 0; n--)
4002 switch (regclass[n])
4004 case X86_64_INTEGER_CLASS:
4005 case X86_64_INTEGERSI_CLASS:
4008 case X86_64_SSE_CLASS:
4009 case X86_64_SSESF_CLASS:
4010 case X86_64_SSEDF_CLASS:
4013 case X86_64_NO_CLASS:
4014 case X86_64_SSEUP_CLASS:
4016 case X86_64_X87_CLASS:
4017 case X86_64_X87UP_CLASS:
4021 case X86_64_COMPLEX_X87_CLASS:
4022 return in_return ? 2 : 0;
4023 case X86_64_MEMORY_CLASS:
4029 /* Construct container for the argument used by GCC interface. See
4030 FUNCTION_ARG for the detailed description. */
4033 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
4034 const_tree type, int in_return, int nintregs, int nsseregs,
4035 const int *intreg, int sse_regno)
4037 /* The following variables hold the static issued_error state. */
4038 static bool issued_sse_arg_error;
4039 static bool issued_sse_ret_error;
4040 static bool issued_x87_ret_error;
4042 enum machine_mode tmpmode;
4044 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4045 enum x86_64_reg_class regclass[MAX_CLASSES];
4049 int needed_sseregs, needed_intregs;
4050 rtx exp[MAX_CLASSES];
4053 n = classify_argument (mode, type, regclass, 0);
4056 if (!examine_argument (mode, type, in_return, &needed_intregs,
4059 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
4062 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
4063 some less clueful developer tries to use floating-point anyway. */
4064 if (needed_sseregs && !TARGET_SSE)
4068 if (!issued_sse_ret_error)
4070 error ("SSE register return with SSE disabled");
4071 issued_sse_ret_error = true;
4074 else if (!issued_sse_arg_error)
4076 error ("SSE register argument with SSE disabled");
4077 issued_sse_arg_error = true;
4082 /* Likewise, error if the ABI requires us to return values in the
4083 x87 registers and the user specified -mno-80387. */
4084 if (!TARGET_80387 && in_return)
4085 for (i = 0; i < n; i++)
4086 if (regclass[i] == X86_64_X87_CLASS
4087 || regclass[i] == X86_64_X87UP_CLASS
4088 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
4090 if (!issued_x87_ret_error)
4092 error ("x87 register return with x87 disabled");
4093 issued_x87_ret_error = true;
4098 /* First construct simple cases. Avoid SCmode, since we want to use
4099 single register to pass this type. */
4100 if (n == 1 && mode != SCmode)
4101 switch (regclass[0])
4103 case X86_64_INTEGER_CLASS:
4104 case X86_64_INTEGERSI_CLASS:
4105 return gen_rtx_REG (mode, intreg[0]);
4106 case X86_64_SSE_CLASS:
4107 case X86_64_SSESF_CLASS:
4108 case X86_64_SSEDF_CLASS:
4109 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4110 case X86_64_X87_CLASS:
4111 case X86_64_COMPLEX_X87_CLASS:
4112 return gen_rtx_REG (mode, FIRST_STACK_REG);
4113 case X86_64_NO_CLASS:
4114 /* Zero sized array, struct or class. */
4119 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4120 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4121 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4124 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4125 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4126 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4127 && regclass[1] == X86_64_INTEGER_CLASS
4128 && (mode == CDImode || mode == TImode || mode == TFmode)
4129 && intreg[0] + 1 == intreg[1])
4130 return gen_rtx_REG (mode, intreg[0]);
4132 /* Otherwise figure out the entries of the PARALLEL. */
4133 for (i = 0; i < n; i++)
4135 switch (regclass[i])
4137 case X86_64_NO_CLASS:
4139 case X86_64_INTEGER_CLASS:
4140 case X86_64_INTEGERSI_CLASS:
4141 /* Merge TImodes on aligned occasions here too. */
4142 if (i * 8 + 8 > bytes)
4143 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4144 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4148 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4149 if (tmpmode == BLKmode)
4151 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4152 gen_rtx_REG (tmpmode, *intreg),
4156 case X86_64_SSESF_CLASS:
4157 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4158 gen_rtx_REG (SFmode,
4159 SSE_REGNO (sse_regno)),
4163 case X86_64_SSEDF_CLASS:
4164 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4165 gen_rtx_REG (DFmode,
4166 SSE_REGNO (sse_regno)),
4170 case X86_64_SSE_CLASS:
4171 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4175 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4176 gen_rtx_REG (tmpmode,
4177 SSE_REGNO (sse_regno)),
4179 if (tmpmode == TImode)
4188 /* Empty aligned struct, union or class. */
4192 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4193 for (i = 0; i < nexps; i++)
4194 XVECEXP (ret, 0, i) = exp [i];
4198 /* Update the data in CUM to advance over an argument of mode MODE
4199 and data type TYPE. (TYPE is null for libcalls where that information
4200 may not be available.) */
4203 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4204 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4220 cum->words += words;
4221 cum->nregs -= words;
4222 cum->regno += words;
4224 if (cum->nregs <= 0)
4232 if (cum->float_in_sse < 2)
4235 if (cum->float_in_sse < 1)
4246 if (!type || !AGGREGATE_TYPE_P (type))
4248 cum->sse_words += words;
4249 cum->sse_nregs -= 1;
4250 cum->sse_regno += 1;
4251 if (cum->sse_nregs <= 0)
4264 if (!type || !AGGREGATE_TYPE_P (type))
4266 cum->mmx_words += words;
4267 cum->mmx_nregs -= 1;
4268 cum->mmx_regno += 1;
4269 if (cum->mmx_nregs <= 0)
4280 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4281 tree type, HOST_WIDE_INT words)
4283 int int_nregs, sse_nregs;
4285 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4286 cum->words += words;
4287 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4289 cum->nregs -= int_nregs;
4290 cum->sse_nregs -= sse_nregs;
4291 cum->regno += int_nregs;
4292 cum->sse_regno += sse_nregs;
4295 cum->words += words;
4299 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4300 HOST_WIDE_INT words)
4302 /* Otherwise, this should be passed indirect. */
4303 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4305 cum->words += words;
4314 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4315 tree type, int named ATTRIBUTE_UNUSED)
4317 HOST_WIDE_INT bytes, words;
4319 if (mode == BLKmode)
4320 bytes = int_size_in_bytes (type);
4322 bytes = GET_MODE_SIZE (mode);
4323 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4326 mode = type_natural_mode (type);
4328 if (TARGET_64BIT_MS_ABI)
4329 function_arg_advance_ms_64 (cum, bytes, words);
4330 else if (TARGET_64BIT)
4331 function_arg_advance_64 (cum, mode, type, words);
4333 function_arg_advance_32 (cum, mode, type, bytes, words);
4336 /* Define where to put the arguments to a function.
4337 Value is zero to push the argument on the stack,
4338 or a hard register in which to store the argument.
4340 MODE is the argument's machine mode.
4341 TYPE is the data type of the argument (as a tree).
4342 This is null for libcalls where that information may
4344 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4345 the preceding args and about the function being called.
4346 NAMED is nonzero if this argument is a named parameter
4347 (otherwise it is an extra parameter matching an ellipsis). */
4350 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4351 enum machine_mode orig_mode, tree type,
4352 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4354 static bool warnedsse, warnedmmx;
4356 /* Avoid the AL settings for the Unix64 ABI. */
4357 if (mode == VOIDmode)
4373 if (words <= cum->nregs)
4375 int regno = cum->regno;
4377 /* Fastcall allocates the first two DWORD (SImode) or
4378 smaller arguments to ECX and EDX if it isn't an
4384 || (type && AGGREGATE_TYPE_P (type)))
4387 /* ECX not EAX is the first allocated register. */
4388 if (regno == AX_REG)
4391 return gen_rtx_REG (mode, regno);
4396 if (cum->float_in_sse < 2)
4399 if (cum->float_in_sse < 1)
4409 if (!type || !AGGREGATE_TYPE_P (type))
4411 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4414 warning (0, "SSE vector argument without SSE enabled "
4418 return gen_reg_or_parallel (mode, orig_mode,
4419 cum->sse_regno + FIRST_SSE_REG);
4428 if (!type || !AGGREGATE_TYPE_P (type))
4430 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4433 warning (0, "MMX vector argument without MMX enabled "
4437 return gen_reg_or_parallel (mode, orig_mode,
4438 cum->mmx_regno + FIRST_MMX_REG);
4447 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4448 enum machine_mode orig_mode, tree type)
4450 /* Handle a hidden AL argument containing number of registers
4451 for varargs x86-64 functions. */
4452 if (mode == VOIDmode)
4453 return GEN_INT (cum->maybe_vaarg
4454 ? (cum->sse_nregs < 0
4459 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4461 &x86_64_int_parameter_registers [cum->regno],
4466 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4467 enum machine_mode orig_mode, int named,
4468 HOST_WIDE_INT bytes)
4472 /* Avoid the AL settings for the Unix64 ABI. */
4473 if (mode == VOIDmode)
4476 /* If we've run out of registers, it goes on the stack. */
4477 if (cum->nregs == 0)
4480 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4482 /* Only floating point modes are passed in anything but integer regs. */
4483 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4486 regno = cum->regno + FIRST_SSE_REG;
4491 /* Unnamed floating parameters are passed in both the
4492 SSE and integer registers. */
4493 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4494 t2 = gen_rtx_REG (mode, regno);
4495 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4496 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4497 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4500 /* Handle aggregated types passed in register. */
4501 if (orig_mode == BLKmode)
4503 if (bytes > 0 && bytes <= 8)
4504 mode = (bytes > 4 ? DImode : SImode);
4505 if (mode == BLKmode)
4509 return gen_reg_or_parallel (mode, orig_mode, regno);
4513 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4514 tree type, int named)
4516 enum machine_mode mode = omode;
4517 HOST_WIDE_INT bytes, words;
4519 if (mode == BLKmode)
4520 bytes = int_size_in_bytes (type);
4522 bytes = GET_MODE_SIZE (mode);
4523 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4525 /* To simplify the code below, represent vector types with a vector mode
4526 even if MMX/SSE are not active. */
4527 if (type && TREE_CODE (type) == VECTOR_TYPE)
4528 mode = type_natural_mode (type);
4530 if (TARGET_64BIT_MS_ABI)
4531 return function_arg_ms_64 (cum, mode, omode, named, bytes);
4532 else if (TARGET_64BIT)
4533 return function_arg_64 (cum, mode, omode, type);
4535 return function_arg_32 (cum, mode, omode, type, bytes, words);
4538 /* A C expression that indicates when an argument must be passed by
4539 reference. If nonzero for an argument, a copy of that argument is
4540 made in memory and a pointer to the argument is passed instead of
4541 the argument itself. The pointer is passed in whatever way is
4542 appropriate for passing a pointer to that type. */
4545 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4546 enum machine_mode mode ATTRIBUTE_UNUSED,
4547 const_tree type, bool named ATTRIBUTE_UNUSED)
4549 /* See Windows x64 Software Convention. */
4550 if (TARGET_64BIT_MS_ABI)
4552 int msize = (int) GET_MODE_SIZE (mode);
4555 /* Arrays are passed by reference. */
4556 if (TREE_CODE (type) == ARRAY_TYPE)
4559 if (AGGREGATE_TYPE_P (type))
4561 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4562 are passed by reference. */
4563 msize = int_size_in_bytes (type);
4567 /* __m128 is passed by reference. */
4569 case 1: case 2: case 4: case 8:
4575 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4581 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4584 contains_aligned_value_p (tree type)
4586 enum machine_mode mode = TYPE_MODE (type);
4587 if (((TARGET_SSE && SSE_REG_MODE_P (mode)) || mode == TDmode)
4588 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4590 if (TYPE_ALIGN (type) < 128)
4593 if (AGGREGATE_TYPE_P (type))
4595 /* Walk the aggregates recursively. */
4596 switch (TREE_CODE (type))
4600 case QUAL_UNION_TYPE:
4604 /* Walk all the structure fields. */
4605 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4607 if (TREE_CODE (field) == FIELD_DECL
4608 && contains_aligned_value_p (TREE_TYPE (field)))
4615 /* Just for use if some languages passes arrays by value. */
4616 if (contains_aligned_value_p (TREE_TYPE (type)))
4627 /* Gives the alignment boundary, in bits, of an argument with the
4628 specified mode and type. */
4631 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4635 align = TYPE_ALIGN (type);
4637 align = GET_MODE_ALIGNMENT (mode);
4638 if (align < PARM_BOUNDARY)
4639 align = PARM_BOUNDARY;
4640 /* In 32bit, only _Decimal128 is aligned to its natural boundary. */
4641 if (!TARGET_64BIT && mode != TDmode)
4643 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4644 make an exception for SSE modes since these require 128bit
4647 The handling here differs from field_alignment. ICC aligns MMX
4648 arguments to 4 byte boundaries, while structure fields are aligned
4649 to 8 byte boundaries. */
4652 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)) && mode != TDmode)
4653 align = PARM_BOUNDARY;
4657 if (!contains_aligned_value_p (type))
4658 align = PARM_BOUNDARY;
4661 if (align > BIGGEST_ALIGNMENT)
4662 align = BIGGEST_ALIGNMENT;
4666 /* Return true if N is a possible register number of function value. */
4669 ix86_function_value_regno_p (int regno)
4676 case FIRST_FLOAT_REG:
4677 if (TARGET_64BIT_MS_ABI)
4679 return TARGET_FLOAT_RETURNS_IN_80387;
4685 if (TARGET_MACHO || TARGET_64BIT)
4693 /* Define how to find the value returned by a function.
4694 VALTYPE is the data type of the value (as a tree).
4695 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4696 otherwise, FUNC is 0. */
4699 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4700 const_tree fntype, const_tree fn)
4704 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4705 we normally prevent this case when mmx is not available. However
4706 some ABIs may require the result to be returned like DImode. */
4707 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4708 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4710 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4711 we prevent this case when sse is not available. However some ABIs
4712 may require the result to be returned like integer TImode. */
4713 else if (mode == TImode
4714 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4715 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4717 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4718 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4719 regno = FIRST_FLOAT_REG;
4721 /* Most things go in %eax. */
4724 /* Override FP return register with %xmm0 for local functions when
4725 SSE math is enabled or for functions with sseregparm attribute. */
4726 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4728 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4729 if ((sse_level >= 1 && mode == SFmode)
4730 || (sse_level == 2 && mode == DFmode))
4731 regno = FIRST_SSE_REG;
4734 return gen_rtx_REG (orig_mode, regno);
4738 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4743 /* Handle libcalls, which don't provide a type node. */
4744 if (valtype == NULL)
4756 return gen_rtx_REG (mode, FIRST_SSE_REG);
4759 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4763 return gen_rtx_REG (mode, AX_REG);
4767 ret = construct_container (mode, orig_mode, valtype, 1,
4768 REGPARM_MAX, SSE_REGPARM_MAX,
4769 x86_64_int_return_registers, 0);
4771 /* For zero sized structures, construct_container returns NULL, but we
4772 need to keep rest of compiler happy by returning meaningful value. */
4774 ret = gen_rtx_REG (orig_mode, AX_REG);
4780 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4782 unsigned int regno = AX_REG;
4786 switch (GET_MODE_SIZE (mode))
4789 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4790 && !COMPLEX_MODE_P (mode))
4791 regno = FIRST_SSE_REG;
4795 if (mode == SFmode || mode == DFmode)
4796 regno = FIRST_SSE_REG;
4802 return gen_rtx_REG (orig_mode, regno);
4806 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4807 enum machine_mode orig_mode, enum machine_mode mode)
4809 const_tree fn, fntype;
4812 if (fntype_or_decl && DECL_P (fntype_or_decl))
4813 fn = fntype_or_decl;
4814 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4816 if (TARGET_64BIT_MS_ABI)
4817 return function_value_ms_64 (orig_mode, mode);
4818 else if (TARGET_64BIT)
4819 return function_value_64 (orig_mode, mode, valtype);
4821 return function_value_32 (orig_mode, mode, fntype, fn);
4825 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4826 bool outgoing ATTRIBUTE_UNUSED)
4828 enum machine_mode mode, orig_mode;
4830 orig_mode = TYPE_MODE (valtype);
4831 mode = type_natural_mode (valtype);
4832 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4836 ix86_libcall_value (enum machine_mode mode)
4838 return ix86_function_value_1 (NULL, NULL, mode, mode);
4841 /* Return true iff type is returned in memory. */
4844 return_in_memory_32 (const_tree type, enum machine_mode mode)
4848 if (mode == BLKmode)
4851 size = int_size_in_bytes (type);
4853 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4856 if (VECTOR_MODE_P (mode) || mode == TImode)
4858 /* User-created vectors small enough to fit in EAX. */
4862 /* MMX/3dNow values are returned in MM0,
4863 except when it doesn't exits. */
4865 return (TARGET_MMX ? 0 : 1);
4867 /* SSE values are returned in XMM0, except when it doesn't exist. */
4869 return (TARGET_SSE ? 0 : 1);
4884 return_in_memory_64 (const_tree type, enum machine_mode mode)
4886 int needed_intregs, needed_sseregs;
4887 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4891 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4893 HOST_WIDE_INT size = int_size_in_bytes (type);
4895 /* __m128 is returned in xmm0. */
4896 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4897 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
4900 /* Otherwise, the size must be exactly in [1248]. */
4901 return (size != 1 && size != 2 && size != 4 && size != 8);
4905 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4907 const enum machine_mode mode = type_natural_mode (type);
4909 if (TARGET_64BIT_MS_ABI)
4910 return return_in_memory_ms_64 (type, mode);
4911 else if (TARGET_64BIT)
4912 return return_in_memory_64 (type, mode);
4914 return return_in_memory_32 (type, mode);
4917 /* Return false iff TYPE is returned in memory. This version is used
4918 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4919 but differs notably in that when MMX is available, 8-byte vectors
4920 are returned in memory, rather than in MMX registers. */
4923 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4926 enum machine_mode mode = type_natural_mode (type);
4929 return return_in_memory_64 (type, mode);
4931 if (mode == BLKmode)
4934 size = int_size_in_bytes (type);
4936 if (VECTOR_MODE_P (mode))
4938 /* Return in memory only if MMX registers *are* available. This
4939 seems backwards, but it is consistent with the existing
4946 else if (mode == TImode)
4948 else if (mode == XFmode)
4955 ix86_i386elf_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4957 return (TYPE_MODE (type) == BLKmode
4958 || (VECTOR_MODE_P (TYPE_MODE (type)) && int_size_in_bytes (type) == 8));
4962 ix86_i386interix_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4964 return (TYPE_MODE (type) == BLKmode
4965 || (AGGREGATE_TYPE_P (type) && int_size_in_bytes(type) > 8 ));
4968 /* When returning SSE vector types, we have a choice of either
4969 (1) being abi incompatible with a -march switch, or
4970 (2) generating an error.
4971 Given no good solution, I think the safest thing is one warning.
4972 The user won't be able to use -Werror, but....
4974 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4975 called in response to actually generating a caller or callee that
4976 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
4977 via aggregate_value_p for general type probing from tree-ssa. */
4980 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4982 static bool warnedsse, warnedmmx;
4984 if (!TARGET_64BIT && type)
4986 /* Look at the return type of the function, not the function type. */
4987 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4989 if (!TARGET_SSE && !warnedsse)
4992 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4995 warning (0, "SSE vector return without SSE enabled "
5000 if (!TARGET_MMX && !warnedmmx)
5002 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5005 warning (0, "MMX vector return without MMX enabled "
5015 /* Create the va_list data type. */
5018 ix86_build_builtin_va_list (void)
5020 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
5022 /* For i386 we use plain pointer to argument area. */
5023 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5024 return build_pointer_type (char_type_node);
5026 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5027 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
5029 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
5030 unsigned_type_node);
5031 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
5032 unsigned_type_node);
5033 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
5035 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
5038 va_list_gpr_counter_field = f_gpr;
5039 va_list_fpr_counter_field = f_fpr;
5041 DECL_FIELD_CONTEXT (f_gpr) = record;
5042 DECL_FIELD_CONTEXT (f_fpr) = record;
5043 DECL_FIELD_CONTEXT (f_ovf) = record;
5044 DECL_FIELD_CONTEXT (f_sav) = record;
5046 TREE_CHAIN (record) = type_decl;
5047 TYPE_NAME (record) = type_decl;
5048 TYPE_FIELDS (record) = f_gpr;
5049 TREE_CHAIN (f_gpr) = f_fpr;
5050 TREE_CHAIN (f_fpr) = f_ovf;
5051 TREE_CHAIN (f_ovf) = f_sav;
5053 layout_type (record);
5055 /* The correct type is an array type of one element. */
5056 return build_array_type (record, build_index_type (size_zero_node));
5059 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
5062 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
5072 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
5075 /* Indicate to allocate space on the stack for varargs save area. */
5076 ix86_save_varrargs_registers = 1;
5077 /* We need 16-byte stack alignment to save SSE registers. If user
5078 asked for lower preferred_stack_boundary, lets just hope that he knows
5079 what he is doing and won't varargs SSE values.
5081 We also may end up assuming that only 64bit values are stored in SSE
5082 register let some floating point program work. */
5083 if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
5084 crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;
5086 save_area = frame_pointer_rtx;
5087 set = get_varargs_alias_set ();
5089 for (i = cum->regno;
5091 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
5094 mem = gen_rtx_MEM (Pmode,
5095 plus_constant (save_area, i * UNITS_PER_WORD));
5096 MEM_NOTRAP_P (mem) = 1;
5097 set_mem_alias_set (mem, set);
5098 emit_move_insn (mem, gen_rtx_REG (Pmode,
5099 x86_64_int_parameter_registers[i]));
5102 if (cum->sse_nregs && cfun->va_list_fpr_size)
5104 /* Now emit code to save SSE registers. The AX parameter contains number
5105 of SSE parameter registers used to call this function. We use
5106 sse_prologue_save insn template that produces computed jump across
5107 SSE saves. We need some preparation work to get this working. */
5109 label = gen_label_rtx ();
5110 label_ref = gen_rtx_LABEL_REF (Pmode, label);
5112 /* Compute address to jump to :
5113 label - 5*eax + nnamed_sse_arguments*5 */
5114 tmp_reg = gen_reg_rtx (Pmode);
5115 nsse_reg = gen_reg_rtx (Pmode);
5116 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
5117 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5118 gen_rtx_MULT (Pmode, nsse_reg,
5123 gen_rtx_CONST (DImode,
5124 gen_rtx_PLUS (DImode,
5126 GEN_INT (cum->sse_regno * 4))));
5128 emit_move_insn (nsse_reg, label_ref);
5129 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
5131 /* Compute address of memory block we save into. We always use pointer
5132 pointing 127 bytes after first byte to store - this is needed to keep
5133 instruction size limited by 4 bytes. */
5134 tmp_reg = gen_reg_rtx (Pmode);
5135 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5136 plus_constant (save_area,
5137 8 * REGPARM_MAX + 127)));
5138 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
5139 MEM_NOTRAP_P (mem) = 1;
5140 set_mem_alias_set (mem, set);
5141 set_mem_align (mem, BITS_PER_WORD);
5143 /* And finally do the dirty job! */
5144 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
5145 GEN_INT (cum->sse_regno), label));
5150 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5152 alias_set_type set = get_varargs_alias_set ();
5155 for (i = cum->regno; i < REGPARM_MAX; i++)
5159 mem = gen_rtx_MEM (Pmode,
5160 plus_constant (virtual_incoming_args_rtx,
5161 i * UNITS_PER_WORD));
5162 MEM_NOTRAP_P (mem) = 1;
5163 set_mem_alias_set (mem, set);
5165 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5166 emit_move_insn (mem, reg);
5171 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5172 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5175 CUMULATIVE_ARGS next_cum;
5178 /* This argument doesn't appear to be used anymore. Which is good,
5179 because the old code here didn't suppress rtl generation. */
5180 gcc_assert (!no_rtl);
5185 fntype = TREE_TYPE (current_function_decl);
5187 /* For varargs, we do not want to skip the dummy va_dcl argument.
5188 For stdargs, we do want to skip the last named argument. */
5190 if (stdarg_p (fntype))
5191 function_arg_advance (&next_cum, mode, type, 1);
5193 if (TARGET_64BIT_MS_ABI)
5194 setup_incoming_varargs_ms_64 (&next_cum);
5196 setup_incoming_varargs_64 (&next_cum);
5199 /* Implement va_start. */
5202 ix86_va_start (tree valist, rtx nextarg)
5204 HOST_WIDE_INT words, n_gpr, n_fpr;
5205 tree f_gpr, f_fpr, f_ovf, f_sav;
5206 tree gpr, fpr, ovf, sav, t;
5209 /* Only 64bit target needs something special. */
5210 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5212 std_expand_builtin_va_start (valist, nextarg);
5216 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5217 f_fpr = TREE_CHAIN (f_gpr);
5218 f_ovf = TREE_CHAIN (f_fpr);
5219 f_sav = TREE_CHAIN (f_ovf);
5221 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5222 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5223 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5224 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5225 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5227 /* Count number of gp and fp argument registers used. */
5228 words = crtl->args.info.words;
5229 n_gpr = crtl->args.info.regno;
5230 n_fpr = crtl->args.info.sse_regno;
5232 if (cfun->va_list_gpr_size)
5234 type = TREE_TYPE (gpr);
5235 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5236 build_int_cst (type, n_gpr * 8));
5237 TREE_SIDE_EFFECTS (t) = 1;
5238 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5241 if (cfun->va_list_fpr_size)
5243 type = TREE_TYPE (fpr);
5244 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5245 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5246 TREE_SIDE_EFFECTS (t) = 1;
5247 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5250 /* Find the overflow area. */
5251 type = TREE_TYPE (ovf);
5252 t = make_tree (type, virtual_incoming_args_rtx);
5254 t = build2 (POINTER_PLUS_EXPR, type, t,
5255 size_int (words * UNITS_PER_WORD));
5256 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5257 TREE_SIDE_EFFECTS (t) = 1;
5258 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5260 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5262 /* Find the register save area.
5263 Prologue of the function save it right above stack frame. */
5264 type = TREE_TYPE (sav);
5265 t = make_tree (type, frame_pointer_rtx);
5266 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5267 TREE_SIDE_EFFECTS (t) = 1;
5268 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5272 /* Implement va_arg. */
5275 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5277 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5278 tree f_gpr, f_fpr, f_ovf, f_sav;
5279 tree gpr, fpr, ovf, sav, t;
5281 tree lab_false, lab_over = NULL_TREE;
5286 enum machine_mode nat_mode;
5288 /* Only 64bit target needs something special. */
5289 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5290 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5292 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5293 f_fpr = TREE_CHAIN (f_gpr);
5294 f_ovf = TREE_CHAIN (f_fpr);
5295 f_sav = TREE_CHAIN (f_ovf);
5297 valist = build_va_arg_indirect_ref (valist);
5298 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5299 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5300 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5301 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5303 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5305 type = build_pointer_type (type);
5306 size = int_size_in_bytes (type);
5307 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5309 nat_mode = type_natural_mode (type);
5310 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5311 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5313 /* Pull the value out of the saved registers. */
5315 addr = create_tmp_var (ptr_type_node, "addr");
5316 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5320 int needed_intregs, needed_sseregs;
5322 tree int_addr, sse_addr;
5324 lab_false = create_artificial_label ();
5325 lab_over = create_artificial_label ();
5327 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5329 need_temp = (!REG_P (container)
5330 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5331 || TYPE_ALIGN (type) > 128));
5333 /* In case we are passing structure, verify that it is consecutive block
5334 on the register save area. If not we need to do moves. */
5335 if (!need_temp && !REG_P (container))
5337 /* Verify that all registers are strictly consecutive */
5338 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5342 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5344 rtx slot = XVECEXP (container, 0, i);
5345 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5346 || INTVAL (XEXP (slot, 1)) != i * 16)
5354 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5356 rtx slot = XVECEXP (container, 0, i);
5357 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5358 || INTVAL (XEXP (slot, 1)) != i * 8)
5370 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5371 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5372 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5373 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5376 /* First ensure that we fit completely in registers. */
5379 t = build_int_cst (TREE_TYPE (gpr),
5380 (REGPARM_MAX - needed_intregs + 1) * 8);
5381 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5382 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5383 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5384 gimplify_and_add (t, pre_p);
5388 t = build_int_cst (TREE_TYPE (fpr),
5389 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5391 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5392 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5393 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5394 gimplify_and_add (t, pre_p);
5397 /* Compute index to start of area used for integer regs. */
5400 /* int_addr = gpr + sav; */
5401 t = fold_convert (sizetype, gpr);
5402 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5403 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5404 gimplify_and_add (t, pre_p);
5408 /* sse_addr = fpr + sav; */
5409 t = fold_convert (sizetype, fpr);
5410 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5411 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5412 gimplify_and_add (t, pre_p);
5417 tree temp = create_tmp_var (type, "va_arg_tmp");
5420 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5421 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5422 gimplify_and_add (t, pre_p);
5424 for (i = 0; i < XVECLEN (container, 0); i++)
5426 rtx slot = XVECEXP (container, 0, i);
5427 rtx reg = XEXP (slot, 0);
5428 enum machine_mode mode = GET_MODE (reg);
5429 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5430 tree addr_type = build_pointer_type (piece_type);
5433 tree dest_addr, dest;
5435 if (SSE_REGNO_P (REGNO (reg)))
5437 src_addr = sse_addr;
5438 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5442 src_addr = int_addr;
5443 src_offset = REGNO (reg) * 8;
5445 src_addr = fold_convert (addr_type, src_addr);
5446 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5447 size_int (src_offset));
5448 src = build_va_arg_indirect_ref (src_addr);
5450 dest_addr = fold_convert (addr_type, addr);
5451 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5452 size_int (INTVAL (XEXP (slot, 1))));
5453 dest = build_va_arg_indirect_ref (dest_addr);
5455 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5456 gimplify_and_add (t, pre_p);
5462 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5463 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5464 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5465 gimplify_and_add (t, pre_p);
5469 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5470 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5471 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5472 gimplify_and_add (t, pre_p);
5475 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5476 gimplify_and_add (t, pre_p);
5478 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5479 append_to_statement_list (t, pre_p);
5482 /* ... otherwise out of the overflow area. */
5484 /* Care for on-stack alignment if needed. */
5485 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5486 || integer_zerop (TYPE_SIZE (type)))
5490 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5491 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5492 size_int (align - 1));
5493 t = fold_convert (sizetype, t);
5494 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5496 t = fold_convert (TREE_TYPE (ovf), t);
5498 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5500 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5501 gimplify_and_add (t2, pre_p);
5503 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5504 size_int (rsize * UNITS_PER_WORD));
5505 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5506 gimplify_and_add (t, pre_p);
5510 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5511 append_to_statement_list (t, pre_p);
5514 ptrtype = build_pointer_type (type);
5515 addr = fold_convert (ptrtype, addr);
5518 addr = build_va_arg_indirect_ref (addr);
5519 return build_va_arg_indirect_ref (addr);
5522 /* Return nonzero if OPNUM's MEM should be matched
5523 in movabs* patterns. */
5526 ix86_check_movabs (rtx insn, int opnum)
5530 set = PATTERN (insn);
5531 if (GET_CODE (set) == PARALLEL)
5532 set = XVECEXP (set, 0, 0);
5533 gcc_assert (GET_CODE (set) == SET);
5534 mem = XEXP (set, opnum);
5535 while (GET_CODE (mem) == SUBREG)
5536 mem = SUBREG_REG (mem);
5537 gcc_assert (MEM_P (mem));
5538 return (volatile_ok || !MEM_VOLATILE_P (mem));
5541 /* Initialize the table of extra 80387 mathematical constants. */
5544 init_ext_80387_constants (void)
5546 static const char * cst[5] =
5548 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5549 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5550 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5551 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5552 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5556 for (i = 0; i < 5; i++)
5558 real_from_string (&ext_80387_constants_table[i], cst[i]);
5559 /* Ensure each constant is rounded to XFmode precision. */
5560 real_convert (&ext_80387_constants_table[i],
5561 XFmode, &ext_80387_constants_table[i]);
5564 ext_80387_constants_init = 1;
5567 /* Return true if the constant is something that can be loaded with
5568 a special instruction. */
5571 standard_80387_constant_p (rtx x)
5573 enum machine_mode mode = GET_MODE (x);
5577 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5580 if (x == CONST0_RTX (mode))
5582 if (x == CONST1_RTX (mode))
5585 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5587 /* For XFmode constants, try to find a special 80387 instruction when
5588 optimizing for size or on those CPUs that benefit from them. */
5590 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5594 if (! ext_80387_constants_init)
5595 init_ext_80387_constants ();
5597 for (i = 0; i < 5; i++)
5598 if (real_identical (&r, &ext_80387_constants_table[i]))
5602 /* Load of the constant -0.0 or -1.0 will be split as
5603 fldz;fchs or fld1;fchs sequence. */
5604 if (real_isnegzero (&r))
5606 if (real_identical (&r, &dconstm1))
5612 /* Return the opcode of the special instruction to be used to load
5616 standard_80387_constant_opcode (rtx x)
5618 switch (standard_80387_constant_p (x))
5642 /* Return the CONST_DOUBLE representing the 80387 constant that is
5643 loaded by the specified special instruction. The argument IDX
5644 matches the return value from standard_80387_constant_p. */
5647 standard_80387_constant_rtx (int idx)
5651 if (! ext_80387_constants_init)
5652 init_ext_80387_constants ();
5668 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5672 /* Return 1 if mode is a valid mode for sse. */
5674 standard_sse_mode_p (enum machine_mode mode)
5691 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5694 standard_sse_constant_p (rtx x)
5696 enum machine_mode mode = GET_MODE (x);
5698 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5700 if (vector_all_ones_operand (x, mode)
5701 && standard_sse_mode_p (mode))
5702 return TARGET_SSE2 ? 2 : -1;
5707 /* Return the opcode of the special instruction to be used to load
5711 standard_sse_constant_opcode (rtx insn, rtx x)
5713 switch (standard_sse_constant_p (x))
5716 if (get_attr_mode (insn) == MODE_V4SF)
5717 return "xorps\t%0, %0";
5718 else if (get_attr_mode (insn) == MODE_V2DF)
5719 return "xorpd\t%0, %0";
5721 return "pxor\t%0, %0";
5723 return "pcmpeqd\t%0, %0";
5728 /* Returns 1 if OP contains a symbol reference */
5731 symbolic_reference_mentioned_p (rtx op)
5736 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5739 fmt = GET_RTX_FORMAT (GET_CODE (op));
5740 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5746 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5747 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5751 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5758 /* Return 1 if it is appropriate to emit `ret' instructions in the
5759 body of a function. Do this only if the epilogue is simple, needing a
5760 couple of insns. Prior to reloading, we can't tell how many registers
5761 must be saved, so return 0 then. Return 0 if there is no frame
5762 marker to de-allocate. */
5765 ix86_can_use_return_insn_p (void)
5767 struct ix86_frame frame;
5769 if (! reload_completed || frame_pointer_needed)
5772 /* Don't allow more than 32 pop, since that's all we can do
5773 with one instruction. */
5774 if (crtl->args.pops_args
5775 && crtl->args.size >= 32768)
5778 ix86_compute_frame_layout (&frame);
5779 return frame.to_allocate == 0 && frame.nregs == 0;
5782 /* Value should be nonzero if functions must have frame pointers.
5783 Zero means the frame pointer need not be set up (and parms may
5784 be accessed via the stack pointer) in functions that seem suitable. */
5787 ix86_frame_pointer_required (void)
5789 /* If we accessed previous frames, then the generated code expects
5790 to be able to access the saved ebp value in our frame. */
5791 if (cfun->machine->accesses_prev_frame)
5794 /* Several x86 os'es need a frame pointer for other reasons,
5795 usually pertaining to setjmp. */
5796 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5799 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5800 the frame pointer by default. Turn it back on now if we've not
5801 got a leaf function. */
5802 if (TARGET_OMIT_LEAF_FRAME_POINTER
5803 && (!current_function_is_leaf
5804 || ix86_current_function_calls_tls_descriptor))
5813 /* Record that the current function accesses previous call frames. */
5816 ix86_setup_frame_addresses (void)
5818 cfun->machine->accesses_prev_frame = 1;
5821 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5822 # define USE_HIDDEN_LINKONCE 1
5824 # define USE_HIDDEN_LINKONCE 0
5827 static int pic_labels_used;
5829 /* Fills in the label name that should be used for a pc thunk for
5830 the given register. */
5833 get_pc_thunk_name (char name[32], unsigned int regno)
5835 gcc_assert (!TARGET_64BIT);
5837 if (USE_HIDDEN_LINKONCE)
5838 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5840 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5844 /* This function generates code for -fpic that loads %ebx with
5845 the return address of the caller and then returns. */
5848 ix86_file_end (void)
5853 for (regno = 0; regno < 8; ++regno)
5857 if (! ((pic_labels_used >> regno) & 1))
5860 get_pc_thunk_name (name, regno);
5865 switch_to_section (darwin_sections[text_coal_section]);
5866 fputs ("\t.weak_definition\t", asm_out_file);
5867 assemble_name (asm_out_file, name);
5868 fputs ("\n\t.private_extern\t", asm_out_file);
5869 assemble_name (asm_out_file, name);
5870 fputs ("\n", asm_out_file);
5871 ASM_OUTPUT_LABEL (asm_out_file, name);
5875 if (USE_HIDDEN_LINKONCE)
5879 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5881 TREE_PUBLIC (decl) = 1;
5882 TREE_STATIC (decl) = 1;
5883 DECL_ONE_ONLY (decl) = 1;
5885 (*targetm.asm_out.unique_section) (decl, 0);
5886 switch_to_section (get_named_section (decl, NULL, 0));
5888 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5889 fputs ("\t.hidden\t", asm_out_file);
5890 assemble_name (asm_out_file, name);
5891 fputc ('\n', asm_out_file);
5892 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5896 switch_to_section (text_section);
5897 ASM_OUTPUT_LABEL (asm_out_file, name);
5900 xops[0] = gen_rtx_REG (Pmode, regno);
5901 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5903 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
5905 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5906 output_asm_insn ("ret", xops);
5909 if (NEED_INDICATE_EXEC_STACK)
5910 file_end_indicate_exec_stack ();
5913 /* Emit code for the SET_GOT patterns. */
5916 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5922 if (TARGET_VXWORKS_RTP && flag_pic)
5924 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5925 xops[2] = gen_rtx_MEM (Pmode,
5926 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5927 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5929 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5930 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5931 an unadorned address. */
5932 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5933 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5934 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5938 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5940 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5942 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5947 output_asm_insn ("mov{q}\t{%2, %0|%0, %2}", xops);
5949 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5952 output_asm_insn ("call\t%a2", xops);
5955 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5956 is what will be referenced by the Mach-O PIC subsystem. */
5958 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5961 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5962 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5967 output_asm_insn ("pop{q}\t%0", xops);
5969 output_asm_insn ("pop{l}\t%0", xops);
5975 get_pc_thunk_name (name, REGNO (dest));
5976 pic_labels_used |= 1 << REGNO (dest);
5978 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5979 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5980 output_asm_insn ("call\t%X2", xops);
5981 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5982 is what will be referenced by the Mach-O PIC subsystem. */
5985 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5987 targetm.asm_out.internal_label (asm_out_file, "L",
5988 CODE_LABEL_NUMBER (label));
5995 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5998 output_asm_insn ("add{q}\t{%1, %0|%0, %1}", xops);
6000 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
6005 output_asm_insn ("add{q}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
6007 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
6013 /* Generate an "push" pattern for input ARG. */
6018 return gen_rtx_SET (VOIDmode,
6020 gen_rtx_PRE_DEC (Pmode,
6021 stack_pointer_rtx)),
6025 /* Return >= 0 if there is an unused call-clobbered register available
6026 for the entire function. */
6029 ix86_select_alt_pic_regnum (void)
6031 if (current_function_is_leaf && !crtl->profile
6032 && !ix86_current_function_calls_tls_descriptor)
6035 for (i = 2; i >= 0; --i)
6036 if (!df_regs_ever_live_p (i))
6040 return INVALID_REGNUM;
6043 /* Return 1 if we need to save REGNO. */
6045 ix86_save_reg (unsigned int regno, int maybe_eh_return)
6047 if (pic_offset_table_rtx
6048 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
6049 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6051 || crtl->calls_eh_return
6052 || crtl->uses_const_pool))
6054 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
6059 if (crtl->calls_eh_return && maybe_eh_return)
6064 unsigned test = EH_RETURN_DATA_REGNO (i);
6065 if (test == INVALID_REGNUM)
6072 if (cfun->machine->force_align_arg_pointer
6073 && regno == REGNO (cfun->machine->force_align_arg_pointer))
6076 return (df_regs_ever_live_p (regno)
6077 && !call_used_regs[regno]
6078 && !fixed_regs[regno]
6079 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6082 /* Return number of registers to be saved on the stack. */
6085 ix86_nsaved_regs (void)
6090 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
6091 if (ix86_save_reg (regno, true))
6096 /* Return the offset between two registers, one to be eliminated, and the other
6097 its replacement, at the start of a routine. */
6100 ix86_initial_elimination_offset (int from, int to)
6102 struct ix86_frame frame;
6103 ix86_compute_frame_layout (&frame);
6105 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6106 return frame.hard_frame_pointer_offset;
6107 else if (from == FRAME_POINTER_REGNUM
6108 && to == HARD_FRAME_POINTER_REGNUM)
6109 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6112 gcc_assert (to == STACK_POINTER_REGNUM);
6114 if (from == ARG_POINTER_REGNUM)
6115 return frame.stack_pointer_offset;
6117 gcc_assert (from == FRAME_POINTER_REGNUM);
6118 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6122 /* Fill structure ix86_frame about frame of currently computed function. */
6125 ix86_compute_frame_layout (struct ix86_frame *frame)
6127 HOST_WIDE_INT total_size;
6128 unsigned int stack_alignment_needed;
6129 HOST_WIDE_INT offset;
6130 unsigned int preferred_alignment;
6131 HOST_WIDE_INT size = get_frame_size ();
6133 frame->nregs = ix86_nsaved_regs ();
6136 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6137 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6139 /* During reload iteration the amount of registers saved can change.
6140 Recompute the value as needed. Do not recompute when amount of registers
6141 didn't change as reload does multiple calls to the function and does not
6142 expect the decision to change within single iteration. */
6144 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
6146 int count = frame->nregs;
6148 cfun->machine->use_fast_prologue_epilogue_nregs = count;
6149 /* The fast prologue uses move instead of push to save registers. This
6150 is significantly longer, but also executes faster as modern hardware
6151 can execute the moves in parallel, but can't do that for push/pop.
6153 Be careful about choosing what prologue to emit: When function takes
6154 many instructions to execute we may use slow version as well as in
6155 case function is known to be outside hot spot (this is known with
6156 feedback only). Weight the size of function by number of registers
6157 to save as it is cheap to use one or two push instructions but very
6158 slow to use many of them. */
6160 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6161 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6162 || (flag_branch_probabilities
6163 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6164 cfun->machine->use_fast_prologue_epilogue = false;
6166 cfun->machine->use_fast_prologue_epilogue
6167 = !expensive_function_p (count);
6169 if (TARGET_PROLOGUE_USING_MOVE
6170 && cfun->machine->use_fast_prologue_epilogue)
6171 frame->save_regs_using_mov = true;
6173 frame->save_regs_using_mov = false;
6176 /* Skip return address and saved base pointer. */
6177 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6179 frame->hard_frame_pointer_offset = offset;
6181 /* Do some sanity checking of stack_alignment_needed and
6182 preferred_alignment, since i386 port is the only using those features
6183 that may break easily. */
6185 gcc_assert (!size || stack_alignment_needed);
6186 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6187 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6188 gcc_assert (stack_alignment_needed
6189 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6191 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6192 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6194 /* Register save area */
6195 offset += frame->nregs * UNITS_PER_WORD;
6198 if (ix86_save_varrargs_registers)
6200 offset += X86_64_VARARGS_SIZE;
6201 frame->va_arg_size = X86_64_VARARGS_SIZE;
6204 frame->va_arg_size = 0;
6206 /* Align start of frame for local function. */
6207 frame->padding1 = ((offset + stack_alignment_needed - 1)
6208 & -stack_alignment_needed) - offset;
6210 offset += frame->padding1;
6212 /* Frame pointer points here. */
6213 frame->frame_pointer_offset = offset;
6217 /* Add outgoing arguments area. Can be skipped if we eliminated
6218 all the function calls as dead code.
6219 Skipping is however impossible when function calls alloca. Alloca
6220 expander assumes that last crtl->outgoing_args_size
6221 of stack frame are unused. */
6222 if (ACCUMULATE_OUTGOING_ARGS
6223 && (!current_function_is_leaf || cfun->calls_alloca
6224 || ix86_current_function_calls_tls_descriptor))
6226 offset += crtl->outgoing_args_size;
6227 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6230 frame->outgoing_arguments_size = 0;
6232 /* Align stack boundary. Only needed if we're calling another function
6234 if (!current_function_is_leaf || cfun->calls_alloca
6235 || ix86_current_function_calls_tls_descriptor)
6236 frame->padding2 = ((offset + preferred_alignment - 1)
6237 & -preferred_alignment) - offset;
6239 frame->padding2 = 0;
6241 offset += frame->padding2;
6243 /* We've reached end of stack frame. */
6244 frame->stack_pointer_offset = offset;
6246 /* Size prologue needs to allocate. */
6247 frame->to_allocate =
6248 (size + frame->padding1 + frame->padding2
6249 + frame->outgoing_arguments_size + frame->va_arg_size);
6251 if ((!frame->to_allocate && frame->nregs <= 1)
6252 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6253 frame->save_regs_using_mov = false;
6255 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6256 && current_function_is_leaf
6257 && !ix86_current_function_calls_tls_descriptor)
6259 frame->red_zone_size = frame->to_allocate;
6260 if (frame->save_regs_using_mov)
6261 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6262 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6263 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6266 frame->red_zone_size = 0;
6267 frame->to_allocate -= frame->red_zone_size;
6268 frame->stack_pointer_offset -= frame->red_zone_size;
6270 fprintf (stderr, "\n");
6271 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6272 fprintf (stderr, "size: %ld\n", (long)size);
6273 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6274 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6275 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6276 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6277 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6278 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6279 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6280 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6281 (long)frame->hard_frame_pointer_offset);
6282 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6283 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6284 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
6285 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6289 /* Emit code to save registers in the prologue. */
6292 ix86_emit_save_regs (void)
6297 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6298 if (ix86_save_reg (regno, true))
6300 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6301 RTX_FRAME_RELATED_P (insn) = 1;
6305 /* Emit code to save registers using MOV insns. First register
6306 is restored from POINTER + OFFSET. */
6308 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6313 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6314 if (ix86_save_reg (regno, true))
6316 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6318 gen_rtx_REG (Pmode, regno));
6319 RTX_FRAME_RELATED_P (insn) = 1;
6320 offset += UNITS_PER_WORD;
6324 /* Expand prologue or epilogue stack adjustment.
6325 The pattern exist to put a dependency on all ebp-based memory accesses.
6326 STYLE should be negative if instructions should be marked as frame related,
6327 zero if %r11 register is live and cannot be freely used and positive
6331 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6336 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6337 else if (x86_64_immediate_operand (offset, DImode))
6338 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6342 /* r11 is used by indirect sibcall return as well, set before the
6343 epilogue and used after the epilogue. ATM indirect sibcall
6344 shouldn't be used together with huge frame sizes in one
6345 function because of the frame_size check in sibcall.c. */
6347 r11 = gen_rtx_REG (DImode, R11_REG);
6348 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6350 RTX_FRAME_RELATED_P (insn) = 1;
6351 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6355 RTX_FRAME_RELATED_P (insn) = 1;
6358 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6361 ix86_internal_arg_pointer (void)
6363 bool has_force_align_arg_pointer =
6364 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6365 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6366 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6367 && DECL_NAME (current_function_decl)
6368 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6369 && DECL_FILE_SCOPE_P (current_function_decl))
6370 || ix86_force_align_arg_pointer
6371 || has_force_align_arg_pointer)
6373 /* Nested functions can't realign the stack due to a register
6375 if (DECL_CONTEXT (current_function_decl)
6376 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6378 if (ix86_force_align_arg_pointer)
6379 warning (0, "-mstackrealign ignored for nested functions");
6380 if (has_force_align_arg_pointer)
6381 error ("%s not supported for nested functions",
6382 ix86_force_align_arg_pointer_string);
6383 return virtual_incoming_args_rtx;
6385 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6386 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6389 return virtual_incoming_args_rtx;
6392 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6393 This is called from dwarf2out.c to emit call frame instructions
6394 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6396 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6398 rtx unspec = SET_SRC (pattern);
6399 gcc_assert (GET_CODE (unspec) == UNSPEC);
6403 case UNSPEC_REG_SAVE:
6404 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6405 SET_DEST (pattern));
6407 case UNSPEC_DEF_CFA:
6408 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6409 INTVAL (XVECEXP (unspec, 0, 0)));
6416 /* Expand the prologue into a bunch of separate insns. */
6419 ix86_expand_prologue (void)
6423 struct ix86_frame frame;
6424 HOST_WIDE_INT allocate;
6426 ix86_compute_frame_layout (&frame);
6428 if (cfun->machine->force_align_arg_pointer)
6432 /* Grab the argument pointer. */
6433 x = plus_constant (stack_pointer_rtx, 4);
6434 y = cfun->machine->force_align_arg_pointer;
6435 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6436 RTX_FRAME_RELATED_P (insn) = 1;
6438 /* The unwind info consists of two parts: install the fafp as the cfa,
6439 and record the fafp as the "save register" of the stack pointer.
6440 The later is there in order that the unwinder can see where it
6441 should restore the stack pointer across the and insn. */
6442 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6443 x = gen_rtx_SET (VOIDmode, y, x);
6444 RTX_FRAME_RELATED_P (x) = 1;
6445 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6447 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6448 RTX_FRAME_RELATED_P (y) = 1;
6449 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6450 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6451 REG_NOTES (insn) = x;
6453 /* Align the stack. */
6454 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6457 /* And here we cheat like madmen with the unwind info. We force the
6458 cfa register back to sp+4, which is exactly what it was at the
6459 start of the function. Re-pushing the return address results in
6460 the return at the same spot relative to the cfa, and thus is
6461 correct wrt the unwind info. */
6462 x = cfun->machine->force_align_arg_pointer;
6463 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6464 insn = emit_insn (gen_push (x));
6465 RTX_FRAME_RELATED_P (insn) = 1;
6468 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6469 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6470 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6471 REG_NOTES (insn) = x;
6474 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6475 slower on all targets. Also sdb doesn't like it. */
6477 if (frame_pointer_needed)
6479 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6480 RTX_FRAME_RELATED_P (insn) = 1;
6482 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6483 RTX_FRAME_RELATED_P (insn) = 1;
6486 allocate = frame.to_allocate;
6488 if (!frame.save_regs_using_mov)
6489 ix86_emit_save_regs ();
6491 allocate += frame.nregs * UNITS_PER_WORD;
6493 /* When using red zone we may start register saving before allocating
6494 the stack frame saving one cycle of the prologue. However I will
6495 avoid doing this if I am going to have to probe the stack since
6496 at least on x86_64 the stack probe can turn into a call that clobbers
6497 a red zone location */
6498 if (TARGET_RED_ZONE && frame.save_regs_using_mov
6499 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6500 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6501 : stack_pointer_rtx,
6502 -frame.nregs * UNITS_PER_WORD);
6506 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6507 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6508 GEN_INT (-allocate), -1);
6511 /* Only valid for Win32. */
6512 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6516 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6518 if (TARGET_64BIT_MS_ABI)
6521 eax_live = ix86_eax_live_at_start_p ();
6525 emit_insn (gen_push (eax));
6526 allocate -= UNITS_PER_WORD;
6529 emit_move_insn (eax, GEN_INT (allocate));
6532 insn = gen_allocate_stack_worker_64 (eax);
6534 insn = gen_allocate_stack_worker_32 (eax);
6535 insn = emit_insn (insn);
6536 RTX_FRAME_RELATED_P (insn) = 1;
6537 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6538 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6539 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6540 t, REG_NOTES (insn));
6544 if (frame_pointer_needed)
6545 t = plus_constant (hard_frame_pointer_rtx,
6548 - frame.nregs * UNITS_PER_WORD);
6550 t = plus_constant (stack_pointer_rtx, allocate);
6551 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6555 if (frame.save_regs_using_mov
6556 && !(TARGET_RED_ZONE
6557 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6559 if (!frame_pointer_needed || !frame.to_allocate)
6560 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6562 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6563 -frame.nregs * UNITS_PER_WORD);
6566 pic_reg_used = false;
6567 if (pic_offset_table_rtx
6568 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6571 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6573 if (alt_pic_reg_used != INVALID_REGNUM)
6574 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6576 pic_reg_used = true;
6583 if (ix86_cmodel == CM_LARGE_PIC)
6585 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6586 rtx label = gen_label_rtx ();
6588 LABEL_PRESERVE_P (label) = 1;
6589 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6590 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6591 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6592 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6593 pic_offset_table_rtx, tmp_reg));
6596 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6599 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6602 /* Prevent function calls from being scheduled before the call to mcount.
6603 In the pic_reg_used case, make sure that the got load isn't deleted. */
6607 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6608 emit_insn (gen_blockage ());
6612 /* Emit code to restore saved registers using MOV insns. First register
6613 is restored from POINTER + OFFSET. */
6615 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6616 int maybe_eh_return)
6619 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6621 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6622 if (ix86_save_reg (regno, maybe_eh_return))
6624 /* Ensure that adjust_address won't be forced to produce pointer
6625 out of range allowed by x86-64 instruction set. */
6626 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6630 r11 = gen_rtx_REG (DImode, R11_REG);
6631 emit_move_insn (r11, GEN_INT (offset));
6632 emit_insn (gen_adddi3 (r11, r11, pointer));
6633 base_address = gen_rtx_MEM (Pmode, r11);
6636 emit_move_insn (gen_rtx_REG (Pmode, regno),
6637 adjust_address (base_address, Pmode, offset));
6638 offset += UNITS_PER_WORD;
6642 /* Restore function stack, frame, and registers. */
6645 ix86_expand_epilogue (int style)
6648 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6649 struct ix86_frame frame;
6650 HOST_WIDE_INT offset;
6652 ix86_compute_frame_layout (&frame);
6654 /* Calculate start of saved registers relative to ebp. Special care
6655 must be taken for the normal return case of a function using
6656 eh_return: the eax and edx registers are marked as saved, but not
6657 restored along this path. */
6658 offset = frame.nregs;
6659 if (crtl->calls_eh_return && style != 2)
6661 offset *= -UNITS_PER_WORD;
6663 /* If we're only restoring one register and sp is not valid then
6664 using a move instruction to restore the register since it's
6665 less work than reloading sp and popping the register.
6667 The default code result in stack adjustment using add/lea instruction,
6668 while this code results in LEAVE instruction (or discrete equivalent),
6669 so it is profitable in some other cases as well. Especially when there
6670 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6671 and there is exactly one register to pop. This heuristic may need some
6672 tuning in future. */
6673 if ((!sp_valid && frame.nregs <= 1)
6674 || (TARGET_EPILOGUE_USING_MOVE
6675 && cfun->machine->use_fast_prologue_epilogue
6676 && (frame.nregs > 1 || frame.to_allocate))
6677 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6678 || (frame_pointer_needed && TARGET_USE_LEAVE
6679 && cfun->machine->use_fast_prologue_epilogue
6680 && frame.nregs == 1)
6681 || crtl->calls_eh_return)
6683 /* Restore registers. We can use ebp or esp to address the memory
6684 locations. If both are available, default to ebp, since offsets
6685 are known to be small. Only exception is esp pointing directly to the
6686 end of block of saved registers, where we may simplify addressing
6689 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6690 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6691 frame.to_allocate, style == 2);
6693 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6694 offset, style == 2);
6696 /* eh_return epilogues need %ecx added to the stack pointer. */
6699 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6701 if (frame_pointer_needed)
6703 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6704 tmp = plus_constant (tmp, UNITS_PER_WORD);
6705 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6707 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6708 emit_move_insn (hard_frame_pointer_rtx, tmp);
6710 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6715 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6716 tmp = plus_constant (tmp, (frame.to_allocate
6717 + frame.nregs * UNITS_PER_WORD));
6718 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6721 else if (!frame_pointer_needed)
6722 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6723 GEN_INT (frame.to_allocate
6724 + frame.nregs * UNITS_PER_WORD),
6726 /* If not an i386, mov & pop is faster than "leave". */
6727 else if (TARGET_USE_LEAVE || optimize_size
6728 || !cfun->machine->use_fast_prologue_epilogue)
6729 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6732 pro_epilogue_adjust_stack (stack_pointer_rtx,
6733 hard_frame_pointer_rtx,
6736 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6738 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6743 /* First step is to deallocate the stack frame so that we can
6744 pop the registers. */
6747 gcc_assert (frame_pointer_needed);
6748 pro_epilogue_adjust_stack (stack_pointer_rtx,
6749 hard_frame_pointer_rtx,
6750 GEN_INT (offset), style);
6752 else if (frame.to_allocate)
6753 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6754 GEN_INT (frame.to_allocate), style);
6756 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6757 if (ix86_save_reg (regno, false))
6760 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6762 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6764 if (frame_pointer_needed)
6766 /* Leave results in shorter dependency chains on CPUs that are
6767 able to grok it fast. */
6768 if (TARGET_USE_LEAVE)
6769 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6770 else if (TARGET_64BIT)
6771 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6773 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6777 if (cfun->machine->force_align_arg_pointer)
6779 emit_insn (gen_addsi3 (stack_pointer_rtx,
6780 cfun->machine->force_align_arg_pointer,
6784 /* Sibcall epilogues don't want a return instruction. */
6788 if (crtl->args.pops_args && crtl->args.size)
6790 rtx popc = GEN_INT (crtl->args.pops_args);
6792 /* i386 can only pop 64K bytes. If asked to pop more, pop
6793 return address, do explicit add, and jump indirectly to the
6796 if (crtl->args.pops_args >= 65536)
6798 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6800 /* There is no "pascal" calling convention in any 64bit ABI. */
6801 gcc_assert (!TARGET_64BIT);
6803 emit_insn (gen_popsi1 (ecx));
6804 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6805 emit_jump_insn (gen_return_indirect_internal (ecx));
6808 emit_jump_insn (gen_return_pop_internal (popc));
6811 emit_jump_insn (gen_return_internal ());
6814 /* Reset from the function's potential modifications. */
6817 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6818 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6820 if (pic_offset_table_rtx)
6821 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6823 /* Mach-O doesn't support labels at the end of objects, so if
6824 it looks like we might want one, insert a NOP. */
6826 rtx insn = get_last_insn ();
6829 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6830 insn = PREV_INSN (insn);
6834 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6835 fputs ("\tnop\n", file);
6841 /* Extract the parts of an RTL expression that is a valid memory address
6842 for an instruction. Return 0 if the structure of the address is
6843 grossly off. Return -1 if the address contains ASHIFT, so it is not
6844 strictly valid, but still used for computing length of lea instruction. */
6847 ix86_decompose_address (rtx addr, struct ix86_address *out)
6849 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6850 rtx base_reg, index_reg;
6851 HOST_WIDE_INT scale = 1;
6852 rtx scale_rtx = NULL_RTX;
6854 enum ix86_address_seg seg = SEG_DEFAULT;
6856 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6858 else if (GET_CODE (addr) == PLUS)
6868 addends[n++] = XEXP (op, 1);
6871 while (GET_CODE (op) == PLUS);
6876 for (i = n; i >= 0; --i)
6879 switch (GET_CODE (op))
6884 index = XEXP (op, 0);
6885 scale_rtx = XEXP (op, 1);
6889 if (XINT (op, 1) == UNSPEC_TP
6890 && TARGET_TLS_DIRECT_SEG_REFS
6891 && seg == SEG_DEFAULT)
6892 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6921 else if (GET_CODE (addr) == MULT)
6923 index = XEXP (addr, 0); /* index*scale */
6924 scale_rtx = XEXP (addr, 1);
6926 else if (GET_CODE (addr) == ASHIFT)
6930 /* We're called for lea too, which implements ashift on occasion. */
6931 index = XEXP (addr, 0);
6932 tmp = XEXP (addr, 1);
6933 if (!CONST_INT_P (tmp))
6935 scale = INTVAL (tmp);
6936 if ((unsigned HOST_WIDE_INT) scale > 3)
6942 disp = addr; /* displacement */
6944 /* Extract the integral value of scale. */
6947 if (!CONST_INT_P (scale_rtx))
6949 scale = INTVAL (scale_rtx);
6952 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6953 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6955 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6956 if (base_reg && index_reg && scale == 1
6957 && (index_reg == arg_pointer_rtx
6958 || index_reg == frame_pointer_rtx
6959 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6962 tmp = base, base = index, index = tmp;
6963 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6966 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6967 if ((base_reg == hard_frame_pointer_rtx
6968 || base_reg == frame_pointer_rtx
6969 || base_reg == arg_pointer_rtx) && !disp)
6972 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6973 Avoid this by transforming to [%esi+0]. */
6974 if (TARGET_K6 && !optimize_size
6975 && base_reg && !index_reg && !disp
6977 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6980 /* Special case: encode reg+reg instead of reg*2. */
6981 if (!base && index && scale && scale == 2)
6982 base = index, base_reg = index_reg, scale = 1;
6984 /* Special case: scaling cannot be encoded without base or displacement. */
6985 if (!base && !disp && index && scale != 1)
6997 /* Return cost of the memory address x.
6998 For i386, it is better to use a complex address than let gcc copy
6999 the address into a reg and make a new pseudo. But not if the address
7000 requires to two regs - that would mean more pseudos with longer
7003 ix86_address_cost (rtx x)
7005 struct ix86_address parts;
7007 int ok = ix86_decompose_address (x, &parts);
7011 if (parts.base && GET_CODE (parts.base) == SUBREG)
7012 parts.base = SUBREG_REG (parts.base);
7013 if (parts.index && GET_CODE (parts.index) == SUBREG)
7014 parts.index = SUBREG_REG (parts.index);
7016 /* Attempt to minimize number of registers in the address. */
7018 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
7020 && (!REG_P (parts.index)
7021 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
7025 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
7027 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
7028 && parts.base != parts.index)
7031 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
7032 since it's predecode logic can't detect the length of instructions
7033 and it degenerates to vector decoded. Increase cost of such
7034 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
7035 to split such addresses or even refuse such addresses at all.
7037 Following addressing modes are affected:
7042 The first and last case may be avoidable by explicitly coding the zero in
7043 memory address, but I don't have AMD-K6 machine handy to check this
7047 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
7048 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
7049 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
7055 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
7056 this is used for to form addresses to local data when -fPIC is in
7060 darwin_local_data_pic (rtx disp)
7062 if (GET_CODE (disp) == MINUS)
7064 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
7065 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
7066 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
7068 const char *sym_name = XSTR (XEXP (disp, 1), 0);
7069 if (! strcmp (sym_name, "<pic base>"))
7077 /* Determine if a given RTX is a valid constant. We already know this
7078 satisfies CONSTANT_P. */
7081 legitimate_constant_p (rtx x)
7083 switch (GET_CODE (x))
7088 if (GET_CODE (x) == PLUS)
7090 if (!CONST_INT_P (XEXP (x, 1)))
7095 if (TARGET_MACHO && darwin_local_data_pic (x))
7098 /* Only some unspecs are valid as "constants". */
7099 if (GET_CODE (x) == UNSPEC)
7100 switch (XINT (x, 1))
7105 return TARGET_64BIT;
7108 x = XVECEXP (x, 0, 0);
7109 return (GET_CODE (x) == SYMBOL_REF
7110 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7112 x = XVECEXP (x, 0, 0);
7113 return (GET_CODE (x) == SYMBOL_REF
7114 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
7119 /* We must have drilled down to a symbol. */
7120 if (GET_CODE (x) == LABEL_REF)
7122 if (GET_CODE (x) != SYMBOL_REF)
7127 /* TLS symbols are never valid. */
7128 if (SYMBOL_REF_TLS_MODEL (x))
7131 /* DLLIMPORT symbols are never valid. */
7132 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7133 && SYMBOL_REF_DLLIMPORT_P (x))
7138 if (GET_MODE (x) == TImode
7139 && x != CONST0_RTX (TImode)
7145 if (x == CONST0_RTX (GET_MODE (x)))
7153 /* Otherwise we handle everything else in the move patterns. */
7157 /* Determine if it's legal to put X into the constant pool. This
7158 is not possible for the address of thread-local symbols, which
7159 is checked above. */
7162 ix86_cannot_force_const_mem (rtx x)
7164 /* We can always put integral constants and vectors in memory. */
7165 switch (GET_CODE (x))
7175 return !legitimate_constant_p (x);
7178 /* Determine if a given RTX is a valid constant address. */
7181 constant_address_p (rtx x)
7183 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7186 /* Nonzero if the constant value X is a legitimate general operand
7187 when generating PIC code. It is given that flag_pic is on and
7188 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7191 legitimate_pic_operand_p (rtx x)
7195 switch (GET_CODE (x))
7198 inner = XEXP (x, 0);
7199 if (GET_CODE (inner) == PLUS
7200 && CONST_INT_P (XEXP (inner, 1)))
7201 inner = XEXP (inner, 0);
7203 /* Only some unspecs are valid as "constants". */
7204 if (GET_CODE (inner) == UNSPEC)
7205 switch (XINT (inner, 1))
7210 return TARGET_64BIT;
7212 x = XVECEXP (inner, 0, 0);
7213 return (GET_CODE (x) == SYMBOL_REF
7214 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7222 return legitimate_pic_address_disp_p (x);
7229 /* Determine if a given CONST RTX is a valid memory displacement
7233 legitimate_pic_address_disp_p (rtx disp)
7237 /* In 64bit mode we can allow direct addresses of symbols and labels
7238 when they are not dynamic symbols. */
7241 rtx op0 = disp, op1;
7243 switch (GET_CODE (disp))
7249 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7251 op0 = XEXP (XEXP (disp, 0), 0);
7252 op1 = XEXP (XEXP (disp, 0), 1);
7253 if (!CONST_INT_P (op1)
7254 || INTVAL (op1) >= 16*1024*1024
7255 || INTVAL (op1) < -16*1024*1024)
7257 if (GET_CODE (op0) == LABEL_REF)
7259 if (GET_CODE (op0) != SYMBOL_REF)
7264 /* TLS references should always be enclosed in UNSPEC. */
7265 if (SYMBOL_REF_TLS_MODEL (op0))
7267 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7268 && ix86_cmodel != CM_LARGE_PIC)
7276 if (GET_CODE (disp) != CONST)
7278 disp = XEXP (disp, 0);
7282 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7283 of GOT tables. We should not need these anyway. */
7284 if (GET_CODE (disp) != UNSPEC
7285 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7286 && XINT (disp, 1) != UNSPEC_GOTOFF
7287 && XINT (disp, 1) != UNSPEC_PLTOFF))
7290 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7291 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7297 if (GET_CODE (disp) == PLUS)
7299 if (!CONST_INT_P (XEXP (disp, 1)))
7301 disp = XEXP (disp, 0);
7305 if (TARGET_MACHO && darwin_local_data_pic (disp))
7308 if (GET_CODE (disp) != UNSPEC)
7311 switch (XINT (disp, 1))
7316 /* We need to check for both symbols and labels because VxWorks loads
7317 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7319 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7320 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7322 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7323 While ABI specify also 32bit relocation but we don't produce it in
7324 small PIC model at all. */
7325 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7326 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7328 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7330 case UNSPEC_GOTTPOFF:
7331 case UNSPEC_GOTNTPOFF:
7332 case UNSPEC_INDNTPOFF:
7335 disp = XVECEXP (disp, 0, 0);
7336 return (GET_CODE (disp) == SYMBOL_REF
7337 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7339 disp = XVECEXP (disp, 0, 0);
7340 return (GET_CODE (disp) == SYMBOL_REF
7341 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7343 disp = XVECEXP (disp, 0, 0);
7344 return (GET_CODE (disp) == SYMBOL_REF
7345 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7351 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7352 memory address for an instruction. The MODE argument is the machine mode
7353 for the MEM expression that wants to use this address.
7355 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7356 convert common non-canonical forms to canonical form so that they will
7360 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7361 rtx addr, int strict)
7363 struct ix86_address parts;
7364 rtx base, index, disp;
7365 HOST_WIDE_INT scale;
7366 const char *reason = NULL;
7367 rtx reason_rtx = NULL_RTX;
7369 if (ix86_decompose_address (addr, &parts) <= 0)
7371 reason = "decomposition failed";
7376 index = parts.index;
7378 scale = parts.scale;
7380 /* Validate base register.
7382 Don't allow SUBREG's that span more than a word here. It can lead to spill
7383 failures when the base is one word out of a two word structure, which is
7384 represented internally as a DImode int. */
7393 else if (GET_CODE (base) == SUBREG
7394 && REG_P (SUBREG_REG (base))
7395 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7397 reg = SUBREG_REG (base);
7400 reason = "base is not a register";
7404 if (GET_MODE (base) != Pmode)
7406 reason = "base is not in Pmode";
7410 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7411 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7413 reason = "base is not valid";
7418 /* Validate index register.
7420 Don't allow SUBREG's that span more than a word here -- same as above. */
7429 else if (GET_CODE (index) == SUBREG
7430 && REG_P (SUBREG_REG (index))
7431 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7433 reg = SUBREG_REG (index);
7436 reason = "index is not a register";
7440 if (GET_MODE (index) != Pmode)
7442 reason = "index is not in Pmode";
7446 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7447 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7449 reason = "index is not valid";
7454 /* Validate scale factor. */
7457 reason_rtx = GEN_INT (scale);
7460 reason = "scale without index";
7464 if (scale != 2 && scale != 4 && scale != 8)
7466 reason = "scale is not a valid multiplier";
7471 /* Validate displacement. */
7476 if (GET_CODE (disp) == CONST
7477 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7478 switch (XINT (XEXP (disp, 0), 1))
7480 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7481 used. While ABI specify also 32bit relocations, we don't produce
7482 them at all and use IP relative instead. */
7485 gcc_assert (flag_pic);
7487 goto is_legitimate_pic;
7488 reason = "64bit address unspec";
7491 case UNSPEC_GOTPCREL:
7492 gcc_assert (flag_pic);
7493 goto is_legitimate_pic;
7495 case UNSPEC_GOTTPOFF:
7496 case UNSPEC_GOTNTPOFF:
7497 case UNSPEC_INDNTPOFF:
7503 reason = "invalid address unspec";
7507 else if (SYMBOLIC_CONST (disp)
7511 && MACHOPIC_INDIRECT
7512 && !machopic_operand_p (disp)
7518 if (TARGET_64BIT && (index || base))
7520 /* foo@dtpoff(%rX) is ok. */
7521 if (GET_CODE (disp) != CONST
7522 || GET_CODE (XEXP (disp, 0)) != PLUS
7523 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7524 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7525 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7526 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7528 reason = "non-constant pic memory reference";
7532 else if (! legitimate_pic_address_disp_p (disp))
7534 reason = "displacement is an invalid pic construct";
7538 /* This code used to verify that a symbolic pic displacement
7539 includes the pic_offset_table_rtx register.
7541 While this is good idea, unfortunately these constructs may
7542 be created by "adds using lea" optimization for incorrect
7551 This code is nonsensical, but results in addressing
7552 GOT table with pic_offset_table_rtx base. We can't
7553 just refuse it easily, since it gets matched by
7554 "addsi3" pattern, that later gets split to lea in the
7555 case output register differs from input. While this
7556 can be handled by separate addsi pattern for this case
7557 that never results in lea, this seems to be easier and
7558 correct fix for crash to disable this test. */
7560 else if (GET_CODE (disp) != LABEL_REF
7561 && !CONST_INT_P (disp)
7562 && (GET_CODE (disp) != CONST
7563 || !legitimate_constant_p (disp))
7564 && (GET_CODE (disp) != SYMBOL_REF
7565 || !legitimate_constant_p (disp)))
7567 reason = "displacement is not constant";
7570 else if (TARGET_64BIT
7571 && !x86_64_immediate_operand (disp, VOIDmode))
7573 reason = "displacement is out of range";
7578 /* Everything looks valid. */
7585 /* Return a unique alias set for the GOT. */
7587 static alias_set_type
7588 ix86_GOT_alias_set (void)
7590 static alias_set_type set = -1;
7592 set = new_alias_set ();
7596 /* Return a legitimate reference for ORIG (an address) using the
7597 register REG. If REG is 0, a new pseudo is generated.
7599 There are two types of references that must be handled:
7601 1. Global data references must load the address from the GOT, via
7602 the PIC reg. An insn is emitted to do this load, and the reg is
7605 2. Static data references, constant pool addresses, and code labels
7606 compute the address as an offset from the GOT, whose base is in
7607 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7608 differentiate them from global data objects. The returned
7609 address is the PIC reg + an unspec constant.
7611 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7612 reg also appears in the address. */
7615 legitimize_pic_address (rtx orig, rtx reg)
7622 if (TARGET_MACHO && !TARGET_64BIT)
7625 reg = gen_reg_rtx (Pmode);
7626 /* Use the generic Mach-O PIC machinery. */
7627 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7631 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7633 else if (TARGET_64BIT
7634 && ix86_cmodel != CM_SMALL_PIC
7635 && gotoff_operand (addr, Pmode))
7638 /* This symbol may be referenced via a displacement from the PIC
7639 base address (@GOTOFF). */
7641 if (reload_in_progress)
7642 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7643 if (GET_CODE (addr) == CONST)
7644 addr = XEXP (addr, 0);
7645 if (GET_CODE (addr) == PLUS)
7647 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7649 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7652 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7653 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7655 tmpreg = gen_reg_rtx (Pmode);
7658 emit_move_insn (tmpreg, new_rtx);
7662 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7663 tmpreg, 1, OPTAB_DIRECT);
7666 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7668 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7670 /* This symbol may be referenced via a displacement from the PIC
7671 base address (@GOTOFF). */
7673 if (reload_in_progress)
7674 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7675 if (GET_CODE (addr) == CONST)
7676 addr = XEXP (addr, 0);
7677 if (GET_CODE (addr) == PLUS)
7679 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7681 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7684 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7685 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7686 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7690 emit_move_insn (reg, new_rtx);
7694 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7695 /* We can't use @GOTOFF for text labels on VxWorks;
7696 see gotoff_operand. */
7697 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7699 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7701 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
7702 return legitimize_dllimport_symbol (addr, true);
7703 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
7704 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7705 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
7707 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
7708 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
7712 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7714 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7715 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7716 new_rtx = gen_const_mem (Pmode, new_rtx);
7717 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7720 reg = gen_reg_rtx (Pmode);
7721 /* Use directly gen_movsi, otherwise the address is loaded
7722 into register for CSE. We don't want to CSE this addresses,
7723 instead we CSE addresses from the GOT table, so skip this. */
7724 emit_insn (gen_movsi (reg, new_rtx));
7729 /* This symbol must be referenced via a load from the
7730 Global Offset Table (@GOT). */
7732 if (reload_in_progress)
7733 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7734 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7735 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7737 new_rtx = force_reg (Pmode, new_rtx);
7738 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7739 new_rtx = gen_const_mem (Pmode, new_rtx);
7740 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7743 reg = gen_reg_rtx (Pmode);
7744 emit_move_insn (reg, new_rtx);
7750 if (CONST_INT_P (addr)
7751 && !x86_64_immediate_operand (addr, VOIDmode))
7755 emit_move_insn (reg, addr);
7759 new_rtx = force_reg (Pmode, addr);
7761 else if (GET_CODE (addr) == CONST)
7763 addr = XEXP (addr, 0);
7765 /* We must match stuff we generate before. Assume the only
7766 unspecs that can get here are ours. Not that we could do
7767 anything with them anyway.... */
7768 if (GET_CODE (addr) == UNSPEC
7769 || (GET_CODE (addr) == PLUS
7770 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7772 gcc_assert (GET_CODE (addr) == PLUS);
7774 if (GET_CODE (addr) == PLUS)
7776 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7778 /* Check first to see if this is a constant offset from a @GOTOFF
7779 symbol reference. */
7780 if (gotoff_operand (op0, Pmode)
7781 && CONST_INT_P (op1))
7785 if (reload_in_progress)
7786 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7787 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7789 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7790 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7791 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7795 emit_move_insn (reg, new_rtx);
7801 if (INTVAL (op1) < -16*1024*1024
7802 || INTVAL (op1) >= 16*1024*1024)
7804 if (!x86_64_immediate_operand (op1, Pmode))
7805 op1 = force_reg (Pmode, op1);
7806 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7812 base = legitimize_pic_address (XEXP (addr, 0), reg);
7813 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7814 base == reg ? NULL_RTX : reg);
7816 if (CONST_INT_P (new_rtx))
7817 new_rtx = plus_constant (base, INTVAL (new_rtx));
7820 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7822 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7823 new_rtx = XEXP (new_rtx, 1);
7825 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7833 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7836 get_thread_pointer (int to_reg)
7840 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7844 reg = gen_reg_rtx (Pmode);
7845 insn = gen_rtx_SET (VOIDmode, reg, tp);
7846 insn = emit_insn (insn);
7851 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7852 false if we expect this to be used for a memory address and true if
7853 we expect to load the address into a register. */
7856 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7858 rtx dest, base, off, pic, tp;
7863 case TLS_MODEL_GLOBAL_DYNAMIC:
7864 dest = gen_reg_rtx (Pmode);
7865 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7867 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7869 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
7872 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7873 insns = get_insns ();
7876 RTL_CONST_CALL_P (insns) = 1;
7877 emit_libcall_block (insns, dest, rax, x);
7879 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7880 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7882 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7884 if (TARGET_GNU2_TLS)
7886 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7888 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7892 case TLS_MODEL_LOCAL_DYNAMIC:
7893 base = gen_reg_rtx (Pmode);
7894 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7896 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7898 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
7901 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7902 insns = get_insns ();
7905 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7906 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7907 RTL_CONST_CALL_P (insns) = 1;
7908 emit_libcall_block (insns, base, rax, note);
7910 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7911 emit_insn (gen_tls_local_dynamic_base_64 (base));
7913 emit_insn (gen_tls_local_dynamic_base_32 (base));
7915 if (TARGET_GNU2_TLS)
7917 rtx x = ix86_tls_module_base ();
7919 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7920 gen_rtx_MINUS (Pmode, x, tp));
7923 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7924 off = gen_rtx_CONST (Pmode, off);
7926 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7928 if (TARGET_GNU2_TLS)
7930 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7932 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7937 case TLS_MODEL_INITIAL_EXEC:
7941 type = UNSPEC_GOTNTPOFF;
7945 if (reload_in_progress)
7946 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7947 pic = pic_offset_table_rtx;
7948 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7950 else if (!TARGET_ANY_GNU_TLS)
7952 pic = gen_reg_rtx (Pmode);
7953 emit_insn (gen_set_got (pic));
7954 type = UNSPEC_GOTTPOFF;
7959 type = UNSPEC_INDNTPOFF;
7962 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7963 off = gen_rtx_CONST (Pmode, off);
7965 off = gen_rtx_PLUS (Pmode, pic, off);
7966 off = gen_const_mem (Pmode, off);
7967 set_mem_alias_set (off, ix86_GOT_alias_set ());
7969 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7971 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7972 off = force_reg (Pmode, off);
7973 return gen_rtx_PLUS (Pmode, base, off);
7977 base = get_thread_pointer (true);
7978 dest = gen_reg_rtx (Pmode);
7979 emit_insn (gen_subsi3 (dest, base, off));
7983 case TLS_MODEL_LOCAL_EXEC:
7984 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7985 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7986 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7987 off = gen_rtx_CONST (Pmode, off);
7989 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7991 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7992 return gen_rtx_PLUS (Pmode, base, off);
7996 base = get_thread_pointer (true);
7997 dest = gen_reg_rtx (Pmode);
7998 emit_insn (gen_subsi3 (dest, base, off));
8009 /* Create or return the unique __imp_DECL dllimport symbol corresponding
8012 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
8013 htab_t dllimport_map;
8016 get_dllimport_decl (tree decl)
8018 struct tree_map *h, in;
8022 size_t namelen, prefixlen;
8028 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
8030 in.hash = htab_hash_pointer (decl);
8031 in.base.from = decl;
8032 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
8033 h = (struct tree_map *) *loc;
8037 *loc = h = GGC_NEW (struct tree_map);
8039 h->base.from = decl;
8040 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
8041 DECL_ARTIFICIAL (to) = 1;
8042 DECL_IGNORED_P (to) = 1;
8043 DECL_EXTERNAL (to) = 1;
8044 TREE_READONLY (to) = 1;
8046 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
8047 name = targetm.strip_name_encoding (name);
8048 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
8049 namelen = strlen (name);
8050 prefixlen = strlen (prefix);
8051 imp_name = (char *) alloca (namelen + prefixlen + 1);
8052 memcpy (imp_name, prefix, prefixlen);
8053 memcpy (imp_name + prefixlen, name, namelen + 1);
8055 name = ggc_alloc_string (imp_name, namelen + prefixlen);
8056 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
8057 SET_SYMBOL_REF_DECL (rtl, to);
8058 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
8060 rtl = gen_const_mem (Pmode, rtl);
8061 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
8063 SET_DECL_RTL (to, rtl);
8064 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
8069 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
8070 true if we require the result be a register. */
8073 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
8078 gcc_assert (SYMBOL_REF_DECL (symbol));
8079 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
8081 x = DECL_RTL (imp_decl);
8083 x = force_reg (Pmode, x);
8087 /* Try machine-dependent ways of modifying an illegitimate address
8088 to be legitimate. If we find one, return the new, valid address.
8089 This macro is used in only one place: `memory_address' in explow.c.
8091 OLDX is the address as it was before break_out_memory_refs was called.
8092 In some cases it is useful to look at this to decide what needs to be done.
8094 MODE and WIN are passed so that this macro can use
8095 GO_IF_LEGITIMATE_ADDRESS.
8097 It is always safe for this macro to do nothing. It exists to recognize
8098 opportunities to optimize the output.
8100 For the 80386, we handle X+REG by loading X into a register R and
8101 using R+REG. R will go in a general reg and indexing will be used.
8102 However, if REG is a broken-out memory address or multiplication,
8103 nothing needs to be done because REG can certainly go in a general reg.
8105 When -fpic is used, special handling is needed for symbolic references.
8106 See comments by legitimize_pic_address in i386.c for details. */
8109 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
8114 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
8116 return legitimize_tls_address (x, (enum tls_model) log, false);
8117 if (GET_CODE (x) == CONST
8118 && GET_CODE (XEXP (x, 0)) == PLUS
8119 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8120 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
8122 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
8123 (enum tls_model) log, false);
8124 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8127 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8129 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8130 return legitimize_dllimport_symbol (x, true);
8131 if (GET_CODE (x) == CONST
8132 && GET_CODE (XEXP (x, 0)) == PLUS
8133 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8134 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8136 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8137 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8141 if (flag_pic && SYMBOLIC_CONST (x))
8142 return legitimize_pic_address (x, 0);
8144 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8145 if (GET_CODE (x) == ASHIFT
8146 && CONST_INT_P (XEXP (x, 1))
8147 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
8150 log = INTVAL (XEXP (x, 1));
8151 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8152 GEN_INT (1 << log));
8155 if (GET_CODE (x) == PLUS)
8157 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
8159 if (GET_CODE (XEXP (x, 0)) == ASHIFT
8160 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8161 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
8164 log = INTVAL (XEXP (XEXP (x, 0), 1));
8165 XEXP (x, 0) = gen_rtx_MULT (Pmode,
8166 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8167 GEN_INT (1 << log));
8170 if (GET_CODE (XEXP (x, 1)) == ASHIFT
8171 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8172 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8175 log = INTVAL (XEXP (XEXP (x, 1), 1));
8176 XEXP (x, 1) = gen_rtx_MULT (Pmode,
8177 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8178 GEN_INT (1 << log));
8181 /* Put multiply first if it isn't already. */
8182 if (GET_CODE (XEXP (x, 1)) == MULT)
8184 rtx tmp = XEXP (x, 0);
8185 XEXP (x, 0) = XEXP (x, 1);
8190 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8191 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8192 created by virtual register instantiation, register elimination, and
8193 similar optimizations. */
8194 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8197 x = gen_rtx_PLUS (Pmode,
8198 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8199 XEXP (XEXP (x, 1), 0)),
8200 XEXP (XEXP (x, 1), 1));
8204 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8205 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8206 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8207 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8208 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8209 && CONSTANT_P (XEXP (x, 1)))
8212 rtx other = NULL_RTX;
8214 if (CONST_INT_P (XEXP (x, 1)))
8216 constant = XEXP (x, 1);
8217 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8219 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8221 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8222 other = XEXP (x, 1);
8230 x = gen_rtx_PLUS (Pmode,
8231 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8232 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8233 plus_constant (other, INTVAL (constant)));
8237 if (changed && legitimate_address_p (mode, x, FALSE))
8240 if (GET_CODE (XEXP (x, 0)) == MULT)
8243 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8246 if (GET_CODE (XEXP (x, 1)) == MULT)
8249 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8253 && REG_P (XEXP (x, 1))
8254 && REG_P (XEXP (x, 0)))
8257 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8260 x = legitimize_pic_address (x, 0);
8263 if (changed && legitimate_address_p (mode, x, FALSE))
8266 if (REG_P (XEXP (x, 0)))
8268 rtx temp = gen_reg_rtx (Pmode);
8269 rtx val = force_operand (XEXP (x, 1), temp);
8271 emit_move_insn (temp, val);
8277 else if (REG_P (XEXP (x, 1)))
8279 rtx temp = gen_reg_rtx (Pmode);
8280 rtx val = force_operand (XEXP (x, 0), temp);
8282 emit_move_insn (temp, val);
8292 /* Print an integer constant expression in assembler syntax. Addition
8293 and subtraction are the only arithmetic that may appear in these
8294 expressions. FILE is the stdio stream to write to, X is the rtx, and
8295 CODE is the operand print code from the output string. */
8298 output_pic_addr_const (FILE *file, rtx x, int code)
8302 switch (GET_CODE (x))
8305 gcc_assert (flag_pic);
8310 if (! TARGET_MACHO || TARGET_64BIT)
8311 output_addr_const (file, x);
8314 const char *name = XSTR (x, 0);
8316 /* Mark the decl as referenced so that cgraph will
8317 output the function. */
8318 if (SYMBOL_REF_DECL (x))
8319 mark_decl_referenced (SYMBOL_REF_DECL (x));
8322 if (MACHOPIC_INDIRECT
8323 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8324 name = machopic_indirection_name (x, /*stub_p=*/true);
8326 assemble_name (file, name);
8328 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8329 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8330 fputs ("@PLT", file);
8337 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8338 assemble_name (asm_out_file, buf);
8342 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8346 /* This used to output parentheses around the expression,
8347 but that does not work on the 386 (either ATT or BSD assembler). */
8348 output_pic_addr_const (file, XEXP (x, 0), code);
8352 if (GET_MODE (x) == VOIDmode)
8354 /* We can use %d if the number is <32 bits and positive. */
8355 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8356 fprintf (file, "0x%lx%08lx",
8357 (unsigned long) CONST_DOUBLE_HIGH (x),
8358 (unsigned long) CONST_DOUBLE_LOW (x));
8360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8363 /* We can't handle floating point constants;
8364 PRINT_OPERAND must handle them. */
8365 output_operand_lossage ("floating constant misused");
8369 /* Some assemblers need integer constants to appear first. */
8370 if (CONST_INT_P (XEXP (x, 0)))
8372 output_pic_addr_const (file, XEXP (x, 0), code);
8374 output_pic_addr_const (file, XEXP (x, 1), code);
8378 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8379 output_pic_addr_const (file, XEXP (x, 1), code);
8381 output_pic_addr_const (file, XEXP (x, 0), code);
8387 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8388 output_pic_addr_const (file, XEXP (x, 0), code);
8390 output_pic_addr_const (file, XEXP (x, 1), code);
8392 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8396 gcc_assert (XVECLEN (x, 0) == 1);
8397 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8398 switch (XINT (x, 1))
8401 fputs ("@GOT", file);
8404 fputs ("@GOTOFF", file);
8407 fputs ("@PLTOFF", file);
8409 case UNSPEC_GOTPCREL:
8410 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8411 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8413 case UNSPEC_GOTTPOFF:
8414 /* FIXME: This might be @TPOFF in Sun ld too. */
8415 fputs ("@GOTTPOFF", file);
8418 fputs ("@TPOFF", file);
8422 fputs ("@TPOFF", file);
8424 fputs ("@NTPOFF", file);
8427 fputs ("@DTPOFF", file);
8429 case UNSPEC_GOTNTPOFF:
8431 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8432 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8434 fputs ("@GOTNTPOFF", file);
8436 case UNSPEC_INDNTPOFF:
8437 fputs ("@INDNTPOFF", file);
8440 output_operand_lossage ("invalid UNSPEC as operand");
8446 output_operand_lossage ("invalid expression as operand");
8450 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8451 We need to emit DTP-relative relocations. */
8453 static void ATTRIBUTE_UNUSED
8454 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8456 fputs (ASM_LONG, file);
8457 output_addr_const (file, x);
8458 fputs ("@DTPOFF", file);
8464 fputs (", 0", file);
8471 /* In the name of slightly smaller debug output, and to cater to
8472 general assembler lossage, recognize PIC+GOTOFF and turn it back
8473 into a direct symbol reference.
8475 On Darwin, this is necessary to avoid a crash, because Darwin
8476 has a different PIC label for each routine but the DWARF debugging
8477 information is not associated with any particular routine, so it's
8478 necessary to remove references to the PIC label from RTL stored by
8479 the DWARF output code. */
8482 ix86_delegitimize_address (rtx orig_x)
8485 /* reg_addend is NULL or a multiple of some register. */
8486 rtx reg_addend = NULL_RTX;
8487 /* const_addend is NULL or a const_int. */
8488 rtx const_addend = NULL_RTX;
8489 /* This is the result, or NULL. */
8490 rtx result = NULL_RTX;
8497 if (GET_CODE (x) != CONST
8498 || GET_CODE (XEXP (x, 0)) != UNSPEC
8499 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8502 return XVECEXP (XEXP (x, 0), 0, 0);
8505 if (GET_CODE (x) != PLUS
8506 || GET_CODE (XEXP (x, 1)) != CONST)
8509 if (REG_P (XEXP (x, 0))
8510 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8511 /* %ebx + GOT/GOTOFF */
8513 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8515 /* %ebx + %reg * scale + GOT/GOTOFF */
8516 reg_addend = XEXP (x, 0);
8517 if (REG_P (XEXP (reg_addend, 0))
8518 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8519 reg_addend = XEXP (reg_addend, 1);
8520 else if (REG_P (XEXP (reg_addend, 1))
8521 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8522 reg_addend = XEXP (reg_addend, 0);
8525 if (!REG_P (reg_addend)
8526 && GET_CODE (reg_addend) != MULT
8527 && GET_CODE (reg_addend) != ASHIFT)
8533 x = XEXP (XEXP (x, 1), 0);
8534 if (GET_CODE (x) == PLUS
8535 && CONST_INT_P (XEXP (x, 1)))
8537 const_addend = XEXP (x, 1);
8541 if (GET_CODE (x) == UNSPEC
8542 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8543 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8544 result = XVECEXP (x, 0, 0);
8546 if (TARGET_MACHO && darwin_local_data_pic (x)
8548 result = XEXP (x, 0);
8554 result = gen_rtx_PLUS (Pmode, result, const_addend);
8556 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8560 /* If X is a machine specific address (i.e. a symbol or label being
8561 referenced as a displacement from the GOT implemented using an
8562 UNSPEC), then return the base term. Otherwise return X. */
8565 ix86_find_base_term (rtx x)
8571 if (GET_CODE (x) != CONST)
8574 if (GET_CODE (term) == PLUS
8575 && (CONST_INT_P (XEXP (term, 1))
8576 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8577 term = XEXP (term, 0);
8578 if (GET_CODE (term) != UNSPEC
8579 || XINT (term, 1) != UNSPEC_GOTPCREL)
8582 term = XVECEXP (term, 0, 0);
8584 if (GET_CODE (term) != SYMBOL_REF
8585 && GET_CODE (term) != LABEL_REF)
8591 term = ix86_delegitimize_address (x);
8593 if (GET_CODE (term) != SYMBOL_REF
8594 && GET_CODE (term) != LABEL_REF)
8601 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8606 if (mode == CCFPmode || mode == CCFPUmode)
8608 enum rtx_code second_code, bypass_code;
8609 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8610 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8611 code = ix86_fp_compare_code_to_integer (code);
8615 code = reverse_condition (code);
8666 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8670 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8671 Those same assemblers have the same but opposite lossage on cmov. */
8673 suffix = fp ? "nbe" : "a";
8674 else if (mode == CCCmode)
8697 gcc_assert (mode == CCmode || mode == CCCmode);
8719 gcc_assert (mode == CCmode || mode == CCCmode);
8720 suffix = fp ? "nb" : "ae";
8723 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8730 else if (mode == CCCmode)
8731 suffix = fp ? "nb" : "ae";
8736 suffix = fp ? "u" : "p";
8739 suffix = fp ? "nu" : "np";
8744 fputs (suffix, file);
8747 /* Print the name of register X to FILE based on its machine mode and number.
8748 If CODE is 'w', pretend the mode is HImode.
8749 If CODE is 'b', pretend the mode is QImode.
8750 If CODE is 'k', pretend the mode is SImode.
8751 If CODE is 'q', pretend the mode is DImode.
8752 If CODE is 'h', pretend the reg is the 'high' byte register.
8753 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8756 print_reg (rtx x, int code, FILE *file)
8758 gcc_assert (x == pc_rtx
8759 || (REGNO (x) != ARG_POINTER_REGNUM
8760 && REGNO (x) != FRAME_POINTER_REGNUM
8761 && REGNO (x) != FLAGS_REG
8762 && REGNO (x) != FPSR_REG
8763 && REGNO (x) != FPCR_REG));
8765 if (ASSEMBLER_DIALECT == ASM_ATT)
8770 gcc_assert (TARGET_64BIT);
8771 fputs ("rip", file);
8775 if (code == 'w' || MMX_REG_P (x))
8777 else if (code == 'b')
8779 else if (code == 'k')
8781 else if (code == 'q')
8783 else if (code == 'y')
8785 else if (code == 'h')
8788 code = GET_MODE_SIZE (GET_MODE (x));
8790 /* Irritatingly, AMD extended registers use different naming convention
8791 from the normal registers. */
8792 if (REX_INT_REG_P (x))
8794 gcc_assert (TARGET_64BIT);
8798 error ("extended registers have no high halves");
8801 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8804 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8807 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8810 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8813 error ("unsupported operand size for extended register");
8821 if (STACK_TOP_P (x))
8823 fputs ("st(0)", file);
8830 if (! ANY_FP_REG_P (x))
8831 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8836 fputs (hi_reg_name[REGNO (x)], file);
8839 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8841 fputs (qi_reg_name[REGNO (x)], file);
8844 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8846 fputs (qi_high_reg_name[REGNO (x)], file);
8853 /* Locate some local-dynamic symbol still in use by this function
8854 so that we can print its name in some tls_local_dynamic_base
8858 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8862 if (GET_CODE (x) == SYMBOL_REF
8863 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8865 cfun->machine->some_ld_name = XSTR (x, 0);
8873 get_some_local_dynamic_name (void)
8877 if (cfun->machine->some_ld_name)
8878 return cfun->machine->some_ld_name;
8880 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8882 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8883 return cfun->machine->some_ld_name;
8889 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8890 C -- print opcode suffix for set/cmov insn.
8891 c -- like C, but print reversed condition
8892 F,f -- likewise, but for floating-point.
8893 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8895 R -- print the prefix for register names.
8896 z -- print the opcode suffix for the size of the current operand.
8897 * -- print a star (in certain assembler syntax)
8898 A -- print an absolute memory reference.
8899 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8900 s -- print a shift double count, followed by the assemblers argument
8902 b -- print the QImode name of the register for the indicated operand.
8903 %b0 would print %al if operands[0] is reg 0.
8904 w -- likewise, print the HImode name of the register.
8905 k -- likewise, print the SImode name of the register.
8906 q -- likewise, print the DImode name of the register.
8907 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8908 y -- print "st(0)" instead of "st" as a register.
8909 D -- print condition for SSE cmp instruction.
8910 P -- if PIC, print an @PLT suffix.
8911 X -- don't print any sort of PIC '@' suffix for a symbol.
8912 & -- print some in-use local-dynamic symbol name.
8913 H -- print a memory address offset by 8; used for sse high-parts
8914 Y -- print condition for SSE5 com* instruction.
8915 + -- print a branch hint as 'cs' or 'ds' prefix
8916 ; -- print a semicolon (after prefixes due to bug in older gas).
8920 print_operand (FILE *file, rtx x, int code)
8927 if (ASSEMBLER_DIALECT == ASM_ATT)
8932 assemble_name (file, get_some_local_dynamic_name ());
8936 switch (ASSEMBLER_DIALECT)
8943 /* Intel syntax. For absolute addresses, registers should not
8944 be surrounded by braces. */
8948 PRINT_OPERAND (file, x, 0);
8958 PRINT_OPERAND (file, x, 0);
8963 if (ASSEMBLER_DIALECT == ASM_ATT)
8968 if (ASSEMBLER_DIALECT == ASM_ATT)
8973 if (ASSEMBLER_DIALECT == ASM_ATT)
8978 if (ASSEMBLER_DIALECT == ASM_ATT)
8983 if (ASSEMBLER_DIALECT == ASM_ATT)
8988 if (ASSEMBLER_DIALECT == ASM_ATT)
8993 /* 387 opcodes don't get size suffixes if the operands are
8995 if (STACK_REG_P (x))
8998 /* Likewise if using Intel opcodes. */
8999 if (ASSEMBLER_DIALECT == ASM_INTEL)
9002 /* This is the size of op from size of operand. */
9003 switch (GET_MODE_SIZE (GET_MODE (x)))
9012 #ifdef HAVE_GAS_FILDS_FISTS
9022 if (GET_MODE (x) == SFmode)
9037 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
9039 #ifdef GAS_MNEMONICS
9065 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
9067 PRINT_OPERAND (file, x, 0);
9073 /* Little bit of braindamage here. The SSE compare instructions
9074 does use completely different names for the comparisons that the
9075 fp conditional moves. */
9076 switch (GET_CODE (x))
9091 fputs ("unord", file);
9095 fputs ("neq", file);
9099 fputs ("nlt", file);
9103 fputs ("nle", file);
9106 fputs ("ord", file);
9113 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9114 if (ASSEMBLER_DIALECT == ASM_ATT)
9116 switch (GET_MODE (x))
9118 case HImode: putc ('w', file); break;
9120 case SFmode: putc ('l', file); break;
9122 case DFmode: putc ('q', file); break;
9123 default: gcc_unreachable ();
9130 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
9133 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9134 if (ASSEMBLER_DIALECT == ASM_ATT)
9137 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
9140 /* Like above, but reverse condition */
9142 /* Check to see if argument to %c is really a constant
9143 and not a condition code which needs to be reversed. */
9144 if (!COMPARISON_P (x))
9146 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9149 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9152 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9153 if (ASSEMBLER_DIALECT == ASM_ATT)
9156 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
9160 /* It doesn't actually matter what mode we use here, as we're
9161 only going to use this for printing. */
9162 x = adjust_address_nv (x, DImode, 8);
9169 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9172 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9175 int pred_val = INTVAL (XEXP (x, 0));
9177 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9178 || pred_val > REG_BR_PROB_BASE * 55 / 100)
9180 int taken = pred_val > REG_BR_PROB_BASE / 2;
9181 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9183 /* Emit hints only in the case default branch prediction
9184 heuristics would fail. */
9185 if (taken != cputaken)
9187 /* We use 3e (DS) prefix for taken branches and
9188 2e (CS) prefix for not taken branches. */
9190 fputs ("ds ; ", file);
9192 fputs ("cs ; ", file);
9200 switch (GET_CODE (x))
9203 fputs ("neq", file);
9210 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9214 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9225 fputs ("unord", file);
9228 fputs ("ord", file);
9231 fputs ("ueq", file);
9234 fputs ("nlt", file);
9237 fputs ("nle", file);
9240 fputs ("ule", file);
9243 fputs ("ult", file);
9246 fputs ("une", file);
9255 fputs (" ; ", file);
9262 output_operand_lossage ("invalid operand code '%c'", code);
9267 print_reg (x, code, file);
9271 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9272 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9273 && GET_MODE (x) != BLKmode)
9276 switch (GET_MODE_SIZE (GET_MODE (x)))
9278 case 1: size = "BYTE"; break;
9279 case 2: size = "WORD"; break;
9280 case 4: size = "DWORD"; break;
9281 case 8: size = "QWORD"; break;
9282 case 12: size = "XWORD"; break;
9284 if (GET_MODE (x) == XFmode)
9293 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9296 else if (code == 'w')
9298 else if (code == 'k')
9302 fputs (" PTR ", file);
9306 /* Avoid (%rip) for call operands. */
9307 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9308 && !CONST_INT_P (x))
9309 output_addr_const (file, x);
9310 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9311 output_operand_lossage ("invalid constraints for operand");
9316 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9321 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9322 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9324 if (ASSEMBLER_DIALECT == ASM_ATT)
9326 fprintf (file, "0x%08lx", (long unsigned int) l);
9329 /* These float cases don't actually occur as immediate operands. */
9330 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9334 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9335 fprintf (file, "%s", dstr);
9338 else if (GET_CODE (x) == CONST_DOUBLE
9339 && GET_MODE (x) == XFmode)
9343 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9344 fprintf (file, "%s", dstr);
9349 /* We have patterns that allow zero sets of memory, for instance.
9350 In 64-bit mode, we should probably support all 8-byte vectors,
9351 since we can in fact encode that into an immediate. */
9352 if (GET_CODE (x) == CONST_VECTOR)
9354 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9360 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9362 if (ASSEMBLER_DIALECT == ASM_ATT)
9365 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9366 || GET_CODE (x) == LABEL_REF)
9368 if (ASSEMBLER_DIALECT == ASM_ATT)
9371 fputs ("OFFSET FLAT:", file);
9374 if (CONST_INT_P (x))
9375 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9377 output_pic_addr_const (file, x, code);
9379 output_addr_const (file, x);
9383 /* Print a memory operand whose address is ADDR. */
9386 print_operand_address (FILE *file, rtx addr)
9388 struct ix86_address parts;
9389 rtx base, index, disp;
9391 int ok = ix86_decompose_address (addr, &parts);
9396 index = parts.index;
9398 scale = parts.scale;
9406 if (ASSEMBLER_DIALECT == ASM_ATT)
9408 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9414 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9415 if (TARGET_64BIT && !base && !index)
9419 if (GET_CODE (disp) == CONST
9420 && GET_CODE (XEXP (disp, 0)) == PLUS
9421 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9422 symbol = XEXP (XEXP (disp, 0), 0);
9424 if (GET_CODE (symbol) == LABEL_REF
9425 || (GET_CODE (symbol) == SYMBOL_REF
9426 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9429 if (!base && !index)
9431 /* Displacement only requires special attention. */
9433 if (CONST_INT_P (disp))
9435 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9436 fputs ("ds:", file);
9437 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9440 output_pic_addr_const (file, disp, 0);
9442 output_addr_const (file, disp);
9446 if (ASSEMBLER_DIALECT == ASM_ATT)
9451 output_pic_addr_const (file, disp, 0);
9452 else if (GET_CODE (disp) == LABEL_REF)
9453 output_asm_label (disp);
9455 output_addr_const (file, disp);
9460 print_reg (base, 0, file);
9464 print_reg (index, 0, file);
9466 fprintf (file, ",%d", scale);
9472 rtx offset = NULL_RTX;
9476 /* Pull out the offset of a symbol; print any symbol itself. */
9477 if (GET_CODE (disp) == CONST
9478 && GET_CODE (XEXP (disp, 0)) == PLUS
9479 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9481 offset = XEXP (XEXP (disp, 0), 1);
9482 disp = gen_rtx_CONST (VOIDmode,
9483 XEXP (XEXP (disp, 0), 0));
9487 output_pic_addr_const (file, disp, 0);
9488 else if (GET_CODE (disp) == LABEL_REF)
9489 output_asm_label (disp);
9490 else if (CONST_INT_P (disp))
9493 output_addr_const (file, disp);
9499 print_reg (base, 0, file);
9502 if (INTVAL (offset) >= 0)
9504 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9508 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9515 print_reg (index, 0, file);
9517 fprintf (file, "*%d", scale);
9525 output_addr_const_extra (FILE *file, rtx x)
9529 if (GET_CODE (x) != UNSPEC)
9532 op = XVECEXP (x, 0, 0);
9533 switch (XINT (x, 1))
9535 case UNSPEC_GOTTPOFF:
9536 output_addr_const (file, op);
9537 /* FIXME: This might be @TPOFF in Sun ld. */
9538 fputs ("@GOTTPOFF", file);
9541 output_addr_const (file, op);
9542 fputs ("@TPOFF", file);
9545 output_addr_const (file, op);
9547 fputs ("@TPOFF", file);
9549 fputs ("@NTPOFF", file);
9552 output_addr_const (file, op);
9553 fputs ("@DTPOFF", file);
9555 case UNSPEC_GOTNTPOFF:
9556 output_addr_const (file, op);
9558 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9559 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9561 fputs ("@GOTNTPOFF", file);
9563 case UNSPEC_INDNTPOFF:
9564 output_addr_const (file, op);
9565 fputs ("@INDNTPOFF", file);
9575 /* Split one or more DImode RTL references into pairs of SImode
9576 references. The RTL can be REG, offsettable MEM, integer constant, or
9577 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9578 split and "num" is its length. lo_half and hi_half are output arrays
9579 that parallel "operands". */
9582 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9586 rtx op = operands[num];
9588 /* simplify_subreg refuse to split volatile memory addresses,
9589 but we still have to handle it. */
9592 lo_half[num] = adjust_address (op, SImode, 0);
9593 hi_half[num] = adjust_address (op, SImode, 4);
9597 lo_half[num] = simplify_gen_subreg (SImode, op,
9598 GET_MODE (op) == VOIDmode
9599 ? DImode : GET_MODE (op), 0);
9600 hi_half[num] = simplify_gen_subreg (SImode, op,
9601 GET_MODE (op) == VOIDmode
9602 ? DImode : GET_MODE (op), 4);
9606 /* Split one or more TImode RTL references into pairs of DImode
9607 references. The RTL can be REG, offsettable MEM, integer constant, or
9608 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9609 split and "num" is its length. lo_half and hi_half are output arrays
9610 that parallel "operands". */
9613 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9617 rtx op = operands[num];
9619 /* simplify_subreg refuse to split volatile memory addresses, but we
9620 still have to handle it. */
9623 lo_half[num] = adjust_address (op, DImode, 0);
9624 hi_half[num] = adjust_address (op, DImode, 8);
9628 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9629 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9634 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9635 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9636 is the expression of the binary operation. The output may either be
9637 emitted here, or returned to the caller, like all output_* functions.
9639 There is no guarantee that the operands are the same mode, as they
9640 might be within FLOAT or FLOAT_EXTEND expressions. */
9642 #ifndef SYSV386_COMPAT
9643 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9644 wants to fix the assemblers because that causes incompatibility
9645 with gcc. No-one wants to fix gcc because that causes
9646 incompatibility with assemblers... You can use the option of
9647 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9648 #define SYSV386_COMPAT 1
9652 output_387_binary_op (rtx insn, rtx *operands)
9654 static char buf[30];
9657 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9659 #ifdef ENABLE_CHECKING
9660 /* Even if we do not want to check the inputs, this documents input
9661 constraints. Which helps in understanding the following code. */
9662 if (STACK_REG_P (operands[0])
9663 && ((REG_P (operands[1])
9664 && REGNO (operands[0]) == REGNO (operands[1])
9665 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9666 || (REG_P (operands[2])
9667 && REGNO (operands[0]) == REGNO (operands[2])
9668 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9669 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9672 gcc_assert (is_sse);
9675 switch (GET_CODE (operands[3]))
9678 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9679 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9687 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9688 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9696 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9697 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9705 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9706 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9720 if (GET_MODE (operands[0]) == SFmode)
9721 strcat (buf, "ss\t{%2, %0|%0, %2}");
9723 strcat (buf, "sd\t{%2, %0|%0, %2}");
9728 switch (GET_CODE (operands[3]))
9732 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9734 rtx temp = operands[2];
9735 operands[2] = operands[1];
9739 /* know operands[0] == operands[1]. */
9741 if (MEM_P (operands[2]))
9747 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9749 if (STACK_TOP_P (operands[0]))
9750 /* How is it that we are storing to a dead operand[2]?
9751 Well, presumably operands[1] is dead too. We can't
9752 store the result to st(0) as st(0) gets popped on this
9753 instruction. Instead store to operands[2] (which I
9754 think has to be st(1)). st(1) will be popped later.
9755 gcc <= 2.8.1 didn't have this check and generated
9756 assembly code that the Unixware assembler rejected. */
9757 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9759 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9763 if (STACK_TOP_P (operands[0]))
9764 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9766 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9771 if (MEM_P (operands[1]))
9777 if (MEM_P (operands[2]))
9783 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9786 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9787 derived assemblers, confusingly reverse the direction of
9788 the operation for fsub{r} and fdiv{r} when the
9789 destination register is not st(0). The Intel assembler
9790 doesn't have this brain damage. Read !SYSV386_COMPAT to
9791 figure out what the hardware really does. */
9792 if (STACK_TOP_P (operands[0]))
9793 p = "{p\t%0, %2|rp\t%2, %0}";
9795 p = "{rp\t%2, %0|p\t%0, %2}";
9797 if (STACK_TOP_P (operands[0]))
9798 /* As above for fmul/fadd, we can't store to st(0). */
9799 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9801 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9806 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9809 if (STACK_TOP_P (operands[0]))
9810 p = "{rp\t%0, %1|p\t%1, %0}";
9812 p = "{p\t%1, %0|rp\t%0, %1}";
9814 if (STACK_TOP_P (operands[0]))
9815 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9817 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9822 if (STACK_TOP_P (operands[0]))
9824 if (STACK_TOP_P (operands[1]))
9825 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9827 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9830 else if (STACK_TOP_P (operands[1]))
9833 p = "{\t%1, %0|r\t%0, %1}";
9835 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9841 p = "{r\t%2, %0|\t%0, %2}";
9843 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9856 /* Return needed mode for entity in optimize_mode_switching pass. */
9859 ix86_mode_needed (int entity, rtx insn)
9861 enum attr_i387_cw mode;
9863 /* The mode UNINITIALIZED is used to store control word after a
9864 function call or ASM pattern. The mode ANY specify that function
9865 has no requirements on the control word and make no changes in the
9866 bits we are interested in. */
9869 || (NONJUMP_INSN_P (insn)
9870 && (asm_noperands (PATTERN (insn)) >= 0
9871 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9872 return I387_CW_UNINITIALIZED;
9874 if (recog_memoized (insn) < 0)
9877 mode = get_attr_i387_cw (insn);
9882 if (mode == I387_CW_TRUNC)
9887 if (mode == I387_CW_FLOOR)
9892 if (mode == I387_CW_CEIL)
9897 if (mode == I387_CW_MASK_PM)
9908 /* Output code to initialize control word copies used by trunc?f?i and
9909 rounding patterns. CURRENT_MODE is set to current control word,
9910 while NEW_MODE is set to new control word. */
9913 emit_i387_cw_initialization (int mode)
9915 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9918 enum ix86_stack_slot slot;
9920 rtx reg = gen_reg_rtx (HImode);
9922 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9923 emit_move_insn (reg, copy_rtx (stored_mode));
9925 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9930 /* round toward zero (truncate) */
9931 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9932 slot = SLOT_CW_TRUNC;
9936 /* round down toward -oo */
9937 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9938 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9939 slot = SLOT_CW_FLOOR;
9943 /* round up toward +oo */
9944 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9945 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9946 slot = SLOT_CW_CEIL;
9949 case I387_CW_MASK_PM:
9950 /* mask precision exception for nearbyint() */
9951 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9952 slot = SLOT_CW_MASK_PM;
9964 /* round toward zero (truncate) */
9965 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9966 slot = SLOT_CW_TRUNC;
9970 /* round down toward -oo */
9971 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9972 slot = SLOT_CW_FLOOR;
9976 /* round up toward +oo */
9977 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9978 slot = SLOT_CW_CEIL;
9981 case I387_CW_MASK_PM:
9982 /* mask precision exception for nearbyint() */
9983 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9984 slot = SLOT_CW_MASK_PM;
9992 gcc_assert (slot < MAX_386_STACK_LOCALS);
9994 new_mode = assign_386_stack_local (HImode, slot);
9995 emit_move_insn (new_mode, reg);
9998 /* Output code for INSN to convert a float to a signed int. OPERANDS
9999 are the insn operands. The output may be [HSD]Imode and the input
10000 operand may be [SDX]Fmode. */
10003 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
10005 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10006 int dimode_p = GET_MODE (operands[0]) == DImode;
10007 int round_mode = get_attr_i387_cw (insn);
10009 /* Jump through a hoop or two for DImode, since the hardware has no
10010 non-popping instruction. We used to do this a different way, but
10011 that was somewhat fragile and broke with post-reload splitters. */
10012 if ((dimode_p || fisttp) && !stack_top_dies)
10013 output_asm_insn ("fld\t%y1", operands);
10015 gcc_assert (STACK_TOP_P (operands[1]));
10016 gcc_assert (MEM_P (operands[0]));
10017 gcc_assert (GET_MODE (operands[1]) != TFmode);
10020 output_asm_insn ("fisttp%z0\t%0", operands);
10023 if (round_mode != I387_CW_ANY)
10024 output_asm_insn ("fldcw\t%3", operands);
10025 if (stack_top_dies || dimode_p)
10026 output_asm_insn ("fistp%z0\t%0", operands);
10028 output_asm_insn ("fist%z0\t%0", operands);
10029 if (round_mode != I387_CW_ANY)
10030 output_asm_insn ("fldcw\t%2", operands);
10036 /* Output code for x87 ffreep insn. The OPNO argument, which may only
10037 have the values zero or one, indicates the ffreep insn's operand
10038 from the OPERANDS array. */
10040 static const char *
10041 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
10043 if (TARGET_USE_FFREEP)
10044 #if HAVE_AS_IX86_FFREEP
10045 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
10048 static char retval[] = ".word\t0xc_df";
10049 int regno = REGNO (operands[opno]);
10051 gcc_assert (FP_REGNO_P (regno));
10053 retval[9] = '0' + (regno - FIRST_STACK_REG);
10058 return opno ? "fstp\t%y1" : "fstp\t%y0";
10062 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
10063 should be used. UNORDERED_P is true when fucom should be used. */
10066 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
10068 int stack_top_dies;
10069 rtx cmp_op0, cmp_op1;
10070 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
10074 cmp_op0 = operands[0];
10075 cmp_op1 = operands[1];
10079 cmp_op0 = operands[1];
10080 cmp_op1 = operands[2];
10085 if (GET_MODE (operands[0]) == SFmode)
10087 return "ucomiss\t{%1, %0|%0, %1}";
10089 return "comiss\t{%1, %0|%0, %1}";
10092 return "ucomisd\t{%1, %0|%0, %1}";
10094 return "comisd\t{%1, %0|%0, %1}";
10097 gcc_assert (STACK_TOP_P (cmp_op0));
10099 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10101 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
10103 if (stack_top_dies)
10105 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
10106 return output_387_ffreep (operands, 1);
10109 return "ftst\n\tfnstsw\t%0";
10112 if (STACK_REG_P (cmp_op1)
10114 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
10115 && REGNO (cmp_op1) != FIRST_STACK_REG)
10117 /* If both the top of the 387 stack dies, and the other operand
10118 is also a stack register that dies, then this must be a
10119 `fcompp' float compare */
10123 /* There is no double popping fcomi variant. Fortunately,
10124 eflags is immune from the fstp's cc clobbering. */
10126 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
10128 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
10129 return output_387_ffreep (operands, 0);
10134 return "fucompp\n\tfnstsw\t%0";
10136 return "fcompp\n\tfnstsw\t%0";
10141 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
10143 static const char * const alt[16] =
10145 "fcom%z2\t%y2\n\tfnstsw\t%0",
10146 "fcomp%z2\t%y2\n\tfnstsw\t%0",
10147 "fucom%z2\t%y2\n\tfnstsw\t%0",
10148 "fucomp%z2\t%y2\n\tfnstsw\t%0",
10150 "ficom%z2\t%y2\n\tfnstsw\t%0",
10151 "ficomp%z2\t%y2\n\tfnstsw\t%0",
10155 "fcomi\t{%y1, %0|%0, %y1}",
10156 "fcomip\t{%y1, %0|%0, %y1}",
10157 "fucomi\t{%y1, %0|%0, %y1}",
10158 "fucomip\t{%y1, %0|%0, %y1}",
10169 mask = eflags_p << 3;
10170 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10171 mask |= unordered_p << 1;
10172 mask |= stack_top_dies;
10174 gcc_assert (mask < 16);
10183 ix86_output_addr_vec_elt (FILE *file, int value)
10185 const char *directive = ASM_LONG;
10189 directive = ASM_QUAD;
10191 gcc_assert (!TARGET_64BIT);
10194 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10198 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10200 const char *directive = ASM_LONG;
10203 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10204 directive = ASM_QUAD;
10206 gcc_assert (!TARGET_64BIT);
10208 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10209 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10210 fprintf (file, "%s%s%d-%s%d\n",
10211 directive, LPREFIX, value, LPREFIX, rel);
10212 else if (HAVE_AS_GOTOFF_IN_DATA)
10213 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10215 else if (TARGET_MACHO)
10217 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10218 machopic_output_function_base_name (file);
10219 fprintf(file, "\n");
10223 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10224 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10227 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10231 ix86_expand_clear (rtx dest)
10235 /* We play register width games, which are only valid after reload. */
10236 gcc_assert (reload_completed);
10238 /* Avoid HImode and its attendant prefix byte. */
10239 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10240 dest = gen_rtx_REG (SImode, REGNO (dest));
10241 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10243 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10244 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10246 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10247 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10253 /* X is an unchanging MEM. If it is a constant pool reference, return
10254 the constant pool rtx, else NULL. */
10257 maybe_get_pool_constant (rtx x)
10259 x = ix86_delegitimize_address (XEXP (x, 0));
10261 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10262 return get_pool_constant (x);
10268 ix86_expand_move (enum machine_mode mode, rtx operands[])
10271 enum tls_model model;
10276 if (GET_CODE (op1) == SYMBOL_REF)
10278 model = SYMBOL_REF_TLS_MODEL (op1);
10281 op1 = legitimize_tls_address (op1, model, true);
10282 op1 = force_operand (op1, op0);
10286 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10287 && SYMBOL_REF_DLLIMPORT_P (op1))
10288 op1 = legitimize_dllimport_symbol (op1, false);
10290 else if (GET_CODE (op1) == CONST
10291 && GET_CODE (XEXP (op1, 0)) == PLUS
10292 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10294 rtx addend = XEXP (XEXP (op1, 0), 1);
10295 rtx symbol = XEXP (XEXP (op1, 0), 0);
10298 model = SYMBOL_REF_TLS_MODEL (symbol);
10300 tmp = legitimize_tls_address (symbol, model, true);
10301 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10302 && SYMBOL_REF_DLLIMPORT_P (symbol))
10303 tmp = legitimize_dllimport_symbol (symbol, true);
10307 tmp = force_operand (tmp, NULL);
10308 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10309 op0, 1, OPTAB_DIRECT);
10315 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10317 if (TARGET_MACHO && !TARGET_64BIT)
10322 rtx temp = ((reload_in_progress
10323 || ((op0 && REG_P (op0))
10325 ? op0 : gen_reg_rtx (Pmode));
10326 op1 = machopic_indirect_data_reference (op1, temp);
10327 op1 = machopic_legitimize_pic_address (op1, mode,
10328 temp == op1 ? 0 : temp);
10330 else if (MACHOPIC_INDIRECT)
10331 op1 = machopic_indirect_data_reference (op1, 0);
10339 op1 = force_reg (Pmode, op1);
10340 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10342 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10343 op1 = legitimize_pic_address (op1, reg);
10352 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10353 || !push_operand (op0, mode))
10355 op1 = force_reg (mode, op1);
10357 if (push_operand (op0, mode)
10358 && ! general_no_elim_operand (op1, mode))
10359 op1 = copy_to_mode_reg (mode, op1);
10361 /* Force large constants in 64bit compilation into register
10362 to get them CSEed. */
10363 if (can_create_pseudo_p ()
10364 && (mode == DImode) && TARGET_64BIT
10365 && immediate_operand (op1, mode)
10366 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10367 && !register_operand (op0, mode)
10369 op1 = copy_to_mode_reg (mode, op1);
10371 if (can_create_pseudo_p ()
10372 && FLOAT_MODE_P (mode)
10373 && GET_CODE (op1) == CONST_DOUBLE)
10375 /* If we are loading a floating point constant to a register,
10376 force the value to memory now, since we'll get better code
10377 out the back end. */
10379 op1 = validize_mem (force_const_mem (mode, op1));
10380 if (!register_operand (op0, mode))
10382 rtx temp = gen_reg_rtx (mode);
10383 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10384 emit_move_insn (op0, temp);
10390 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10394 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10396 rtx op0 = operands[0], op1 = operands[1];
10397 unsigned int align = GET_MODE_ALIGNMENT (mode);
10399 /* Force constants other than zero into memory. We do not know how
10400 the instructions used to build constants modify the upper 64 bits
10401 of the register, once we have that information we may be able
10402 to handle some of them more efficiently. */
10403 if (can_create_pseudo_p ()
10404 && register_operand (op0, mode)
10405 && (CONSTANT_P (op1)
10406 || (GET_CODE (op1) == SUBREG
10407 && CONSTANT_P (SUBREG_REG (op1))))
10408 && standard_sse_constant_p (op1) <= 0)
10409 op1 = validize_mem (force_const_mem (mode, op1));
10411 /* TDmode values are passed as TImode on the stack. TImode values
10412 are moved via xmm registers, and moving them to stack can result in
10413 unaligned memory access. Use ix86_expand_vector_move_misalign()
10414 if memory operand is not aligned correctly. */
10415 if (can_create_pseudo_p ()
10416 && (mode == TImode) && !TARGET_64BIT
10417 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10418 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10422 /* ix86_expand_vector_move_misalign() does not like constants ... */
10423 if (CONSTANT_P (op1)
10424 || (GET_CODE (op1) == SUBREG
10425 && CONSTANT_P (SUBREG_REG (op1))))
10426 op1 = validize_mem (force_const_mem (mode, op1));
10428 /* ... nor both arguments in memory. */
10429 if (!register_operand (op0, mode)
10430 && !register_operand (op1, mode))
10431 op1 = force_reg (mode, op1);
10433 tmp[0] = op0; tmp[1] = op1;
10434 ix86_expand_vector_move_misalign (mode, tmp);
10438 /* Make operand1 a register if it isn't already. */
10439 if (can_create_pseudo_p ()
10440 && !register_operand (op0, mode)
10441 && !register_operand (op1, mode))
10443 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10447 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10450 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10451 straight to ix86_expand_vector_move. */
10452 /* Code generation for scalar reg-reg moves of single and double precision data:
10453 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10457 if (x86_sse_partial_reg_dependency == true)
10462 Code generation for scalar loads of double precision data:
10463 if (x86_sse_split_regs == true)
10464 movlpd mem, reg (gas syntax)
10468 Code generation for unaligned packed loads of single precision data
10469 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10470 if (x86_sse_unaligned_move_optimal)
10473 if (x86_sse_partial_reg_dependency == true)
10485 Code generation for unaligned packed loads of double precision data
10486 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10487 if (x86_sse_unaligned_move_optimal)
10490 if (x86_sse_split_regs == true)
10503 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10512 /* If we're optimizing for size, movups is the smallest. */
10515 op0 = gen_lowpart (V4SFmode, op0);
10516 op1 = gen_lowpart (V4SFmode, op1);
10517 emit_insn (gen_sse_movups (op0, op1));
10521 /* ??? If we have typed data, then it would appear that using
10522 movdqu is the only way to get unaligned data loaded with
10524 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10526 op0 = gen_lowpart (V16QImode, op0);
10527 op1 = gen_lowpart (V16QImode, op1);
10528 emit_insn (gen_sse2_movdqu (op0, op1));
10532 if (TARGET_SSE2 && mode == V2DFmode)
10536 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10538 op0 = gen_lowpart (V2DFmode, op0);
10539 op1 = gen_lowpart (V2DFmode, op1);
10540 emit_insn (gen_sse2_movupd (op0, op1));
10544 /* When SSE registers are split into halves, we can avoid
10545 writing to the top half twice. */
10546 if (TARGET_SSE_SPLIT_REGS)
10548 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10553 /* ??? Not sure about the best option for the Intel chips.
10554 The following would seem to satisfy; the register is
10555 entirely cleared, breaking the dependency chain. We
10556 then store to the upper half, with a dependency depth
10557 of one. A rumor has it that Intel recommends two movsd
10558 followed by an unpacklpd, but this is unconfirmed. And
10559 given that the dependency depth of the unpacklpd would
10560 still be one, I'm not sure why this would be better. */
10561 zero = CONST0_RTX (V2DFmode);
10564 m = adjust_address (op1, DFmode, 0);
10565 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10566 m = adjust_address (op1, DFmode, 8);
10567 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10571 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10573 op0 = gen_lowpart (V4SFmode, op0);
10574 op1 = gen_lowpart (V4SFmode, op1);
10575 emit_insn (gen_sse_movups (op0, op1));
10579 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10580 emit_move_insn (op0, CONST0_RTX (mode));
10582 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10584 if (mode != V4SFmode)
10585 op0 = gen_lowpart (V4SFmode, op0);
10586 m = adjust_address (op1, V2SFmode, 0);
10587 emit_insn (gen_sse_loadlps (op0, op0, m));
10588 m = adjust_address (op1, V2SFmode, 8);
10589 emit_insn (gen_sse_loadhps (op0, op0, m));
10592 else if (MEM_P (op0))
10594 /* If we're optimizing for size, movups is the smallest. */
10597 op0 = gen_lowpart (V4SFmode, op0);
10598 op1 = gen_lowpart (V4SFmode, op1);
10599 emit_insn (gen_sse_movups (op0, op1));
10603 /* ??? Similar to above, only less clear because of quote
10604 typeless stores unquote. */
10605 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10606 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10608 op0 = gen_lowpart (V16QImode, op0);
10609 op1 = gen_lowpart (V16QImode, op1);
10610 emit_insn (gen_sse2_movdqu (op0, op1));
10614 if (TARGET_SSE2 && mode == V2DFmode)
10616 m = adjust_address (op0, DFmode, 0);
10617 emit_insn (gen_sse2_storelpd (m, op1));
10618 m = adjust_address (op0, DFmode, 8);
10619 emit_insn (gen_sse2_storehpd (m, op1));
10623 if (mode != V4SFmode)
10624 op1 = gen_lowpart (V4SFmode, op1);
10625 m = adjust_address (op0, V2SFmode, 0);
10626 emit_insn (gen_sse_storelps (m, op1));
10627 m = adjust_address (op0, V2SFmode, 8);
10628 emit_insn (gen_sse_storehps (m, op1));
10632 gcc_unreachable ();
10635 /* Expand a push in MODE. This is some mode for which we do not support
10636 proper push instructions, at least from the registers that we expect
10637 the value to live in. */
10640 ix86_expand_push (enum machine_mode mode, rtx x)
10644 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10645 GEN_INT (-GET_MODE_SIZE (mode)),
10646 stack_pointer_rtx, 1, OPTAB_DIRECT);
10647 if (tmp != stack_pointer_rtx)
10648 emit_move_insn (stack_pointer_rtx, tmp);
10650 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10651 emit_move_insn (tmp, x);
10654 /* Helper function of ix86_fixup_binary_operands to canonicalize
10655 operand order. Returns true if the operands should be swapped. */
10658 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10661 rtx dst = operands[0];
10662 rtx src1 = operands[1];
10663 rtx src2 = operands[2];
10665 /* If the operation is not commutative, we can't do anything. */
10666 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10669 /* Highest priority is that src1 should match dst. */
10670 if (rtx_equal_p (dst, src1))
10672 if (rtx_equal_p (dst, src2))
10675 /* Next highest priority is that immediate constants come second. */
10676 if (immediate_operand (src2, mode))
10678 if (immediate_operand (src1, mode))
10681 /* Lowest priority is that memory references should come second. */
10691 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10692 destination to use for the operation. If different from the true
10693 destination in operands[0], a copy operation will be required. */
10696 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10699 rtx dst = operands[0];
10700 rtx src1 = operands[1];
10701 rtx src2 = operands[2];
10703 /* Canonicalize operand order. */
10704 if (ix86_swap_binary_operands_p (code, mode, operands))
10708 /* It is invalid to swap operands of different modes. */
10709 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
10716 /* Both source operands cannot be in memory. */
10717 if (MEM_P (src1) && MEM_P (src2))
10719 /* Optimization: Only read from memory once. */
10720 if (rtx_equal_p (src1, src2))
10722 src2 = force_reg (mode, src2);
10726 src2 = force_reg (mode, src2);
10729 /* If the destination is memory, and we do not have matching source
10730 operands, do things in registers. */
10731 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10732 dst = gen_reg_rtx (mode);
10734 /* Source 1 cannot be a constant. */
10735 if (CONSTANT_P (src1))
10736 src1 = force_reg (mode, src1);
10738 /* Source 1 cannot be a non-matching memory. */
10739 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10740 src1 = force_reg (mode, src1);
10742 operands[1] = src1;
10743 operands[2] = src2;
10747 /* Similarly, but assume that the destination has already been
10748 set up properly. */
10751 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10752 enum machine_mode mode, rtx operands[])
10754 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10755 gcc_assert (dst == operands[0]);
10758 /* Attempt to expand a binary operator. Make the expansion closer to the
10759 actual machine, then just general_operand, which will allow 3 separate
10760 memory references (one output, two input) in a single insn. */
10763 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10766 rtx src1, src2, dst, op, clob;
10768 dst = ix86_fixup_binary_operands (code, mode, operands);
10769 src1 = operands[1];
10770 src2 = operands[2];
10772 /* Emit the instruction. */
10774 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10775 if (reload_in_progress)
10777 /* Reload doesn't know about the flags register, and doesn't know that
10778 it doesn't want to clobber it. We can only do this with PLUS. */
10779 gcc_assert (code == PLUS);
10784 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10785 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10788 /* Fix up the destination if needed. */
10789 if (dst != operands[0])
10790 emit_move_insn (operands[0], dst);
10793 /* Return TRUE or FALSE depending on whether the binary operator meets the
10794 appropriate constraints. */
10797 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10800 rtx dst = operands[0];
10801 rtx src1 = operands[1];
10802 rtx src2 = operands[2];
10804 /* Both source operands cannot be in memory. */
10805 if (MEM_P (src1) && MEM_P (src2))
10808 /* Canonicalize operand order for commutative operators. */
10809 if (ix86_swap_binary_operands_p (code, mode, operands))
10816 /* If the destination is memory, we must have a matching source operand. */
10817 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10820 /* Source 1 cannot be a constant. */
10821 if (CONSTANT_P (src1))
10824 /* Source 1 cannot be a non-matching memory. */
10825 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10831 /* Attempt to expand a unary operator. Make the expansion closer to the
10832 actual machine, then just general_operand, which will allow 2 separate
10833 memory references (one output, one input) in a single insn. */
10836 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10839 int matching_memory;
10840 rtx src, dst, op, clob;
10845 /* If the destination is memory, and we do not have matching source
10846 operands, do things in registers. */
10847 matching_memory = 0;
10850 if (rtx_equal_p (dst, src))
10851 matching_memory = 1;
10853 dst = gen_reg_rtx (mode);
10856 /* When source operand is memory, destination must match. */
10857 if (MEM_P (src) && !matching_memory)
10858 src = force_reg (mode, src);
10860 /* Emit the instruction. */
10862 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10863 if (reload_in_progress || code == NOT)
10865 /* Reload doesn't know about the flags register, and doesn't know that
10866 it doesn't want to clobber it. */
10867 gcc_assert (code == NOT);
10872 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10873 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10876 /* Fix up the destination if needed. */
10877 if (dst != operands[0])
10878 emit_move_insn (operands[0], dst);
10881 /* Return TRUE or FALSE depending on whether the unary operator meets the
10882 appropriate constraints. */
10885 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10886 enum machine_mode mode ATTRIBUTE_UNUSED,
10887 rtx operands[2] ATTRIBUTE_UNUSED)
10889 /* If one of operands is memory, source and destination must match. */
10890 if ((MEM_P (operands[0])
10891 || MEM_P (operands[1]))
10892 && ! rtx_equal_p (operands[0], operands[1]))
10897 /* Post-reload splitter for converting an SF or DFmode value in an
10898 SSE register into an unsigned SImode. */
10901 ix86_split_convert_uns_si_sse (rtx operands[])
10903 enum machine_mode vecmode;
10904 rtx value, large, zero_or_two31, input, two31, x;
10906 large = operands[1];
10907 zero_or_two31 = operands[2];
10908 input = operands[3];
10909 two31 = operands[4];
10910 vecmode = GET_MODE (large);
10911 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10913 /* Load up the value into the low element. We must ensure that the other
10914 elements are valid floats -- zero is the easiest such value. */
10917 if (vecmode == V4SFmode)
10918 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10920 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10924 input = gen_rtx_REG (vecmode, REGNO (input));
10925 emit_move_insn (value, CONST0_RTX (vecmode));
10926 if (vecmode == V4SFmode)
10927 emit_insn (gen_sse_movss (value, value, input));
10929 emit_insn (gen_sse2_movsd (value, value, input));
10932 emit_move_insn (large, two31);
10933 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10935 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10936 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10938 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10939 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10941 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10942 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10944 large = gen_rtx_REG (V4SImode, REGNO (large));
10945 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10947 x = gen_rtx_REG (V4SImode, REGNO (value));
10948 if (vecmode == V4SFmode)
10949 emit_insn (gen_sse2_cvttps2dq (x, value));
10951 emit_insn (gen_sse2_cvttpd2dq (x, value));
10954 emit_insn (gen_xorv4si3 (value, value, large));
10957 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10958 Expects the 64-bit DImode to be supplied in a pair of integral
10959 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10960 -mfpmath=sse, !optimize_size only. */
10963 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10965 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10966 rtx int_xmm, fp_xmm;
10967 rtx biases, exponents;
10970 int_xmm = gen_reg_rtx (V4SImode);
10971 if (TARGET_INTER_UNIT_MOVES)
10972 emit_insn (gen_movdi_to_sse (int_xmm, input));
10973 else if (TARGET_SSE_SPLIT_REGS)
10975 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10976 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10980 x = gen_reg_rtx (V2DImode);
10981 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10982 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10985 x = gen_rtx_CONST_VECTOR (V4SImode,
10986 gen_rtvec (4, GEN_INT (0x43300000UL),
10987 GEN_INT (0x45300000UL),
10988 const0_rtx, const0_rtx));
10989 exponents = validize_mem (force_const_mem (V4SImode, x));
10991 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10992 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10994 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10995 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10996 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10997 (0x1.0p84 + double(fp_value_hi_xmm)).
10998 Note these exponents differ by 32. */
11000 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
11002 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
11003 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
11004 real_ldexp (&bias_lo_rvt, &dconst1, 52);
11005 real_ldexp (&bias_hi_rvt, &dconst1, 84);
11006 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
11007 x = const_double_from_real_value (bias_hi_rvt, DFmode);
11008 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
11009 biases = validize_mem (force_const_mem (V2DFmode, biases));
11010 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
11012 /* Add the upper and lower DFmode values together. */
11014 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
11017 x = copy_to_mode_reg (V2DFmode, fp_xmm);
11018 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
11019 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
11022 ix86_expand_vector_extract (false, target, fp_xmm, 0);
11025 /* Not used, but eases macroization of patterns. */
11027 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
11028 rtx input ATTRIBUTE_UNUSED)
11030 gcc_unreachable ();
11033 /* Convert an unsigned SImode value into a DFmode. Only currently used
11034 for SSE, but applicable anywhere. */
11037 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
11039 REAL_VALUE_TYPE TWO31r;
11042 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
11043 NULL, 1, OPTAB_DIRECT);
11045 fp = gen_reg_rtx (DFmode);
11046 emit_insn (gen_floatsidf2 (fp, x));
11048 real_ldexp (&TWO31r, &dconst1, 31);
11049 x = const_double_from_real_value (TWO31r, DFmode);
11051 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
11053 emit_move_insn (target, x);
11056 /* Convert a signed DImode value into a DFmode. Only used for SSE in
11057 32-bit mode; otherwise we have a direct convert instruction. */
11060 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
11062 REAL_VALUE_TYPE TWO32r;
11063 rtx fp_lo, fp_hi, x;
11065 fp_lo = gen_reg_rtx (DFmode);
11066 fp_hi = gen_reg_rtx (DFmode);
11068 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
11070 real_ldexp (&TWO32r, &dconst1, 32);
11071 x = const_double_from_real_value (TWO32r, DFmode);
11072 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
11074 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
11076 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
11079 emit_move_insn (target, x);
11082 /* Convert an unsigned SImode value into a SFmode, using only SSE.
11083 For x86_32, -mfpmath=sse, !optimize_size only. */
11085 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
11087 REAL_VALUE_TYPE ONE16r;
11088 rtx fp_hi, fp_lo, int_hi, int_lo, x;
11090 real_ldexp (&ONE16r, &dconst1, 16);
11091 x = const_double_from_real_value (ONE16r, SFmode);
11092 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
11093 NULL, 0, OPTAB_DIRECT);
11094 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
11095 NULL, 0, OPTAB_DIRECT);
11096 fp_hi = gen_reg_rtx (SFmode);
11097 fp_lo = gen_reg_rtx (SFmode);
11098 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
11099 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
11100 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
11102 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
11104 if (!rtx_equal_p (target, fp_hi))
11105 emit_move_insn (target, fp_hi);
11108 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
11109 then replicate the value for all elements of the vector
11113 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
11120 v = gen_rtvec (4, value, value, value, value);
11121 return gen_rtx_CONST_VECTOR (V4SImode, v);
11125 v = gen_rtvec (2, value, value);
11126 return gen_rtx_CONST_VECTOR (V2DImode, v);
11130 v = gen_rtvec (4, value, value, value, value);
11132 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
11133 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11134 return gen_rtx_CONST_VECTOR (V4SFmode, v);
11138 v = gen_rtvec (2, value, value);
11140 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
11141 return gen_rtx_CONST_VECTOR (V2DFmode, v);
11144 gcc_unreachable ();
11148 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
11149 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
11150 for an SSE register. If VECT is true, then replicate the mask for
11151 all elements of the vector register. If INVERT is true, then create
11152 a mask excluding the sign bit. */
11155 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
11157 enum machine_mode vec_mode, imode;
11158 HOST_WIDE_INT hi, lo;
11163 /* Find the sign bit, sign extended to 2*HWI. */
11169 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
11170 lo = 0x80000000, hi = lo < 0;
11176 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
11177 if (HOST_BITS_PER_WIDE_INT >= 64)
11178 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
11180 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
11186 vec_mode = VOIDmode;
11187 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
11188 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11192 gcc_unreachable ();
11196 lo = ~lo, hi = ~hi;
11198 /* Force this value into the low part of a fp vector constant. */
11199 mask = immed_double_const (lo, hi, imode);
11200 mask = gen_lowpart (mode, mask);
11202 if (vec_mode == VOIDmode)
11203 return force_reg (mode, mask);
11205 v = ix86_build_const_vector (mode, vect, mask);
11206 return force_reg (vec_mode, v);
11209 /* Generate code for floating point ABS or NEG. */
11212 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11215 rtx mask, set, use, clob, dst, src;
11216 bool use_sse = false;
11217 bool vector_mode = VECTOR_MODE_P (mode);
11218 enum machine_mode elt_mode = mode;
11222 elt_mode = GET_MODE_INNER (mode);
11225 else if (mode == TFmode)
11227 else if (TARGET_SSE_MATH)
11228 use_sse = SSE_FLOAT_MODE_P (mode);
11230 /* NEG and ABS performed with SSE use bitwise mask operations.
11231 Create the appropriate mask now. */
11233 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11242 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11243 set = gen_rtx_SET (VOIDmode, dst, set);
11248 set = gen_rtx_fmt_e (code, mode, src);
11249 set = gen_rtx_SET (VOIDmode, dst, set);
11252 use = gen_rtx_USE (VOIDmode, mask);
11253 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11254 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11255 gen_rtvec (3, set, use, clob)));
11262 /* Expand a copysign operation. Special case operand 0 being a constant. */
11265 ix86_expand_copysign (rtx operands[])
11267 enum machine_mode mode;
11268 rtx dest, op0, op1, mask, nmask;
11270 dest = operands[0];
11274 mode = GET_MODE (dest);
11276 if (GET_CODE (op0) == CONST_DOUBLE)
11278 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11280 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11281 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11283 if (mode == SFmode || mode == DFmode)
11285 enum machine_mode vmode;
11287 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11289 if (op0 == CONST0_RTX (mode))
11290 op0 = CONST0_RTX (vmode);
11295 if (mode == SFmode)
11296 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11297 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11299 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11301 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11304 else if (op0 != CONST0_RTX (mode))
11305 op0 = force_reg (mode, op0);
11307 mask = ix86_build_signbit_mask (mode, 0, 0);
11309 if (mode == SFmode)
11310 copysign_insn = gen_copysignsf3_const;
11311 else if (mode == DFmode)
11312 copysign_insn = gen_copysigndf3_const;
11314 copysign_insn = gen_copysigntf3_const;
11316 emit_insn (copysign_insn (dest, op0, op1, mask));
11320 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11322 nmask = ix86_build_signbit_mask (mode, 0, 1);
11323 mask = ix86_build_signbit_mask (mode, 0, 0);
11325 if (mode == SFmode)
11326 copysign_insn = gen_copysignsf3_var;
11327 else if (mode == DFmode)
11328 copysign_insn = gen_copysigndf3_var;
11330 copysign_insn = gen_copysigntf3_var;
11332 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11336 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11337 be a constant, and so has already been expanded into a vector constant. */
11340 ix86_split_copysign_const (rtx operands[])
11342 enum machine_mode mode, vmode;
11343 rtx dest, op0, op1, mask, x;
11345 dest = operands[0];
11348 mask = operands[3];
11350 mode = GET_MODE (dest);
11351 vmode = GET_MODE (mask);
11353 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11354 x = gen_rtx_AND (vmode, dest, mask);
11355 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11357 if (op0 != CONST0_RTX (vmode))
11359 x = gen_rtx_IOR (vmode, dest, op0);
11360 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11364 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11365 so we have to do two masks. */
11368 ix86_split_copysign_var (rtx operands[])
11370 enum machine_mode mode, vmode;
11371 rtx dest, scratch, op0, op1, mask, nmask, x;
11373 dest = operands[0];
11374 scratch = operands[1];
11377 nmask = operands[4];
11378 mask = operands[5];
11380 mode = GET_MODE (dest);
11381 vmode = GET_MODE (mask);
11383 if (rtx_equal_p (op0, op1))
11385 /* Shouldn't happen often (it's useless, obviously), but when it does
11386 we'd generate incorrect code if we continue below. */
11387 emit_move_insn (dest, op0);
11391 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11393 gcc_assert (REGNO (op1) == REGNO (scratch));
11395 x = gen_rtx_AND (vmode, scratch, mask);
11396 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11399 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11400 x = gen_rtx_NOT (vmode, dest);
11401 x = gen_rtx_AND (vmode, x, op0);
11402 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11406 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11408 x = gen_rtx_AND (vmode, scratch, mask);
11410 else /* alternative 2,4 */
11412 gcc_assert (REGNO (mask) == REGNO (scratch));
11413 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11414 x = gen_rtx_AND (vmode, scratch, op1);
11416 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11418 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11420 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11421 x = gen_rtx_AND (vmode, dest, nmask);
11423 else /* alternative 3,4 */
11425 gcc_assert (REGNO (nmask) == REGNO (dest));
11427 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11428 x = gen_rtx_AND (vmode, dest, op0);
11430 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11433 x = gen_rtx_IOR (vmode, dest, scratch);
11434 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11437 /* Return TRUE or FALSE depending on whether the first SET in INSN
11438 has source and destination with matching CC modes, and that the
11439 CC mode is at least as constrained as REQ_MODE. */
11442 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11445 enum machine_mode set_mode;
11447 set = PATTERN (insn);
11448 if (GET_CODE (set) == PARALLEL)
11449 set = XVECEXP (set, 0, 0);
11450 gcc_assert (GET_CODE (set) == SET);
11451 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11453 set_mode = GET_MODE (SET_DEST (set));
11457 if (req_mode != CCNOmode
11458 && (req_mode != CCmode
11459 || XEXP (SET_SRC (set), 1) != const0_rtx))
11463 if (req_mode == CCGCmode)
11467 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11471 if (req_mode == CCZmode)
11478 gcc_unreachable ();
11481 return (GET_MODE (SET_SRC (set)) == set_mode);
11484 /* Generate insn patterns to do an integer compare of OPERANDS. */
11487 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11489 enum machine_mode cmpmode;
11492 cmpmode = SELECT_CC_MODE (code, op0, op1);
11493 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11495 /* This is very simple, but making the interface the same as in the
11496 FP case makes the rest of the code easier. */
11497 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11498 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11500 /* Return the test that should be put into the flags user, i.e.
11501 the bcc, scc, or cmov instruction. */
11502 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11505 /* Figure out whether to use ordered or unordered fp comparisons.
11506 Return the appropriate mode to use. */
11509 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11511 /* ??? In order to make all comparisons reversible, we do all comparisons
11512 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11513 all forms trapping and nontrapping comparisons, we can make inequality
11514 comparisons trapping again, since it results in better code when using
11515 FCOM based compares. */
11516 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11520 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11522 enum machine_mode mode = GET_MODE (op0);
11524 if (SCALAR_FLOAT_MODE_P (mode))
11526 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11527 return ix86_fp_compare_mode (code);
11532 /* Only zero flag is needed. */
11533 case EQ: /* ZF=0 */
11534 case NE: /* ZF!=0 */
11536 /* Codes needing carry flag. */
11537 case GEU: /* CF=0 */
11538 case LTU: /* CF=1 */
11539 /* Detect overflow checks. They need just the carry flag. */
11540 if (GET_CODE (op0) == PLUS
11541 && rtx_equal_p (op1, XEXP (op0, 0)))
11545 case GTU: /* CF=0 & ZF=0 */
11546 case LEU: /* CF=1 | ZF=1 */
11547 /* Detect overflow checks. They need just the carry flag. */
11548 if (GET_CODE (op0) == MINUS
11549 && rtx_equal_p (op1, XEXP (op0, 0)))
11553 /* Codes possibly doable only with sign flag when
11554 comparing against zero. */
11555 case GE: /* SF=OF or SF=0 */
11556 case LT: /* SF<>OF or SF=1 */
11557 if (op1 == const0_rtx)
11560 /* For other cases Carry flag is not required. */
11562 /* Codes doable only with sign flag when comparing
11563 against zero, but we miss jump instruction for it
11564 so we need to use relational tests against overflow
11565 that thus needs to be zero. */
11566 case GT: /* ZF=0 & SF=OF */
11567 case LE: /* ZF=1 | SF<>OF */
11568 if (op1 == const0_rtx)
11572 /* strcmp pattern do (use flags) and combine may ask us for proper
11577 gcc_unreachable ();
11581 /* Return the fixed registers used for condition codes. */
11584 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11591 /* If two condition code modes are compatible, return a condition code
11592 mode which is compatible with both. Otherwise, return
11595 static enum machine_mode
11596 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11601 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11604 if ((m1 == CCGCmode && m2 == CCGOCmode)
11605 || (m1 == CCGOCmode && m2 == CCGCmode))
11611 gcc_unreachable ();
11641 /* These are only compatible with themselves, which we already
11647 /* Split comparison code CODE into comparisons we can do using branch
11648 instructions. BYPASS_CODE is comparison code for branch that will
11649 branch around FIRST_CODE and SECOND_CODE. If some of branches
11650 is not required, set value to UNKNOWN.
11651 We never require more than two branches. */
11654 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11655 enum rtx_code *first_code,
11656 enum rtx_code *second_code)
11658 *first_code = code;
11659 *bypass_code = UNKNOWN;
11660 *second_code = UNKNOWN;
11662 /* The fcomi comparison sets flags as follows:
11672 case GT: /* GTU - CF=0 & ZF=0 */
11673 case GE: /* GEU - CF=0 */
11674 case ORDERED: /* PF=0 */
11675 case UNORDERED: /* PF=1 */
11676 case UNEQ: /* EQ - ZF=1 */
11677 case UNLT: /* LTU - CF=1 */
11678 case UNLE: /* LEU - CF=1 | ZF=1 */
11679 case LTGT: /* EQ - ZF=0 */
11681 case LT: /* LTU - CF=1 - fails on unordered */
11682 *first_code = UNLT;
11683 *bypass_code = UNORDERED;
11685 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11686 *first_code = UNLE;
11687 *bypass_code = UNORDERED;
11689 case EQ: /* EQ - ZF=1 - fails on unordered */
11690 *first_code = UNEQ;
11691 *bypass_code = UNORDERED;
11693 case NE: /* NE - ZF=0 - fails on unordered */
11694 *first_code = LTGT;
11695 *second_code = UNORDERED;
11697 case UNGE: /* GEU - CF=0 - fails on unordered */
11699 *second_code = UNORDERED;
11701 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11703 *second_code = UNORDERED;
11706 gcc_unreachable ();
11708 if (!TARGET_IEEE_FP)
11710 *second_code = UNKNOWN;
11711 *bypass_code = UNKNOWN;
11715 /* Return cost of comparison done fcom + arithmetics operations on AX.
11716 All following functions do use number of instructions as a cost metrics.
11717 In future this should be tweaked to compute bytes for optimize_size and
11718 take into account performance of various instructions on various CPUs. */
11720 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11722 if (!TARGET_IEEE_FP)
11724 /* The cost of code output by ix86_expand_fp_compare. */
11748 gcc_unreachable ();
11752 /* Return cost of comparison done using fcomi operation.
11753 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11755 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11757 enum rtx_code bypass_code, first_code, second_code;
11758 /* Return arbitrarily high cost when instruction is not supported - this
11759 prevents gcc from using it. */
11762 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11763 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11766 /* Return cost of comparison done using sahf operation.
11767 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11769 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11771 enum rtx_code bypass_code, first_code, second_code;
11772 /* Return arbitrarily high cost when instruction is not preferred - this
11773 avoids gcc from using it. */
11774 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11776 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11777 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11780 /* Compute cost of the comparison done using any method.
11781 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11783 ix86_fp_comparison_cost (enum rtx_code code)
11785 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11788 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11789 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11791 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11792 if (min > sahf_cost)
11794 if (min > fcomi_cost)
11799 /* Return true if we should use an FCOMI instruction for this
11803 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11805 enum rtx_code swapped_code = swap_condition (code);
11807 return ((ix86_fp_comparison_cost (code)
11808 == ix86_fp_comparison_fcomi_cost (code))
11809 || (ix86_fp_comparison_cost (swapped_code)
11810 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11813 /* Swap, force into registers, or otherwise massage the two operands
11814 to a fp comparison. The operands are updated in place; the new
11815 comparison code is returned. */
11817 static enum rtx_code
11818 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11820 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11821 rtx op0 = *pop0, op1 = *pop1;
11822 enum machine_mode op_mode = GET_MODE (op0);
11823 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11825 /* All of the unordered compare instructions only work on registers.
11826 The same is true of the fcomi compare instructions. The XFmode
11827 compare instructions require registers except when comparing
11828 against zero or when converting operand 1 from fixed point to
11832 && (fpcmp_mode == CCFPUmode
11833 || (op_mode == XFmode
11834 && ! (standard_80387_constant_p (op0) == 1
11835 || standard_80387_constant_p (op1) == 1)
11836 && GET_CODE (op1) != FLOAT)
11837 || ix86_use_fcomi_compare (code)))
11839 op0 = force_reg (op_mode, op0);
11840 op1 = force_reg (op_mode, op1);
11844 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11845 things around if they appear profitable, otherwise force op0
11846 into a register. */
11848 if (standard_80387_constant_p (op0) == 0
11850 && ! (standard_80387_constant_p (op1) == 0
11854 tmp = op0, op0 = op1, op1 = tmp;
11855 code = swap_condition (code);
11859 op0 = force_reg (op_mode, op0);
11861 if (CONSTANT_P (op1))
11863 int tmp = standard_80387_constant_p (op1);
11865 op1 = validize_mem (force_const_mem (op_mode, op1));
11869 op1 = force_reg (op_mode, op1);
11872 op1 = force_reg (op_mode, op1);
11876 /* Try to rearrange the comparison to make it cheaper. */
11877 if (ix86_fp_comparison_cost (code)
11878 > ix86_fp_comparison_cost (swap_condition (code))
11879 && (REG_P (op1) || can_create_pseudo_p ()))
11882 tmp = op0, op0 = op1, op1 = tmp;
11883 code = swap_condition (code);
11885 op0 = force_reg (op_mode, op0);
11893 /* Convert comparison codes we use to represent FP comparison to integer
11894 code that will result in proper branch. Return UNKNOWN if no such code
11898 ix86_fp_compare_code_to_integer (enum rtx_code code)
11927 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11930 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11931 rtx *second_test, rtx *bypass_test)
11933 enum machine_mode fpcmp_mode, intcmp_mode;
11935 int cost = ix86_fp_comparison_cost (code);
11936 enum rtx_code bypass_code, first_code, second_code;
11938 fpcmp_mode = ix86_fp_compare_mode (code);
11939 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11942 *second_test = NULL_RTX;
11944 *bypass_test = NULL_RTX;
11946 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11948 /* Do fcomi/sahf based test when profitable. */
11949 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11950 && (bypass_code == UNKNOWN || bypass_test)
11951 && (second_code == UNKNOWN || second_test))
11953 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11954 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11960 gcc_assert (TARGET_SAHF);
11963 scratch = gen_reg_rtx (HImode);
11964 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11966 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11969 /* The FP codes work out to act like unsigned. */
11970 intcmp_mode = fpcmp_mode;
11972 if (bypass_code != UNKNOWN)
11973 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11974 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11976 if (second_code != UNKNOWN)
11977 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11978 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11983 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11984 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11985 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11987 scratch = gen_reg_rtx (HImode);
11988 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11990 /* In the unordered case, we have to check C2 for NaN's, which
11991 doesn't happen to work out to anything nice combination-wise.
11992 So do some bit twiddling on the value we've got in AH to come
11993 up with an appropriate set of condition codes. */
11995 intcmp_mode = CCNOmode;
12000 if (code == GT || !TARGET_IEEE_FP)
12002 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12007 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12008 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12009 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
12010 intcmp_mode = CCmode;
12016 if (code == LT && TARGET_IEEE_FP)
12018 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12019 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
12020 intcmp_mode = CCmode;
12025 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
12031 if (code == GE || !TARGET_IEEE_FP)
12033 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
12038 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12039 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12046 if (code == LE && TARGET_IEEE_FP)
12048 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12049 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12050 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12051 intcmp_mode = CCmode;
12056 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12062 if (code == EQ && TARGET_IEEE_FP)
12064 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12065 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12066 intcmp_mode = CCmode;
12071 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12078 if (code == NE && TARGET_IEEE_FP)
12080 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12081 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12087 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12093 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12097 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12102 gcc_unreachable ();
12106 /* Return the test that should be put into the flags user, i.e.
12107 the bcc, scc, or cmov instruction. */
12108 return gen_rtx_fmt_ee (code, VOIDmode,
12109 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12114 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
12117 op0 = ix86_compare_op0;
12118 op1 = ix86_compare_op1;
12121 *second_test = NULL_RTX;
12123 *bypass_test = NULL_RTX;
12125 if (ix86_compare_emitted)
12127 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
12128 ix86_compare_emitted = NULL_RTX;
12130 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
12132 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
12133 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12134 second_test, bypass_test);
12137 ret = ix86_expand_int_compare (code, op0, op1);
12142 /* Return true if the CODE will result in nontrivial jump sequence. */
12144 ix86_fp_jump_nontrivial_p (enum rtx_code code)
12146 enum rtx_code bypass_code, first_code, second_code;
12149 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12150 return bypass_code != UNKNOWN || second_code != UNKNOWN;
12154 ix86_expand_branch (enum rtx_code code, rtx label)
12158 /* If we have emitted a compare insn, go straight to simple.
12159 ix86_expand_compare won't emit anything if ix86_compare_emitted
12161 if (ix86_compare_emitted)
12164 switch (GET_MODE (ix86_compare_op0))
12170 tmp = ix86_expand_compare (code, NULL, NULL);
12171 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12172 gen_rtx_LABEL_REF (VOIDmode, label),
12174 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12183 enum rtx_code bypass_code, first_code, second_code;
12185 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12186 &ix86_compare_op1);
12188 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12190 /* Check whether we will use the natural sequence with one jump. If
12191 so, we can expand jump early. Otherwise delay expansion by
12192 creating compound insn to not confuse optimizers. */
12193 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12195 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12196 gen_rtx_LABEL_REF (VOIDmode, label),
12197 pc_rtx, NULL_RTX, NULL_RTX);
12201 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12202 ix86_compare_op0, ix86_compare_op1);
12203 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12204 gen_rtx_LABEL_REF (VOIDmode, label),
12206 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12208 use_fcomi = ix86_use_fcomi_compare (code);
12209 vec = rtvec_alloc (3 + !use_fcomi);
12210 RTVEC_ELT (vec, 0) = tmp;
12212 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12214 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12217 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12219 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12228 /* Expand DImode branch into multiple compare+branch. */
12230 rtx lo[2], hi[2], label2;
12231 enum rtx_code code1, code2, code3;
12232 enum machine_mode submode;
12234 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12236 tmp = ix86_compare_op0;
12237 ix86_compare_op0 = ix86_compare_op1;
12238 ix86_compare_op1 = tmp;
12239 code = swap_condition (code);
12241 if (GET_MODE (ix86_compare_op0) == DImode)
12243 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12244 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12249 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12250 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12254 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12255 avoid two branches. This costs one extra insn, so disable when
12256 optimizing for size. */
12258 if ((code == EQ || code == NE)
12260 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12265 if (hi[1] != const0_rtx)
12266 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12267 NULL_RTX, 0, OPTAB_WIDEN);
12270 if (lo[1] != const0_rtx)
12271 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12272 NULL_RTX, 0, OPTAB_WIDEN);
12274 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12275 NULL_RTX, 0, OPTAB_WIDEN);
12277 ix86_compare_op0 = tmp;
12278 ix86_compare_op1 = const0_rtx;
12279 ix86_expand_branch (code, label);
12283 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12284 op1 is a constant and the low word is zero, then we can just
12285 examine the high word. Similarly for low word -1 and
12286 less-or-equal-than or greater-than. */
12288 if (CONST_INT_P (hi[1]))
12291 case LT: case LTU: case GE: case GEU:
12292 if (lo[1] == const0_rtx)
12294 ix86_compare_op0 = hi[0];
12295 ix86_compare_op1 = hi[1];
12296 ix86_expand_branch (code, label);
12300 case LE: case LEU: case GT: case GTU:
12301 if (lo[1] == constm1_rtx)
12303 ix86_compare_op0 = hi[0];
12304 ix86_compare_op1 = hi[1];
12305 ix86_expand_branch (code, label);
12313 /* Otherwise, we need two or three jumps. */
12315 label2 = gen_label_rtx ();
12318 code2 = swap_condition (code);
12319 code3 = unsigned_condition (code);
12323 case LT: case GT: case LTU: case GTU:
12326 case LE: code1 = LT; code2 = GT; break;
12327 case GE: code1 = GT; code2 = LT; break;
12328 case LEU: code1 = LTU; code2 = GTU; break;
12329 case GEU: code1 = GTU; code2 = LTU; break;
12331 case EQ: code1 = UNKNOWN; code2 = NE; break;
12332 case NE: code2 = UNKNOWN; break;
12335 gcc_unreachable ();
12340 * if (hi(a) < hi(b)) goto true;
12341 * if (hi(a) > hi(b)) goto false;
12342 * if (lo(a) < lo(b)) goto true;
12346 ix86_compare_op0 = hi[0];
12347 ix86_compare_op1 = hi[1];
12349 if (code1 != UNKNOWN)
12350 ix86_expand_branch (code1, label);
12351 if (code2 != UNKNOWN)
12352 ix86_expand_branch (code2, label2);
12354 ix86_compare_op0 = lo[0];
12355 ix86_compare_op1 = lo[1];
12356 ix86_expand_branch (code3, label);
12358 if (code2 != UNKNOWN)
12359 emit_label (label2);
12364 gcc_unreachable ();
12368 /* Split branch based on floating point condition. */
12370 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12371 rtx target1, rtx target2, rtx tmp, rtx pushed)
12373 rtx second, bypass;
12374 rtx label = NULL_RTX;
12376 int bypass_probability = -1, second_probability = -1, probability = -1;
12379 if (target2 != pc_rtx)
12382 code = reverse_condition_maybe_unordered (code);
12387 condition = ix86_expand_fp_compare (code, op1, op2,
12388 tmp, &second, &bypass);
12390 /* Remove pushed operand from stack. */
12392 ix86_free_from_memory (GET_MODE (pushed));
12394 if (split_branch_probability >= 0)
12396 /* Distribute the probabilities across the jumps.
12397 Assume the BYPASS and SECOND to be always test
12399 probability = split_branch_probability;
12401 /* Value of 1 is low enough to make no need for probability
12402 to be updated. Later we may run some experiments and see
12403 if unordered values are more frequent in practice. */
12405 bypass_probability = 1;
12407 second_probability = 1;
12409 if (bypass != NULL_RTX)
12411 label = gen_label_rtx ();
12412 i = emit_jump_insn (gen_rtx_SET
12414 gen_rtx_IF_THEN_ELSE (VOIDmode,
12416 gen_rtx_LABEL_REF (VOIDmode,
12419 if (bypass_probability >= 0)
12421 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12422 GEN_INT (bypass_probability),
12425 i = emit_jump_insn (gen_rtx_SET
12427 gen_rtx_IF_THEN_ELSE (VOIDmode,
12428 condition, target1, target2)));
12429 if (probability >= 0)
12431 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12432 GEN_INT (probability),
12434 if (second != NULL_RTX)
12436 i = emit_jump_insn (gen_rtx_SET
12438 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12440 if (second_probability >= 0)
12442 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12443 GEN_INT (second_probability),
12446 if (label != NULL_RTX)
12447 emit_label (label);
12451 ix86_expand_setcc (enum rtx_code code, rtx dest)
12453 rtx ret, tmp, tmpreg, equiv;
12454 rtx second_test, bypass_test;
12456 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12457 return 0; /* FAIL */
12459 gcc_assert (GET_MODE (dest) == QImode);
12461 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12462 PUT_MODE (ret, QImode);
12467 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12468 if (bypass_test || second_test)
12470 rtx test = second_test;
12472 rtx tmp2 = gen_reg_rtx (QImode);
12475 gcc_assert (!second_test);
12476 test = bypass_test;
12478 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12480 PUT_MODE (test, QImode);
12481 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12484 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12486 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12489 /* Attach a REG_EQUAL note describing the comparison result. */
12490 if (ix86_compare_op0 && ix86_compare_op1)
12492 equiv = simplify_gen_relational (code, QImode,
12493 GET_MODE (ix86_compare_op0),
12494 ix86_compare_op0, ix86_compare_op1);
12495 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12498 return 1; /* DONE */
12501 /* Expand comparison setting or clearing carry flag. Return true when
12502 successful and set pop for the operation. */
12504 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12506 enum machine_mode mode =
12507 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12509 /* Do not handle DImode compares that go through special path. */
12510 if (mode == (TARGET_64BIT ? TImode : DImode))
12513 if (SCALAR_FLOAT_MODE_P (mode))
12515 rtx second_test = NULL, bypass_test = NULL;
12516 rtx compare_op, compare_seq;
12518 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12520 /* Shortcut: following common codes never translate
12521 into carry flag compares. */
12522 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12523 || code == ORDERED || code == UNORDERED)
12526 /* These comparisons require zero flag; swap operands so they won't. */
12527 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12528 && !TARGET_IEEE_FP)
12533 code = swap_condition (code);
12536 /* Try to expand the comparison and verify that we end up with
12537 carry flag based comparison. This fails to be true only when
12538 we decide to expand comparison using arithmetic that is not
12539 too common scenario. */
12541 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12542 &second_test, &bypass_test);
12543 compare_seq = get_insns ();
12546 if (second_test || bypass_test)
12549 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12550 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12551 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12553 code = GET_CODE (compare_op);
12555 if (code != LTU && code != GEU)
12558 emit_insn (compare_seq);
12563 if (!INTEGRAL_MODE_P (mode))
12572 /* Convert a==0 into (unsigned)a<1. */
12575 if (op1 != const0_rtx)
12578 code = (code == EQ ? LTU : GEU);
12581 /* Convert a>b into b<a or a>=b-1. */
12584 if (CONST_INT_P (op1))
12586 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12587 /* Bail out on overflow. We still can swap operands but that
12588 would force loading of the constant into register. */
12589 if (op1 == const0_rtx
12590 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12592 code = (code == GTU ? GEU : LTU);
12599 code = (code == GTU ? LTU : GEU);
12603 /* Convert a>=0 into (unsigned)a<0x80000000. */
12606 if (mode == DImode || op1 != const0_rtx)
12608 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12609 code = (code == LT ? GEU : LTU);
12613 if (mode == DImode || op1 != constm1_rtx)
12615 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12616 code = (code == LE ? GEU : LTU);
12622 /* Swapping operands may cause constant to appear as first operand. */
12623 if (!nonimmediate_operand (op0, VOIDmode))
12625 if (!can_create_pseudo_p ())
12627 op0 = force_reg (mode, op0);
12629 ix86_compare_op0 = op0;
12630 ix86_compare_op1 = op1;
12631 *pop = ix86_expand_compare (code, NULL, NULL);
12632 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12637 ix86_expand_int_movcc (rtx operands[])
12639 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12640 rtx compare_seq, compare_op;
12641 rtx second_test, bypass_test;
12642 enum machine_mode mode = GET_MODE (operands[0]);
12643 bool sign_bit_compare_p = false;;
12646 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12647 compare_seq = get_insns ();
12650 compare_code = GET_CODE (compare_op);
12652 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12653 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12654 sign_bit_compare_p = true;
12656 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12657 HImode insns, we'd be swallowed in word prefix ops. */
12659 if ((mode != HImode || TARGET_FAST_PREFIX)
12660 && (mode != (TARGET_64BIT ? TImode : DImode))
12661 && CONST_INT_P (operands[2])
12662 && CONST_INT_P (operands[3]))
12664 rtx out = operands[0];
12665 HOST_WIDE_INT ct = INTVAL (operands[2]);
12666 HOST_WIDE_INT cf = INTVAL (operands[3]);
12667 HOST_WIDE_INT diff;
12670 /* Sign bit compares are better done using shifts than we do by using
12672 if (sign_bit_compare_p
12673 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12674 ix86_compare_op1, &compare_op))
12676 /* Detect overlap between destination and compare sources. */
12679 if (!sign_bit_compare_p)
12681 bool fpcmp = false;
12683 compare_code = GET_CODE (compare_op);
12685 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12686 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12689 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12692 /* To simplify rest of code, restrict to the GEU case. */
12693 if (compare_code == LTU)
12695 HOST_WIDE_INT tmp = ct;
12698 compare_code = reverse_condition (compare_code);
12699 code = reverse_condition (code);
12704 PUT_CODE (compare_op,
12705 reverse_condition_maybe_unordered
12706 (GET_CODE (compare_op)));
12708 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12712 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12713 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12714 tmp = gen_reg_rtx (mode);
12716 if (mode == DImode)
12717 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12719 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12723 if (code == GT || code == GE)
12724 code = reverse_condition (code);
12727 HOST_WIDE_INT tmp = ct;
12732 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12733 ix86_compare_op1, VOIDmode, 0, -1);
12746 tmp = expand_simple_binop (mode, PLUS,
12748 copy_rtx (tmp), 1, OPTAB_DIRECT);
12759 tmp = expand_simple_binop (mode, IOR,
12761 copy_rtx (tmp), 1, OPTAB_DIRECT);
12763 else if (diff == -1 && ct)
12773 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12775 tmp = expand_simple_binop (mode, PLUS,
12776 copy_rtx (tmp), GEN_INT (cf),
12777 copy_rtx (tmp), 1, OPTAB_DIRECT);
12785 * andl cf - ct, dest
12795 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12798 tmp = expand_simple_binop (mode, AND,
12800 gen_int_mode (cf - ct, mode),
12801 copy_rtx (tmp), 1, OPTAB_DIRECT);
12803 tmp = expand_simple_binop (mode, PLUS,
12804 copy_rtx (tmp), GEN_INT (ct),
12805 copy_rtx (tmp), 1, OPTAB_DIRECT);
12808 if (!rtx_equal_p (tmp, out))
12809 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12811 return 1; /* DONE */
12816 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12819 tmp = ct, ct = cf, cf = tmp;
12822 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12824 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12826 /* We may be reversing unordered compare to normal compare, that
12827 is not valid in general (we may convert non-trapping condition
12828 to trapping one), however on i386 we currently emit all
12829 comparisons unordered. */
12830 compare_code = reverse_condition_maybe_unordered (compare_code);
12831 code = reverse_condition_maybe_unordered (code);
12835 compare_code = reverse_condition (compare_code);
12836 code = reverse_condition (code);
12840 compare_code = UNKNOWN;
12841 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12842 && CONST_INT_P (ix86_compare_op1))
12844 if (ix86_compare_op1 == const0_rtx
12845 && (code == LT || code == GE))
12846 compare_code = code;
12847 else if (ix86_compare_op1 == constm1_rtx)
12851 else if (code == GT)
12856 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12857 if (compare_code != UNKNOWN
12858 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12859 && (cf == -1 || ct == -1))
12861 /* If lea code below could be used, only optimize
12862 if it results in a 2 insn sequence. */
12864 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12865 || diff == 3 || diff == 5 || diff == 9)
12866 || (compare_code == LT && ct == -1)
12867 || (compare_code == GE && cf == -1))
12870 * notl op1 (if necessary)
12878 code = reverse_condition (code);
12881 out = emit_store_flag (out, code, ix86_compare_op0,
12882 ix86_compare_op1, VOIDmode, 0, -1);
12884 out = expand_simple_binop (mode, IOR,
12886 out, 1, OPTAB_DIRECT);
12887 if (out != operands[0])
12888 emit_move_insn (operands[0], out);
12890 return 1; /* DONE */
12895 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12896 || diff == 3 || diff == 5 || diff == 9)
12897 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12899 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12905 * lea cf(dest*(ct-cf)),dest
12909 * This also catches the degenerate setcc-only case.
12915 out = emit_store_flag (out, code, ix86_compare_op0,
12916 ix86_compare_op1, VOIDmode, 0, 1);
12919 /* On x86_64 the lea instruction operates on Pmode, so we need
12920 to get arithmetics done in proper mode to match. */
12922 tmp = copy_rtx (out);
12926 out1 = copy_rtx (out);
12927 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12931 tmp = gen_rtx_PLUS (mode, tmp, out1);
12937 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12940 if (!rtx_equal_p (tmp, out))
12943 out = force_operand (tmp, copy_rtx (out));
12945 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12947 if (!rtx_equal_p (out, operands[0]))
12948 emit_move_insn (operands[0], copy_rtx (out));
12950 return 1; /* DONE */
12954 * General case: Jumpful:
12955 * xorl dest,dest cmpl op1, op2
12956 * cmpl op1, op2 movl ct, dest
12957 * setcc dest jcc 1f
12958 * decl dest movl cf, dest
12959 * andl (cf-ct),dest 1:
12962 * Size 20. Size 14.
12964 * This is reasonably steep, but branch mispredict costs are
12965 * high on modern cpus, so consider failing only if optimizing
12969 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12970 && BRANCH_COST >= 2)
12974 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12979 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12981 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12983 /* We may be reversing unordered compare to normal compare,
12984 that is not valid in general (we may convert non-trapping
12985 condition to trapping one), however on i386 we currently
12986 emit all comparisons unordered. */
12987 code = reverse_condition_maybe_unordered (code);
12991 code = reverse_condition (code);
12992 if (compare_code != UNKNOWN)
12993 compare_code = reverse_condition (compare_code);
12997 if (compare_code != UNKNOWN)
12999 /* notl op1 (if needed)
13004 For x < 0 (resp. x <= -1) there will be no notl,
13005 so if possible swap the constants to get rid of the
13007 True/false will be -1/0 while code below (store flag
13008 followed by decrement) is 0/-1, so the constants need
13009 to be exchanged once more. */
13011 if (compare_code == GE || !cf)
13013 code = reverse_condition (code);
13018 HOST_WIDE_INT tmp = cf;
13023 out = emit_store_flag (out, code, ix86_compare_op0,
13024 ix86_compare_op1, VOIDmode, 0, -1);
13028 out = emit_store_flag (out, code, ix86_compare_op0,
13029 ix86_compare_op1, VOIDmode, 0, 1);
13031 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
13032 copy_rtx (out), 1, OPTAB_DIRECT);
13035 out = expand_simple_binop (mode, AND, copy_rtx (out),
13036 gen_int_mode (cf - ct, mode),
13037 copy_rtx (out), 1, OPTAB_DIRECT);
13039 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
13040 copy_rtx (out), 1, OPTAB_DIRECT);
13041 if (!rtx_equal_p (out, operands[0]))
13042 emit_move_insn (operands[0], copy_rtx (out));
13044 return 1; /* DONE */
13048 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13050 /* Try a few things more with specific constants and a variable. */
13053 rtx var, orig_out, out, tmp;
13055 if (BRANCH_COST <= 2)
13056 return 0; /* FAIL */
13058 /* If one of the two operands is an interesting constant, load a
13059 constant with the above and mask it in with a logical operation. */
13061 if (CONST_INT_P (operands[2]))
13064 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
13065 operands[3] = constm1_rtx, op = and_optab;
13066 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
13067 operands[3] = const0_rtx, op = ior_optab;
13069 return 0; /* FAIL */
13071 else if (CONST_INT_P (operands[3]))
13074 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
13075 operands[2] = constm1_rtx, op = and_optab;
13076 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
13077 operands[2] = const0_rtx, op = ior_optab;
13079 return 0; /* FAIL */
13082 return 0; /* FAIL */
13084 orig_out = operands[0];
13085 tmp = gen_reg_rtx (mode);
13088 /* Recurse to get the constant loaded. */
13089 if (ix86_expand_int_movcc (operands) == 0)
13090 return 0; /* FAIL */
13092 /* Mask in the interesting variable. */
13093 out = expand_binop (mode, op, var, tmp, orig_out, 0,
13095 if (!rtx_equal_p (out, orig_out))
13096 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
13098 return 1; /* DONE */
13102 * For comparison with above,
13112 if (! nonimmediate_operand (operands[2], mode))
13113 operands[2] = force_reg (mode, operands[2]);
13114 if (! nonimmediate_operand (operands[3], mode))
13115 operands[3] = force_reg (mode, operands[3]);
13117 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13119 rtx tmp = gen_reg_rtx (mode);
13120 emit_move_insn (tmp, operands[3]);
13123 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13125 rtx tmp = gen_reg_rtx (mode);
13126 emit_move_insn (tmp, operands[2]);
13130 if (! register_operand (operands[2], VOIDmode)
13132 || ! register_operand (operands[3], VOIDmode)))
13133 operands[2] = force_reg (mode, operands[2]);
13136 && ! register_operand (operands[3], VOIDmode))
13137 operands[3] = force_reg (mode, operands[3]);
13139 emit_insn (compare_seq);
13140 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13141 gen_rtx_IF_THEN_ELSE (mode,
13142 compare_op, operands[2],
13145 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13146 gen_rtx_IF_THEN_ELSE (mode,
13148 copy_rtx (operands[3]),
13149 copy_rtx (operands[0]))));
13151 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13152 gen_rtx_IF_THEN_ELSE (mode,
13154 copy_rtx (operands[2]),
13155 copy_rtx (operands[0]))));
13157 return 1; /* DONE */
13160 /* Swap, force into registers, or otherwise massage the two operands
13161 to an sse comparison with a mask result. Thus we differ a bit from
13162 ix86_prepare_fp_compare_args which expects to produce a flags result.
13164 The DEST operand exists to help determine whether to commute commutative
13165 operators. The POP0/POP1 operands are updated in place. The new
13166 comparison code is returned, or UNKNOWN if not implementable. */
13168 static enum rtx_code
13169 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13170 rtx *pop0, rtx *pop1)
13178 /* We have no LTGT as an operator. We could implement it with
13179 NE & ORDERED, but this requires an extra temporary. It's
13180 not clear that it's worth it. */
13187 /* These are supported directly. */
13194 /* For commutative operators, try to canonicalize the destination
13195 operand to be first in the comparison - this helps reload to
13196 avoid extra moves. */
13197 if (!dest || !rtx_equal_p (dest, *pop1))
13205 /* These are not supported directly. Swap the comparison operands
13206 to transform into something that is supported. */
13210 code = swap_condition (code);
13214 gcc_unreachable ();
13220 /* Detect conditional moves that exactly match min/max operational
13221 semantics. Note that this is IEEE safe, as long as we don't
13222 interchange the operands.
13224 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13225 and TRUE if the operation is successful and instructions are emitted. */
13228 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13229 rtx cmp_op1, rtx if_true, rtx if_false)
13231 enum machine_mode mode;
13237 else if (code == UNGE)
13240 if_true = if_false;
13246 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13248 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13253 mode = GET_MODE (dest);
13255 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13256 but MODE may be a vector mode and thus not appropriate. */
13257 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13259 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13262 if_true = force_reg (mode, if_true);
13263 v = gen_rtvec (2, if_true, if_false);
13264 tmp = gen_rtx_UNSPEC (mode, v, u);
13268 code = is_min ? SMIN : SMAX;
13269 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13272 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13276 /* Expand an sse vector comparison. Return the register with the result. */
13279 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13280 rtx op_true, rtx op_false)
13282 enum machine_mode mode = GET_MODE (dest);
13285 cmp_op0 = force_reg (mode, cmp_op0);
13286 if (!nonimmediate_operand (cmp_op1, mode))
13287 cmp_op1 = force_reg (mode, cmp_op1);
13290 || reg_overlap_mentioned_p (dest, op_true)
13291 || reg_overlap_mentioned_p (dest, op_false))
13292 dest = gen_reg_rtx (mode);
13294 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13295 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13300 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13301 operations. This is used for both scalar and vector conditional moves. */
13304 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13306 enum machine_mode mode = GET_MODE (dest);
13311 rtx pcmov = gen_rtx_SET (mode, dest,
13312 gen_rtx_IF_THEN_ELSE (mode, cmp,
13317 else if (op_false == CONST0_RTX (mode))
13319 op_true = force_reg (mode, op_true);
13320 x = gen_rtx_AND (mode, cmp, op_true);
13321 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13323 else if (op_true == CONST0_RTX (mode))
13325 op_false = force_reg (mode, op_false);
13326 x = gen_rtx_NOT (mode, cmp);
13327 x = gen_rtx_AND (mode, x, op_false);
13328 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13332 op_true = force_reg (mode, op_true);
13333 op_false = force_reg (mode, op_false);
13335 t2 = gen_reg_rtx (mode);
13337 t3 = gen_reg_rtx (mode);
13341 x = gen_rtx_AND (mode, op_true, cmp);
13342 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13344 x = gen_rtx_NOT (mode, cmp);
13345 x = gen_rtx_AND (mode, x, op_false);
13346 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13348 x = gen_rtx_IOR (mode, t3, t2);
13349 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13353 /* Expand a floating-point conditional move. Return true if successful. */
13356 ix86_expand_fp_movcc (rtx operands[])
13358 enum machine_mode mode = GET_MODE (operands[0]);
13359 enum rtx_code code = GET_CODE (operands[1]);
13360 rtx tmp, compare_op, second_test, bypass_test;
13362 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13364 enum machine_mode cmode;
13366 /* Since we've no cmove for sse registers, don't force bad register
13367 allocation just to gain access to it. Deny movcc when the
13368 comparison mode doesn't match the move mode. */
13369 cmode = GET_MODE (ix86_compare_op0);
13370 if (cmode == VOIDmode)
13371 cmode = GET_MODE (ix86_compare_op1);
13375 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13377 &ix86_compare_op1);
13378 if (code == UNKNOWN)
13381 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13382 ix86_compare_op1, operands[2],
13386 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13387 ix86_compare_op1, operands[2], operands[3]);
13388 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13392 /* The floating point conditional move instructions don't directly
13393 support conditions resulting from a signed integer comparison. */
13395 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13397 /* The floating point conditional move instructions don't directly
13398 support signed integer comparisons. */
13400 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13402 gcc_assert (!second_test && !bypass_test);
13403 tmp = gen_reg_rtx (QImode);
13404 ix86_expand_setcc (code, tmp);
13406 ix86_compare_op0 = tmp;
13407 ix86_compare_op1 = const0_rtx;
13408 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13410 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13412 tmp = gen_reg_rtx (mode);
13413 emit_move_insn (tmp, operands[3]);
13416 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13418 tmp = gen_reg_rtx (mode);
13419 emit_move_insn (tmp, operands[2]);
13423 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13424 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13425 operands[2], operands[3])));
13427 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13428 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13429 operands[3], operands[0])));
13431 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13432 gen_rtx_IF_THEN_ELSE (mode, second_test,
13433 operands[2], operands[0])));
13438 /* Expand a floating-point vector conditional move; a vcond operation
13439 rather than a movcc operation. */
13442 ix86_expand_fp_vcond (rtx operands[])
13444 enum rtx_code code = GET_CODE (operands[3]);
13447 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13448 &operands[4], &operands[5]);
13449 if (code == UNKNOWN)
13452 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13453 operands[5], operands[1], operands[2]))
13456 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13457 operands[1], operands[2]);
13458 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13462 /* Expand a signed/unsigned integral vector conditional move. */
13465 ix86_expand_int_vcond (rtx operands[])
13467 enum machine_mode mode = GET_MODE (operands[0]);
13468 enum rtx_code code = GET_CODE (operands[3]);
13469 bool negate = false;
13472 cop0 = operands[4];
13473 cop1 = operands[5];
13475 /* Canonicalize the comparison to EQ, GT, GTU. */
13486 code = reverse_condition (code);
13492 code = reverse_condition (code);
13498 code = swap_condition (code);
13499 x = cop0, cop0 = cop1, cop1 = x;
13503 gcc_unreachable ();
13506 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13507 if (mode == V2DImode)
13512 /* SSE4.1 supports EQ. */
13513 if (!TARGET_SSE4_1)
13519 /* SSE4.2 supports GT/GTU. */
13520 if (!TARGET_SSE4_2)
13525 gcc_unreachable ();
13529 /* Unsigned parallel compare is not supported by the hardware. Play some
13530 tricks to turn this into a signed comparison against 0. */
13533 cop0 = force_reg (mode, cop0);
13542 /* Perform a parallel modulo subtraction. */
13543 t1 = gen_reg_rtx (mode);
13544 emit_insn ((mode == V4SImode
13546 : gen_subv2di3) (t1, cop0, cop1));
13548 /* Extract the original sign bit of op0. */
13549 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13551 t2 = gen_reg_rtx (mode);
13552 emit_insn ((mode == V4SImode
13554 : gen_andv2di3) (t2, cop0, mask));
13556 /* XOR it back into the result of the subtraction. This results
13557 in the sign bit set iff we saw unsigned underflow. */
13558 x = gen_reg_rtx (mode);
13559 emit_insn ((mode == V4SImode
13561 : gen_xorv2di3) (x, t1, t2));
13569 /* Perform a parallel unsigned saturating subtraction. */
13570 x = gen_reg_rtx (mode);
13571 emit_insn (gen_rtx_SET (VOIDmode, x,
13572 gen_rtx_US_MINUS (mode, cop0, cop1)));
13579 gcc_unreachable ();
13583 cop1 = CONST0_RTX (mode);
13586 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13587 operands[1+negate], operands[2-negate]);
13589 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13590 operands[2-negate]);
13594 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13595 true if we should do zero extension, else sign extension. HIGH_P is
13596 true if we want the N/2 high elements, else the low elements. */
13599 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13601 enum machine_mode imode = GET_MODE (operands[1]);
13602 rtx (*unpack)(rtx, rtx, rtx);
13609 unpack = gen_vec_interleave_highv16qi;
13611 unpack = gen_vec_interleave_lowv16qi;
13615 unpack = gen_vec_interleave_highv8hi;
13617 unpack = gen_vec_interleave_lowv8hi;
13621 unpack = gen_vec_interleave_highv4si;
13623 unpack = gen_vec_interleave_lowv4si;
13626 gcc_unreachable ();
13629 dest = gen_lowpart (imode, operands[0]);
13632 se = force_reg (imode, CONST0_RTX (imode));
13634 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13635 operands[1], pc_rtx, pc_rtx);
13637 emit_insn (unpack (dest, operands[1], se));
13640 /* This function performs the same task as ix86_expand_sse_unpack,
13641 but with SSE4.1 instructions. */
13644 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13646 enum machine_mode imode = GET_MODE (operands[1]);
13647 rtx (*unpack)(rtx, rtx);
13654 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13656 unpack = gen_sse4_1_extendv8qiv8hi2;
13660 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13662 unpack = gen_sse4_1_extendv4hiv4si2;
13666 unpack = gen_sse4_1_zero_extendv2siv2di2;
13668 unpack = gen_sse4_1_extendv2siv2di2;
13671 gcc_unreachable ();
13674 dest = operands[0];
13677 /* Shift higher 8 bytes to lower 8 bytes. */
13678 src = gen_reg_rtx (imode);
13679 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13680 gen_lowpart (TImode, operands[1]),
13686 emit_insn (unpack (dest, src));
13689 /* This function performs the same task as ix86_expand_sse_unpack,
13690 but with amdfam15 instructions. */
13692 #define PPERM_SRC 0x00 /* copy source */
13693 #define PPERM_INVERT 0x20 /* invert source */
13694 #define PPERM_REVERSE 0x40 /* bit reverse source */
13695 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13696 #define PPERM_ZERO 0x80 /* all 0's */
13697 #define PPERM_ONES 0xa0 /* all 1's */
13698 #define PPERM_SIGN 0xc0 /* propagate sign bit */
13699 #define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
13701 #define PPERM_SRC1 0x00 /* use first source byte */
13702 #define PPERM_SRC2 0x10 /* use second source byte */
13705 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13707 enum machine_mode imode = GET_MODE (operands[1]);
13708 int pperm_bytes[16];
13710 int h = (high_p) ? 8 : 0;
13713 rtvec v = rtvec_alloc (16);
13716 rtx op0 = operands[0], op1 = operands[1];
13721 vs = rtvec_alloc (8);
13722 h2 = (high_p) ? 8 : 0;
13723 for (i = 0; i < 8; i++)
13725 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13726 pperm_bytes[2*i+1] = ((unsigned_p)
13728 : PPERM_SIGN | PPERM_SRC2 | i | h);
13731 for (i = 0; i < 16; i++)
13732 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13734 for (i = 0; i < 8; i++)
13735 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13737 p = gen_rtx_PARALLEL (VOIDmode, vs);
13738 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13740 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13742 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13746 vs = rtvec_alloc (4);
13747 h2 = (high_p) ? 4 : 0;
13748 for (i = 0; i < 4; i++)
13750 sign_extend = ((unsigned_p)
13752 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13753 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13754 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13755 pperm_bytes[4*i+2] = sign_extend;
13756 pperm_bytes[4*i+3] = sign_extend;
13759 for (i = 0; i < 16; i++)
13760 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13762 for (i = 0; i < 4; i++)
13763 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13765 p = gen_rtx_PARALLEL (VOIDmode, vs);
13766 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13768 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13770 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13774 vs = rtvec_alloc (2);
13775 h2 = (high_p) ? 2 : 0;
13776 for (i = 0; i < 2; i++)
13778 sign_extend = ((unsigned_p)
13780 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13781 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13782 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13783 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13784 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13785 pperm_bytes[8*i+4] = sign_extend;
13786 pperm_bytes[8*i+5] = sign_extend;
13787 pperm_bytes[8*i+6] = sign_extend;
13788 pperm_bytes[8*i+7] = sign_extend;
13791 for (i = 0; i < 16; i++)
13792 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13794 for (i = 0; i < 2; i++)
13795 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13797 p = gen_rtx_PARALLEL (VOIDmode, vs);
13798 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13800 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13802 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13806 gcc_unreachable ();
13812 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13813 next narrower integer vector type */
13815 ix86_expand_sse5_pack (rtx operands[3])
13817 enum machine_mode imode = GET_MODE (operands[0]);
13818 int pperm_bytes[16];
13820 rtvec v = rtvec_alloc (16);
13822 rtx op0 = operands[0];
13823 rtx op1 = operands[1];
13824 rtx op2 = operands[2];
13829 for (i = 0; i < 8; i++)
13831 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13832 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13835 for (i = 0; i < 16; i++)
13836 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13838 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13839 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13843 for (i = 0; i < 4; i++)
13845 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13846 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13847 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13848 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13851 for (i = 0; i < 16; i++)
13852 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13854 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13855 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13859 for (i = 0; i < 2; i++)
13861 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13862 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13863 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13864 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13865 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13866 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13867 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13868 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13871 for (i = 0; i < 16; i++)
13872 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13874 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13875 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13879 gcc_unreachable ();
13885 /* Expand conditional increment or decrement using adb/sbb instructions.
13886 The default case using setcc followed by the conditional move can be
13887 done by generic code. */
13889 ix86_expand_int_addcc (rtx operands[])
13891 enum rtx_code code = GET_CODE (operands[1]);
13893 rtx val = const0_rtx;
13894 bool fpcmp = false;
13895 enum machine_mode mode = GET_MODE (operands[0]);
13897 if (operands[3] != const1_rtx
13898 && operands[3] != constm1_rtx)
13900 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13901 ix86_compare_op1, &compare_op))
13903 code = GET_CODE (compare_op);
13905 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13906 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13909 code = ix86_fp_compare_code_to_integer (code);
13916 PUT_CODE (compare_op,
13917 reverse_condition_maybe_unordered
13918 (GET_CODE (compare_op)));
13920 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13922 PUT_MODE (compare_op, mode);
13924 /* Construct either adc or sbb insn. */
13925 if ((code == LTU) == (operands[3] == constm1_rtx))
13927 switch (GET_MODE (operands[0]))
13930 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13933 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13936 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13939 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13942 gcc_unreachable ();
13947 switch (GET_MODE (operands[0]))
13950 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13953 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13956 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13959 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13962 gcc_unreachable ();
13965 return 1; /* DONE */
13969 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13970 works for floating pointer parameters and nonoffsetable memories.
13971 For pushes, it returns just stack offsets; the values will be saved
13972 in the right order. Maximally three parts are generated. */
13975 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13980 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13982 size = (GET_MODE_SIZE (mode) + 4) / 8;
13984 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13985 gcc_assert (size >= 2 && size <= 3);
13987 /* Optimize constant pool reference to immediates. This is used by fp
13988 moves, that force all constants to memory to allow combining. */
13989 if (MEM_P (operand) && MEM_READONLY_P (operand))
13991 rtx tmp = maybe_get_pool_constant (operand);
13996 if (MEM_P (operand) && !offsettable_memref_p (operand))
13998 /* The only non-offsetable memories we handle are pushes. */
13999 int ok = push_operand (operand, VOIDmode);
14003 operand = copy_rtx (operand);
14004 PUT_MODE (operand, Pmode);
14005 parts[0] = parts[1] = parts[2] = operand;
14009 if (GET_CODE (operand) == CONST_VECTOR)
14011 enum machine_mode imode = int_mode_for_mode (mode);
14012 /* Caution: if we looked through a constant pool memory above,
14013 the operand may actually have a different mode now. That's
14014 ok, since we want to pun this all the way back to an integer. */
14015 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
14016 gcc_assert (operand != NULL);
14022 if (mode == DImode)
14023 split_di (&operand, 1, &parts[0], &parts[1]);
14026 if (REG_P (operand))
14028 gcc_assert (reload_completed);
14029 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
14030 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
14032 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
14034 else if (offsettable_memref_p (operand))
14036 operand = adjust_address (operand, SImode, 0);
14037 parts[0] = operand;
14038 parts[1] = adjust_address (operand, SImode, 4);
14040 parts[2] = adjust_address (operand, SImode, 8);
14042 else if (GET_CODE (operand) == CONST_DOUBLE)
14047 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14051 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
14052 parts[2] = gen_int_mode (l[2], SImode);
14055 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14058 gcc_unreachable ();
14060 parts[1] = gen_int_mode (l[1], SImode);
14061 parts[0] = gen_int_mode (l[0], SImode);
14064 gcc_unreachable ();
14069 if (mode == TImode)
14070 split_ti (&operand, 1, &parts[0], &parts[1]);
14071 if (mode == XFmode || mode == TFmode)
14073 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
14074 if (REG_P (operand))
14076 gcc_assert (reload_completed);
14077 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
14078 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
14080 else if (offsettable_memref_p (operand))
14082 operand = adjust_address (operand, DImode, 0);
14083 parts[0] = operand;
14084 parts[1] = adjust_address (operand, upper_mode, 8);
14086 else if (GET_CODE (operand) == CONST_DOUBLE)
14091 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14092 real_to_target (l, &r, mode);
14094 /* Do not use shift by 32 to avoid warning on 32bit systems. */
14095 if (HOST_BITS_PER_WIDE_INT >= 64)
14098 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
14099 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
14102 parts[0] = immed_double_const (l[0], l[1], DImode);
14104 if (upper_mode == SImode)
14105 parts[1] = gen_int_mode (l[2], SImode);
14106 else if (HOST_BITS_PER_WIDE_INT >= 64)
14109 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
14110 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
14113 parts[1] = immed_double_const (l[2], l[3], DImode);
14116 gcc_unreachable ();
14123 /* Emit insns to perform a move or push of DI, DF, and XF values.
14124 Return false when normal moves are needed; true when all required
14125 insns have been emitted. Operands 2-4 contain the input values
14126 int the correct order; operands 5-7 contain the output values. */
14129 ix86_split_long_move (rtx operands[])
14134 int collisions = 0;
14135 enum machine_mode mode = GET_MODE (operands[0]);
14137 /* The DFmode expanders may ask us to move double.
14138 For 64bit target this is single move. By hiding the fact
14139 here we simplify i386.md splitters. */
14140 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
14142 /* Optimize constant pool reference to immediates. This is used by
14143 fp moves, that force all constants to memory to allow combining. */
14145 if (MEM_P (operands[1])
14146 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
14147 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
14148 operands[1] = get_pool_constant (XEXP (operands[1], 0));
14149 if (push_operand (operands[0], VOIDmode))
14151 operands[0] = copy_rtx (operands[0]);
14152 PUT_MODE (operands[0], Pmode);
14155 operands[0] = gen_lowpart (DImode, operands[0]);
14156 operands[1] = gen_lowpart (DImode, operands[1]);
14157 emit_move_insn (operands[0], operands[1]);
14161 /* The only non-offsettable memory we handle is push. */
14162 if (push_operand (operands[0], VOIDmode))
14165 gcc_assert (!MEM_P (operands[0])
14166 || offsettable_memref_p (operands[0]));
14168 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
14169 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
14171 /* When emitting push, take care for source operands on the stack. */
14172 if (push && MEM_P (operands[1])
14173 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
14176 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
14177 XEXP (part[1][2], 0));
14178 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
14179 XEXP (part[1][1], 0));
14182 /* We need to do copy in the right order in case an address register
14183 of the source overlaps the destination. */
14184 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
14186 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
14188 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14191 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
14194 /* Collision in the middle part can be handled by reordering. */
14195 if (collisions == 1 && nparts == 3
14196 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14199 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14200 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14203 /* If there are more collisions, we can't handle it by reordering.
14204 Do an lea to the last part and use only one colliding move. */
14205 else if (collisions > 1)
14211 base = part[0][nparts - 1];
14213 /* Handle the case when the last part isn't valid for lea.
14214 Happens in 64-bit mode storing the 12-byte XFmode. */
14215 if (GET_MODE (base) != Pmode)
14216 base = gen_rtx_REG (Pmode, REGNO (base));
14218 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14219 part[1][0] = replace_equiv_address (part[1][0], base);
14220 part[1][1] = replace_equiv_address (part[1][1],
14221 plus_constant (base, UNITS_PER_WORD));
14223 part[1][2] = replace_equiv_address (part[1][2],
14224 plus_constant (base, 8));
14234 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14235 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14236 emit_move_insn (part[0][2], part[1][2]);
14241 /* In 64bit mode we don't have 32bit push available. In case this is
14242 register, it is OK - we will just use larger counterpart. We also
14243 retype memory - these comes from attempt to avoid REX prefix on
14244 moving of second half of TFmode value. */
14245 if (GET_MODE (part[1][1]) == SImode)
14247 switch (GET_CODE (part[1][1]))
14250 part[1][1] = adjust_address (part[1][1], DImode, 0);
14254 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14258 gcc_unreachable ();
14261 if (GET_MODE (part[1][0]) == SImode)
14262 part[1][0] = part[1][1];
14265 emit_move_insn (part[0][1], part[1][1]);
14266 emit_move_insn (part[0][0], part[1][0]);
14270 /* Choose correct order to not overwrite the source before it is copied. */
14271 if ((REG_P (part[0][0])
14272 && REG_P (part[1][1])
14273 && (REGNO (part[0][0]) == REGNO (part[1][1])
14275 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14277 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14281 operands[2] = part[0][2];
14282 operands[3] = part[0][1];
14283 operands[4] = part[0][0];
14284 operands[5] = part[1][2];
14285 operands[6] = part[1][1];
14286 operands[7] = part[1][0];
14290 operands[2] = part[0][1];
14291 operands[3] = part[0][0];
14292 operands[5] = part[1][1];
14293 operands[6] = part[1][0];
14300 operands[2] = part[0][0];
14301 operands[3] = part[0][1];
14302 operands[4] = part[0][2];
14303 operands[5] = part[1][0];
14304 operands[6] = part[1][1];
14305 operands[7] = part[1][2];
14309 operands[2] = part[0][0];
14310 operands[3] = part[0][1];
14311 operands[5] = part[1][0];
14312 operands[6] = part[1][1];
14316 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14319 if (CONST_INT_P (operands[5])
14320 && operands[5] != const0_rtx
14321 && REG_P (operands[2]))
14323 if (CONST_INT_P (operands[6])
14324 && INTVAL (operands[6]) == INTVAL (operands[5]))
14325 operands[6] = operands[2];
14328 && CONST_INT_P (operands[7])
14329 && INTVAL (operands[7]) == INTVAL (operands[5]))
14330 operands[7] = operands[2];
14334 && CONST_INT_P (operands[6])
14335 && operands[6] != const0_rtx
14336 && REG_P (operands[3])
14337 && CONST_INT_P (operands[7])
14338 && INTVAL (operands[7]) == INTVAL (operands[6]))
14339 operands[7] = operands[3];
14342 emit_move_insn (operands[2], operands[5]);
14343 emit_move_insn (operands[3], operands[6]);
14345 emit_move_insn (operands[4], operands[7]);
14350 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14351 left shift by a constant, either using a single shift or
14352 a sequence of add instructions. */
14355 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14359 emit_insn ((mode == DImode
14361 : gen_adddi3) (operand, operand, operand));
14363 else if (!optimize_size
14364 && count * ix86_cost->add <= ix86_cost->shift_const)
14367 for (i=0; i<count; i++)
14369 emit_insn ((mode == DImode
14371 : gen_adddi3) (operand, operand, operand));
14375 emit_insn ((mode == DImode
14377 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14381 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14383 rtx low[2], high[2];
14385 const int single_width = mode == DImode ? 32 : 64;
14387 if (CONST_INT_P (operands[2]))
14389 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14390 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14392 if (count >= single_width)
14394 emit_move_insn (high[0], low[1]);
14395 emit_move_insn (low[0], const0_rtx);
14397 if (count > single_width)
14398 ix86_expand_ashl_const (high[0], count - single_width, mode);
14402 if (!rtx_equal_p (operands[0], operands[1]))
14403 emit_move_insn (operands[0], operands[1]);
14404 emit_insn ((mode == DImode
14406 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14407 ix86_expand_ashl_const (low[0], count, mode);
14412 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14414 if (operands[1] == const1_rtx)
14416 /* Assuming we've chosen a QImode capable registers, then 1 << N
14417 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14418 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14420 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14422 ix86_expand_clear (low[0]);
14423 ix86_expand_clear (high[0]);
14424 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14426 d = gen_lowpart (QImode, low[0]);
14427 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14428 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14429 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14431 d = gen_lowpart (QImode, high[0]);
14432 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14433 s = gen_rtx_NE (QImode, flags, const0_rtx);
14434 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14437 /* Otherwise, we can get the same results by manually performing
14438 a bit extract operation on bit 5/6, and then performing the two
14439 shifts. The two methods of getting 0/1 into low/high are exactly
14440 the same size. Avoiding the shift in the bit extract case helps
14441 pentium4 a bit; no one else seems to care much either way. */
14446 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14447 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14449 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14450 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14452 emit_insn ((mode == DImode
14454 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14455 emit_insn ((mode == DImode
14457 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14458 emit_move_insn (low[0], high[0]);
14459 emit_insn ((mode == DImode
14461 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14464 emit_insn ((mode == DImode
14466 : gen_ashldi3) (low[0], low[0], operands[2]));
14467 emit_insn ((mode == DImode
14469 : gen_ashldi3) (high[0], high[0], operands[2]));
14473 if (operands[1] == constm1_rtx)
14475 /* For -1 << N, we can avoid the shld instruction, because we
14476 know that we're shifting 0...31/63 ones into a -1. */
14477 emit_move_insn (low[0], constm1_rtx);
14479 emit_move_insn (high[0], low[0]);
14481 emit_move_insn (high[0], constm1_rtx);
14485 if (!rtx_equal_p (operands[0], operands[1]))
14486 emit_move_insn (operands[0], operands[1]);
14488 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14489 emit_insn ((mode == DImode
14491 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14494 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14496 if (TARGET_CMOVE && scratch)
14498 ix86_expand_clear (scratch);
14499 emit_insn ((mode == DImode
14500 ? gen_x86_shift_adj_1
14501 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14504 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14508 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14510 rtx low[2], high[2];
14512 const int single_width = mode == DImode ? 32 : 64;
14514 if (CONST_INT_P (operands[2]))
14516 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14517 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14519 if (count == single_width * 2 - 1)
14521 emit_move_insn (high[0], high[1]);
14522 emit_insn ((mode == DImode
14524 : gen_ashrdi3) (high[0], high[0],
14525 GEN_INT (single_width - 1)));
14526 emit_move_insn (low[0], high[0]);
14529 else if (count >= single_width)
14531 emit_move_insn (low[0], high[1]);
14532 emit_move_insn (high[0], low[0]);
14533 emit_insn ((mode == DImode
14535 : gen_ashrdi3) (high[0], high[0],
14536 GEN_INT (single_width - 1)));
14537 if (count > single_width)
14538 emit_insn ((mode == DImode
14540 : gen_ashrdi3) (low[0], low[0],
14541 GEN_INT (count - single_width)));
14545 if (!rtx_equal_p (operands[0], operands[1]))
14546 emit_move_insn (operands[0], operands[1]);
14547 emit_insn ((mode == DImode
14549 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14550 emit_insn ((mode == DImode
14552 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14557 if (!rtx_equal_p (operands[0], operands[1]))
14558 emit_move_insn (operands[0], operands[1]);
14560 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14562 emit_insn ((mode == DImode
14564 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14565 emit_insn ((mode == DImode
14567 : gen_ashrdi3) (high[0], high[0], operands[2]));
14569 if (TARGET_CMOVE && scratch)
14571 emit_move_insn (scratch, high[0]);
14572 emit_insn ((mode == DImode
14574 : gen_ashrdi3) (scratch, scratch,
14575 GEN_INT (single_width - 1)));
14576 emit_insn ((mode == DImode
14577 ? gen_x86_shift_adj_1
14578 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14582 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14587 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14589 rtx low[2], high[2];
14591 const int single_width = mode == DImode ? 32 : 64;
14593 if (CONST_INT_P (operands[2]))
14595 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14596 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14598 if (count >= single_width)
14600 emit_move_insn (low[0], high[1]);
14601 ix86_expand_clear (high[0]);
14603 if (count > single_width)
14604 emit_insn ((mode == DImode
14606 : gen_lshrdi3) (low[0], low[0],
14607 GEN_INT (count - single_width)));
14611 if (!rtx_equal_p (operands[0], operands[1]))
14612 emit_move_insn (operands[0], operands[1]);
14613 emit_insn ((mode == DImode
14615 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14616 emit_insn ((mode == DImode
14618 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14623 if (!rtx_equal_p (operands[0], operands[1]))
14624 emit_move_insn (operands[0], operands[1]);
14626 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14628 emit_insn ((mode == DImode
14630 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14631 emit_insn ((mode == DImode
14633 : gen_lshrdi3) (high[0], high[0], operands[2]));
14635 /* Heh. By reversing the arguments, we can reuse this pattern. */
14636 if (TARGET_CMOVE && scratch)
14638 ix86_expand_clear (scratch);
14639 emit_insn ((mode == DImode
14640 ? gen_x86_shift_adj_1
14641 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14645 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14649 /* Predict just emitted jump instruction to be taken with probability PROB. */
14651 predict_jump (int prob)
14653 rtx insn = get_last_insn ();
14654 gcc_assert (JUMP_P (insn));
14656 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14661 /* Helper function for the string operations below. Dest VARIABLE whether
14662 it is aligned to VALUE bytes. If true, jump to the label. */
14664 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14666 rtx label = gen_label_rtx ();
14667 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14668 if (GET_MODE (variable) == DImode)
14669 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14671 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14672 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14675 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14677 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14681 /* Adjust COUNTER by the VALUE. */
14683 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14685 if (GET_MODE (countreg) == DImode)
14686 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14688 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14691 /* Zero extend possibly SImode EXP to Pmode register. */
14693 ix86_zero_extend_to_Pmode (rtx exp)
14696 if (GET_MODE (exp) == VOIDmode)
14697 return force_reg (Pmode, exp);
14698 if (GET_MODE (exp) == Pmode)
14699 return copy_to_mode_reg (Pmode, exp);
14700 r = gen_reg_rtx (Pmode);
14701 emit_insn (gen_zero_extendsidi2 (r, exp));
14705 /* Divide COUNTREG by SCALE. */
14707 scale_counter (rtx countreg, int scale)
14710 rtx piece_size_mask;
14714 if (CONST_INT_P (countreg))
14715 return GEN_INT (INTVAL (countreg) / scale);
14716 gcc_assert (REG_P (countreg));
14718 piece_size_mask = GEN_INT (scale - 1);
14719 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14720 GEN_INT (exact_log2 (scale)),
14721 NULL, 1, OPTAB_DIRECT);
14725 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14726 DImode for constant loop counts. */
14728 static enum machine_mode
14729 counter_mode (rtx count_exp)
14731 if (GET_MODE (count_exp) != VOIDmode)
14732 return GET_MODE (count_exp);
14733 if (GET_CODE (count_exp) != CONST_INT)
14735 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14740 /* When SRCPTR is non-NULL, output simple loop to move memory
14741 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14742 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14743 equivalent loop to set memory by VALUE (supposed to be in MODE).
14745 The size is rounded down to whole number of chunk size moved at once.
14746 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14750 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14751 rtx destptr, rtx srcptr, rtx value,
14752 rtx count, enum machine_mode mode, int unroll,
14755 rtx out_label, top_label, iter, tmp;
14756 enum machine_mode iter_mode = counter_mode (count);
14757 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14758 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14764 top_label = gen_label_rtx ();
14765 out_label = gen_label_rtx ();
14766 iter = gen_reg_rtx (iter_mode);
14768 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14769 NULL, 1, OPTAB_DIRECT);
14770 /* Those two should combine. */
14771 if (piece_size == const1_rtx)
14773 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14775 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14777 emit_move_insn (iter, const0_rtx);
14779 emit_label (top_label);
14781 tmp = convert_modes (Pmode, iter_mode, iter, true);
14782 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14783 destmem = change_address (destmem, mode, x_addr);
14787 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14788 srcmem = change_address (srcmem, mode, y_addr);
14790 /* When unrolling for chips that reorder memory reads and writes,
14791 we can save registers by using single temporary.
14792 Also using 4 temporaries is overkill in 32bit mode. */
14793 if (!TARGET_64BIT && 0)
14795 for (i = 0; i < unroll; i++)
14800 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14802 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14804 emit_move_insn (destmem, srcmem);
14810 gcc_assert (unroll <= 4);
14811 for (i = 0; i < unroll; i++)
14813 tmpreg[i] = gen_reg_rtx (mode);
14817 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14819 emit_move_insn (tmpreg[i], srcmem);
14821 for (i = 0; i < unroll; i++)
14826 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14828 emit_move_insn (destmem, tmpreg[i]);
14833 for (i = 0; i < unroll; i++)
14837 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14838 emit_move_insn (destmem, value);
14841 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14842 true, OPTAB_LIB_WIDEN);
14844 emit_move_insn (iter, tmp);
14846 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14848 if (expected_size != -1)
14850 expected_size /= GET_MODE_SIZE (mode) * unroll;
14851 if (expected_size == 0)
14853 else if (expected_size > REG_BR_PROB_BASE)
14854 predict_jump (REG_BR_PROB_BASE - 1);
14856 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14859 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14860 iter = ix86_zero_extend_to_Pmode (iter);
14861 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14862 true, OPTAB_LIB_WIDEN);
14863 if (tmp != destptr)
14864 emit_move_insn (destptr, tmp);
14867 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14868 true, OPTAB_LIB_WIDEN);
14870 emit_move_insn (srcptr, tmp);
14872 emit_label (out_label);
14875 /* Output "rep; mov" instruction.
14876 Arguments have same meaning as for previous function */
14878 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14879 rtx destptr, rtx srcptr,
14881 enum machine_mode mode)
14887 /* If the size is known, it is shorter to use rep movs. */
14888 if (mode == QImode && CONST_INT_P (count)
14889 && !(INTVAL (count) & 3))
14892 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14893 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14894 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14895 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14896 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14897 if (mode != QImode)
14899 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14900 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14901 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14902 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14903 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14904 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14908 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14909 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14911 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14915 /* Output "rep; stos" instruction.
14916 Arguments have same meaning as for previous function */
14918 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14920 enum machine_mode mode)
14925 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14926 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14927 value = force_reg (mode, gen_lowpart (mode, value));
14928 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14929 if (mode != QImode)
14931 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14932 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14933 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14936 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14937 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14941 emit_strmov (rtx destmem, rtx srcmem,
14942 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14944 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14945 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14946 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14949 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14951 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14952 rtx destptr, rtx srcptr, rtx count, int max_size)
14955 if (CONST_INT_P (count))
14957 HOST_WIDE_INT countval = INTVAL (count);
14960 if ((countval & 0x10) && max_size > 16)
14964 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14965 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14968 gcc_unreachable ();
14971 if ((countval & 0x08) && max_size > 8)
14974 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14977 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14978 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14982 if ((countval & 0x04) && max_size > 4)
14984 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14987 if ((countval & 0x02) && max_size > 2)
14989 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14992 if ((countval & 0x01) && max_size > 1)
14994 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
15001 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
15002 count, 1, OPTAB_DIRECT);
15003 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
15004 count, QImode, 1, 4);
15008 /* When there are stringops, we can cheaply increase dest and src pointers.
15009 Otherwise we save code size by maintaining offset (zero is readily
15010 available from preceding rep operation) and using x86 addressing modes.
15012 if (TARGET_SINGLE_STRINGOP)
15016 rtx label = ix86_expand_aligntest (count, 4, true);
15017 src = change_address (srcmem, SImode, srcptr);
15018 dest = change_address (destmem, SImode, destptr);
15019 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15020 emit_label (label);
15021 LABEL_NUSES (label) = 1;
15025 rtx label = ix86_expand_aligntest (count, 2, true);
15026 src = change_address (srcmem, HImode, srcptr);
15027 dest = change_address (destmem, HImode, destptr);
15028 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15029 emit_label (label);
15030 LABEL_NUSES (label) = 1;
15034 rtx label = ix86_expand_aligntest (count, 1, true);
15035 src = change_address (srcmem, QImode, srcptr);
15036 dest = change_address (destmem, QImode, destptr);
15037 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15038 emit_label (label);
15039 LABEL_NUSES (label) = 1;
15044 rtx offset = force_reg (Pmode, const0_rtx);
15049 rtx label = ix86_expand_aligntest (count, 4, true);
15050 src = change_address (srcmem, SImode, srcptr);
15051 dest = change_address (destmem, SImode, destptr);
15052 emit_move_insn (dest, src);
15053 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
15054 true, OPTAB_LIB_WIDEN);
15056 emit_move_insn (offset, tmp);
15057 emit_label (label);
15058 LABEL_NUSES (label) = 1;
15062 rtx label = ix86_expand_aligntest (count, 2, true);
15063 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15064 src = change_address (srcmem, HImode, tmp);
15065 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15066 dest = change_address (destmem, HImode, tmp);
15067 emit_move_insn (dest, src);
15068 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
15069 true, OPTAB_LIB_WIDEN);
15071 emit_move_insn (offset, tmp);
15072 emit_label (label);
15073 LABEL_NUSES (label) = 1;
15077 rtx label = ix86_expand_aligntest (count, 1, true);
15078 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15079 src = change_address (srcmem, QImode, tmp);
15080 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15081 dest = change_address (destmem, QImode, tmp);
15082 emit_move_insn (dest, src);
15083 emit_label (label);
15084 LABEL_NUSES (label) = 1;
15089 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
15091 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
15092 rtx count, int max_size)
15095 expand_simple_binop (counter_mode (count), AND, count,
15096 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
15097 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
15098 gen_lowpart (QImode, value), count, QImode,
15102 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
15104 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
15108 if (CONST_INT_P (count))
15110 HOST_WIDE_INT countval = INTVAL (count);
15113 if ((countval & 0x10) && max_size > 16)
15117 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15118 emit_insn (gen_strset (destptr, dest, value));
15119 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
15120 emit_insn (gen_strset (destptr, dest, value));
15123 gcc_unreachable ();
15126 if ((countval & 0x08) && max_size > 8)
15130 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15131 emit_insn (gen_strset (destptr, dest, value));
15135 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15136 emit_insn (gen_strset (destptr, dest, value));
15137 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
15138 emit_insn (gen_strset (destptr, dest, value));
15142 if ((countval & 0x04) && max_size > 4)
15144 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15145 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15148 if ((countval & 0x02) && max_size > 2)
15150 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
15151 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15154 if ((countval & 0x01) && max_size > 1)
15156 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
15157 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15164 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
15169 rtx label = ix86_expand_aligntest (count, 16, true);
15172 dest = change_address (destmem, DImode, destptr);
15173 emit_insn (gen_strset (destptr, dest, value));
15174 emit_insn (gen_strset (destptr, dest, value));
15178 dest = change_address (destmem, SImode, destptr);
15179 emit_insn (gen_strset (destptr, dest, value));
15180 emit_insn (gen_strset (destptr, dest, value));
15181 emit_insn (gen_strset (destptr, dest, value));
15182 emit_insn (gen_strset (destptr, dest, value));
15184 emit_label (label);
15185 LABEL_NUSES (label) = 1;
15189 rtx label = ix86_expand_aligntest (count, 8, true);
15192 dest = change_address (destmem, DImode, destptr);
15193 emit_insn (gen_strset (destptr, dest, value));
15197 dest = change_address (destmem, SImode, destptr);
15198 emit_insn (gen_strset (destptr, dest, value));
15199 emit_insn (gen_strset (destptr, dest, value));
15201 emit_label (label);
15202 LABEL_NUSES (label) = 1;
15206 rtx label = ix86_expand_aligntest (count, 4, true);
15207 dest = change_address (destmem, SImode, destptr);
15208 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15209 emit_label (label);
15210 LABEL_NUSES (label) = 1;
15214 rtx label = ix86_expand_aligntest (count, 2, true);
15215 dest = change_address (destmem, HImode, destptr);
15216 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15217 emit_label (label);
15218 LABEL_NUSES (label) = 1;
15222 rtx label = ix86_expand_aligntest (count, 1, true);
15223 dest = change_address (destmem, QImode, destptr);
15224 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15225 emit_label (label);
15226 LABEL_NUSES (label) = 1;
15230 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
15231 DESIRED_ALIGNMENT. */
15233 expand_movmem_prologue (rtx destmem, rtx srcmem,
15234 rtx destptr, rtx srcptr, rtx count,
15235 int align, int desired_alignment)
15237 if (align <= 1 && desired_alignment > 1)
15239 rtx label = ix86_expand_aligntest (destptr, 1, false);
15240 srcmem = change_address (srcmem, QImode, srcptr);
15241 destmem = change_address (destmem, QImode, destptr);
15242 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15243 ix86_adjust_counter (count, 1);
15244 emit_label (label);
15245 LABEL_NUSES (label) = 1;
15247 if (align <= 2 && desired_alignment > 2)
15249 rtx label = ix86_expand_aligntest (destptr, 2, false);
15250 srcmem = change_address (srcmem, HImode, srcptr);
15251 destmem = change_address (destmem, HImode, destptr);
15252 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15253 ix86_adjust_counter (count, 2);
15254 emit_label (label);
15255 LABEL_NUSES (label) = 1;
15257 if (align <= 4 && desired_alignment > 4)
15259 rtx label = ix86_expand_aligntest (destptr, 4, false);
15260 srcmem = change_address (srcmem, SImode, srcptr);
15261 destmem = change_address (destmem, SImode, destptr);
15262 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15263 ix86_adjust_counter (count, 4);
15264 emit_label (label);
15265 LABEL_NUSES (label) = 1;
15267 gcc_assert (desired_alignment <= 8);
15270 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
15271 DESIRED_ALIGNMENT. */
15273 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15274 int align, int desired_alignment)
15276 if (align <= 1 && desired_alignment > 1)
15278 rtx label = ix86_expand_aligntest (destptr, 1, false);
15279 destmem = change_address (destmem, QImode, destptr);
15280 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15281 ix86_adjust_counter (count, 1);
15282 emit_label (label);
15283 LABEL_NUSES (label) = 1;
15285 if (align <= 2 && desired_alignment > 2)
15287 rtx label = ix86_expand_aligntest (destptr, 2, false);
15288 destmem = change_address (destmem, HImode, destptr);
15289 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15290 ix86_adjust_counter (count, 2);
15291 emit_label (label);
15292 LABEL_NUSES (label) = 1;
15294 if (align <= 4 && desired_alignment > 4)
15296 rtx label = ix86_expand_aligntest (destptr, 4, false);
15297 destmem = change_address (destmem, SImode, destptr);
15298 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15299 ix86_adjust_counter (count, 4);
15300 emit_label (label);
15301 LABEL_NUSES (label) = 1;
15303 gcc_assert (desired_alignment <= 8);
15306 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15307 static enum stringop_alg
15308 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15309 int *dynamic_check)
15311 const struct stringop_algs * algs;
15312 /* Algorithms using the rep prefix want at least edi and ecx;
15313 additionally, memset wants eax and memcpy wants esi. Don't
15314 consider such algorithms if the user has appropriated those
15315 registers for their own purposes. */
15316 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15318 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15320 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15321 || (alg != rep_prefix_1_byte \
15322 && alg != rep_prefix_4_byte \
15323 && alg != rep_prefix_8_byte))
15325 *dynamic_check = -1;
15327 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15329 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15330 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15331 return stringop_alg;
15332 /* rep; movq or rep; movl is the smallest variant. */
15333 else if (optimize_size)
15335 if (!count || (count & 3))
15336 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15338 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15340 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15342 else if (expected_size != -1 && expected_size < 4)
15343 return loop_1_byte;
15344 else if (expected_size != -1)
15347 enum stringop_alg alg = libcall;
15348 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15350 /* We get here if the algorithms that were not libcall-based
15351 were rep-prefix based and we are unable to use rep prefixes
15352 based on global register usage. Break out of the loop and
15353 use the heuristic below. */
15354 if (algs->size[i].max == 0)
15356 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15358 enum stringop_alg candidate = algs->size[i].alg;
15360 if (candidate != libcall && ALG_USABLE_P (candidate))
15362 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15363 last non-libcall inline algorithm. */
15364 if (TARGET_INLINE_ALL_STRINGOPS)
15366 /* When the current size is best to be copied by a libcall,
15367 but we are still forced to inline, run the heuristic below
15368 that will pick code for medium sized blocks. */
15369 if (alg != libcall)
15373 else if (ALG_USABLE_P (candidate))
15377 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15379 /* When asked to inline the call anyway, try to pick meaningful choice.
15380 We look for maximal size of block that is faster to copy by hand and
15381 take blocks of at most of that size guessing that average size will
15382 be roughly half of the block.
15384 If this turns out to be bad, we might simply specify the preferred
15385 choice in ix86_costs. */
15386 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15387 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15390 enum stringop_alg alg;
15392 bool any_alg_usable_p = true;
15394 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15396 enum stringop_alg candidate = algs->size[i].alg;
15397 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15399 if (candidate != libcall && candidate
15400 && ALG_USABLE_P (candidate))
15401 max = algs->size[i].max;
15403 /* If there aren't any usable algorithms, then recursing on
15404 smaller sizes isn't going to find anything. Just return the
15405 simple byte-at-a-time copy loop. */
15406 if (!any_alg_usable_p)
15408 /* Pick something reasonable. */
15409 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15410 *dynamic_check = 128;
15411 return loop_1_byte;
15415 alg = decide_alg (count, max / 2, memset, dynamic_check);
15416 gcc_assert (*dynamic_check == -1);
15417 gcc_assert (alg != libcall);
15418 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15419 *dynamic_check = max;
15422 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15423 #undef ALG_USABLE_P
15426 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15427 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15429 decide_alignment (int align,
15430 enum stringop_alg alg,
15433 int desired_align = 0;
15437 gcc_unreachable ();
15439 case unrolled_loop:
15440 desired_align = GET_MODE_SIZE (Pmode);
15442 case rep_prefix_8_byte:
15445 case rep_prefix_4_byte:
15446 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15447 copying whole cacheline at once. */
15448 if (TARGET_PENTIUMPRO)
15453 case rep_prefix_1_byte:
15454 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15455 copying whole cacheline at once. */
15456 if (TARGET_PENTIUMPRO)
15470 if (desired_align < align)
15471 desired_align = align;
15472 if (expected_size != -1 && expected_size < 4)
15473 desired_align = align;
15474 return desired_align;
15477 /* Return the smallest power of 2 greater than VAL. */
15479 smallest_pow2_greater_than (int val)
15487 /* Expand string move (memcpy) operation. Use i386 string operations when
15488 profitable. expand_setmem contains similar code. The code depends upon
15489 architecture, block size and alignment, but always has the same
15492 1) Prologue guard: Conditional that jumps up to epilogues for small
15493 blocks that can be handled by epilogue alone. This is faster but
15494 also needed for correctness, since prologue assume the block is larger
15495 than the desired alignment.
15497 Optional dynamic check for size and libcall for large
15498 blocks is emitted here too, with -minline-stringops-dynamically.
15500 2) Prologue: copy first few bytes in order to get destination aligned
15501 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15502 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15503 We emit either a jump tree on power of two sized blocks, or a byte loop.
15505 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15506 with specified algorithm.
15508 4) Epilogue: code copying tail of the block that is too small to be
15509 handled by main body (or up to size guarded by prologue guard). */
15512 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15513 rtx expected_align_exp, rtx expected_size_exp)
15519 rtx jump_around_label = NULL;
15520 HOST_WIDE_INT align = 1;
15521 unsigned HOST_WIDE_INT count = 0;
15522 HOST_WIDE_INT expected_size = -1;
15523 int size_needed = 0, epilogue_size_needed;
15524 int desired_align = 0;
15525 enum stringop_alg alg;
15528 if (CONST_INT_P (align_exp))
15529 align = INTVAL (align_exp);
15530 /* i386 can do misaligned access on reasonably increased cost. */
15531 if (CONST_INT_P (expected_align_exp)
15532 && INTVAL (expected_align_exp) > align)
15533 align = INTVAL (expected_align_exp);
15534 if (CONST_INT_P (count_exp))
15535 count = expected_size = INTVAL (count_exp);
15536 if (CONST_INT_P (expected_size_exp) && count == 0)
15537 expected_size = INTVAL (expected_size_exp);
15539 /* Make sure we don't need to care about overflow later on. */
15540 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15543 /* Step 0: Decide on preferred algorithm, desired alignment and
15544 size of chunks to be copied by main loop. */
15546 alg = decide_alg (count, expected_size, false, &dynamic_check);
15547 desired_align = decide_alignment (align, alg, expected_size);
15549 if (!TARGET_ALIGN_STRINGOPS)
15550 align = desired_align;
15552 if (alg == libcall)
15554 gcc_assert (alg != no_stringop);
15556 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15557 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15558 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15563 gcc_unreachable ();
15565 size_needed = GET_MODE_SIZE (Pmode);
15567 case unrolled_loop:
15568 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15570 case rep_prefix_8_byte:
15573 case rep_prefix_4_byte:
15576 case rep_prefix_1_byte:
15582 epilogue_size_needed = size_needed;
15584 /* Step 1: Prologue guard. */
15586 /* Alignment code needs count to be in register. */
15587 if (CONST_INT_P (count_exp) && desired_align > align)
15588 count_exp = force_reg (counter_mode (count_exp), count_exp);
15589 gcc_assert (desired_align >= 1 && align >= 1);
15591 /* Ensure that alignment prologue won't copy past end of block. */
15592 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15594 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15595 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15596 Make sure it is power of 2. */
15597 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15599 if (CONST_INT_P (count_exp))
15601 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15606 label = gen_label_rtx ();
15607 emit_cmp_and_jump_insns (count_exp,
15608 GEN_INT (epilogue_size_needed),
15609 LTU, 0, counter_mode (count_exp), 1, label);
15610 if (expected_size == -1 || expected_size < epilogue_size_needed)
15611 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15613 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15617 /* Emit code to decide on runtime whether library call or inline should be
15619 if (dynamic_check != -1)
15621 if (CONST_INT_P (count_exp))
15623 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15625 emit_block_move_via_libcall (dst, src, count_exp, false);
15626 count_exp = const0_rtx;
15632 rtx hot_label = gen_label_rtx ();
15633 jump_around_label = gen_label_rtx ();
15634 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15635 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15636 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15637 emit_block_move_via_libcall (dst, src, count_exp, false);
15638 emit_jump (jump_around_label);
15639 emit_label (hot_label);
15643 /* Step 2: Alignment prologue. */
15645 if (desired_align > align)
15647 /* Except for the first move in epilogue, we no longer know
15648 constant offset in aliasing info. It don't seems to worth
15649 the pain to maintain it for the first move, so throw away
15651 src = change_address (src, BLKmode, srcreg);
15652 dst = change_address (dst, BLKmode, destreg);
15653 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15656 if (label && size_needed == 1)
15658 emit_label (label);
15659 LABEL_NUSES (label) = 1;
15663 /* Step 3: Main loop. */
15669 gcc_unreachable ();
15671 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15672 count_exp, QImode, 1, expected_size);
15675 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15676 count_exp, Pmode, 1, expected_size);
15678 case unrolled_loop:
15679 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15680 registers for 4 temporaries anyway. */
15681 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15682 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15685 case rep_prefix_8_byte:
15686 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15689 case rep_prefix_4_byte:
15690 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15693 case rep_prefix_1_byte:
15694 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15698 /* Adjust properly the offset of src and dest memory for aliasing. */
15699 if (CONST_INT_P (count_exp))
15701 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15702 (count / size_needed) * size_needed);
15703 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15704 (count / size_needed) * size_needed);
15708 src = change_address (src, BLKmode, srcreg);
15709 dst = change_address (dst, BLKmode, destreg);
15712 /* Step 4: Epilogue to copy the remaining bytes. */
15716 /* When the main loop is done, COUNT_EXP might hold original count,
15717 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15718 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15719 bytes. Compensate if needed. */
15721 if (size_needed < epilogue_size_needed)
15724 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15725 GEN_INT (size_needed - 1), count_exp, 1,
15727 if (tmp != count_exp)
15728 emit_move_insn (count_exp, tmp);
15730 emit_label (label);
15731 LABEL_NUSES (label) = 1;
15734 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15735 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15736 epilogue_size_needed);
15737 if (jump_around_label)
15738 emit_label (jump_around_label);
15742 /* Helper function for memcpy. For QImode value 0xXY produce
15743 0xXYXYXYXY of wide specified by MODE. This is essentially
15744 a * 0x10101010, but we can do slightly better than
15745 synth_mult by unwinding the sequence by hand on CPUs with
15748 promote_duplicated_reg (enum machine_mode mode, rtx val)
15750 enum machine_mode valmode = GET_MODE (val);
15752 int nops = mode == DImode ? 3 : 2;
15754 gcc_assert (mode == SImode || mode == DImode);
15755 if (val == const0_rtx)
15756 return copy_to_mode_reg (mode, const0_rtx);
15757 if (CONST_INT_P (val))
15759 HOST_WIDE_INT v = INTVAL (val) & 255;
15763 if (mode == DImode)
15764 v |= (v << 16) << 16;
15765 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15768 if (valmode == VOIDmode)
15770 if (valmode != QImode)
15771 val = gen_lowpart (QImode, val);
15772 if (mode == QImode)
15774 if (!TARGET_PARTIAL_REG_STALL)
15776 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15777 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15778 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15779 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15781 rtx reg = convert_modes (mode, QImode, val, true);
15782 tmp = promote_duplicated_reg (mode, const1_rtx);
15783 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15788 rtx reg = convert_modes (mode, QImode, val, true);
15790 if (!TARGET_PARTIAL_REG_STALL)
15791 if (mode == SImode)
15792 emit_insn (gen_movsi_insv_1 (reg, reg));
15794 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15797 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15798 NULL, 1, OPTAB_DIRECT);
15800 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15802 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15803 NULL, 1, OPTAB_DIRECT);
15804 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15805 if (mode == SImode)
15807 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15808 NULL, 1, OPTAB_DIRECT);
15809 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15814 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15815 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15816 alignment from ALIGN to DESIRED_ALIGN. */
15818 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15823 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15824 promoted_val = promote_duplicated_reg (DImode, val);
15825 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15826 promoted_val = promote_duplicated_reg (SImode, val);
15827 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15828 promoted_val = promote_duplicated_reg (HImode, val);
15830 promoted_val = val;
15832 return promoted_val;
15835 /* Expand string clear operation (bzero). Use i386 string operations when
15836 profitable. See expand_movmem comment for explanation of individual
15837 steps performed. */
15839 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15840 rtx expected_align_exp, rtx expected_size_exp)
15845 rtx jump_around_label = NULL;
15846 HOST_WIDE_INT align = 1;
15847 unsigned HOST_WIDE_INT count = 0;
15848 HOST_WIDE_INT expected_size = -1;
15849 int size_needed = 0, epilogue_size_needed;
15850 int desired_align = 0;
15851 enum stringop_alg alg;
15852 rtx promoted_val = NULL;
15853 bool force_loopy_epilogue = false;
15856 if (CONST_INT_P (align_exp))
15857 align = INTVAL (align_exp);
15858 /* i386 can do misaligned access on reasonably increased cost. */
15859 if (CONST_INT_P (expected_align_exp)
15860 && INTVAL (expected_align_exp) > align)
15861 align = INTVAL (expected_align_exp);
15862 if (CONST_INT_P (count_exp))
15863 count = expected_size = INTVAL (count_exp);
15864 if (CONST_INT_P (expected_size_exp) && count == 0)
15865 expected_size = INTVAL (expected_size_exp);
15867 /* Make sure we don't need to care about overflow later on. */
15868 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15871 /* Step 0: Decide on preferred algorithm, desired alignment and
15872 size of chunks to be copied by main loop. */
15874 alg = decide_alg (count, expected_size, true, &dynamic_check);
15875 desired_align = decide_alignment (align, alg, expected_size);
15877 if (!TARGET_ALIGN_STRINGOPS)
15878 align = desired_align;
15880 if (alg == libcall)
15882 gcc_assert (alg != no_stringop);
15884 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15885 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15890 gcc_unreachable ();
15892 size_needed = GET_MODE_SIZE (Pmode);
15894 case unrolled_loop:
15895 size_needed = GET_MODE_SIZE (Pmode) * 4;
15897 case rep_prefix_8_byte:
15900 case rep_prefix_4_byte:
15903 case rep_prefix_1_byte:
15908 epilogue_size_needed = size_needed;
15910 /* Step 1: Prologue guard. */
15912 /* Alignment code needs count to be in register. */
15913 if (CONST_INT_P (count_exp) && desired_align > align)
15915 enum machine_mode mode = SImode;
15916 if (TARGET_64BIT && (count & ~0xffffffff))
15918 count_exp = force_reg (mode, count_exp);
15920 /* Do the cheap promotion to allow better CSE across the
15921 main loop and epilogue (ie one load of the big constant in the
15922 front of all code. */
15923 if (CONST_INT_P (val_exp))
15924 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15925 desired_align, align);
15926 /* Ensure that alignment prologue won't copy past end of block. */
15927 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15929 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15930 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15931 Make sure it is power of 2. */
15932 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15934 /* To improve performance of small blocks, we jump around the VAL
15935 promoting mode. This mean that if the promoted VAL is not constant,
15936 we might not use it in the epilogue and have to use byte
15938 if (epilogue_size_needed > 2 && !promoted_val)
15939 force_loopy_epilogue = true;
15940 label = gen_label_rtx ();
15941 emit_cmp_and_jump_insns (count_exp,
15942 GEN_INT (epilogue_size_needed),
15943 LTU, 0, counter_mode (count_exp), 1, label);
15944 if (GET_CODE (count_exp) == CONST_INT)
15946 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15947 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15949 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15951 if (dynamic_check != -1)
15953 rtx hot_label = gen_label_rtx ();
15954 jump_around_label = gen_label_rtx ();
15955 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15956 LEU, 0, counter_mode (count_exp), 1, hot_label);
15957 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15958 set_storage_via_libcall (dst, count_exp, val_exp, false);
15959 emit_jump (jump_around_label);
15960 emit_label (hot_label);
15963 /* Step 2: Alignment prologue. */
15965 /* Do the expensive promotion once we branched off the small blocks. */
15967 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15968 desired_align, align);
15969 gcc_assert (desired_align >= 1 && align >= 1);
15971 if (desired_align > align)
15973 /* Except for the first move in epilogue, we no longer know
15974 constant offset in aliasing info. It don't seems to worth
15975 the pain to maintain it for the first move, so throw away
15977 dst = change_address (dst, BLKmode, destreg);
15978 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15981 if (label && size_needed == 1)
15983 emit_label (label);
15984 LABEL_NUSES (label) = 1;
15988 /* Step 3: Main loop. */
15994 gcc_unreachable ();
15996 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15997 count_exp, QImode, 1, expected_size);
16000 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16001 count_exp, Pmode, 1, expected_size);
16003 case unrolled_loop:
16004 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16005 count_exp, Pmode, 4, expected_size);
16007 case rep_prefix_8_byte:
16008 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16011 case rep_prefix_4_byte:
16012 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16015 case rep_prefix_1_byte:
16016 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16020 /* Adjust properly the offset of src and dest memory for aliasing. */
16021 if (CONST_INT_P (count_exp))
16022 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
16023 (count / size_needed) * size_needed);
16025 dst = change_address (dst, BLKmode, destreg);
16027 /* Step 4: Epilogue to copy the remaining bytes. */
16031 /* When the main loop is done, COUNT_EXP might hold original count,
16032 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
16033 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
16034 bytes. Compensate if needed. */
16036 if (size_needed < desired_align - align)
16039 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
16040 GEN_INT (size_needed - 1), count_exp, 1,
16042 size_needed = desired_align - align + 1;
16043 if (tmp != count_exp)
16044 emit_move_insn (count_exp, tmp);
16046 emit_label (label);
16047 LABEL_NUSES (label) = 1;
16049 if (count_exp != const0_rtx && epilogue_size_needed > 1)
16051 if (force_loopy_epilogue)
16052 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
16055 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
16058 if (jump_around_label)
16059 emit_label (jump_around_label);
16063 /* Expand the appropriate insns for doing strlen if not just doing
16066 out = result, initialized with the start address
16067 align_rtx = alignment of the address.
16068 scratch = scratch register, initialized with the startaddress when
16069 not aligned, otherwise undefined
16071 This is just the body. It needs the initializations mentioned above and
16072 some address computing at the end. These things are done in i386.md. */
16075 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
16079 rtx align_2_label = NULL_RTX;
16080 rtx align_3_label = NULL_RTX;
16081 rtx align_4_label = gen_label_rtx ();
16082 rtx end_0_label = gen_label_rtx ();
16084 rtx tmpreg = gen_reg_rtx (SImode);
16085 rtx scratch = gen_reg_rtx (SImode);
16089 if (CONST_INT_P (align_rtx))
16090 align = INTVAL (align_rtx);
16092 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
16094 /* Is there a known alignment and is it less than 4? */
16097 rtx scratch1 = gen_reg_rtx (Pmode);
16098 emit_move_insn (scratch1, out);
16099 /* Is there a known alignment and is it not 2? */
16102 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
16103 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
16105 /* Leave just the 3 lower bits. */
16106 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
16107 NULL_RTX, 0, OPTAB_WIDEN);
16109 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16110 Pmode, 1, align_4_label);
16111 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
16112 Pmode, 1, align_2_label);
16113 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
16114 Pmode, 1, align_3_label);
16118 /* Since the alignment is 2, we have to check 2 or 0 bytes;
16119 check if is aligned to 4 - byte. */
16121 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
16122 NULL_RTX, 0, OPTAB_WIDEN);
16124 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16125 Pmode, 1, align_4_label);
16128 mem = change_address (src, QImode, out);
16130 /* Now compare the bytes. */
16132 /* Compare the first n unaligned byte on a byte per byte basis. */
16133 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
16134 QImode, 1, end_0_label);
16136 /* Increment the address. */
16138 emit_insn (gen_adddi3 (out, out, const1_rtx));
16140 emit_insn (gen_addsi3 (out, out, const1_rtx));
16142 /* Not needed with an alignment of 2 */
16145 emit_label (align_2_label);
16147 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16151 emit_insn (gen_adddi3 (out, out, const1_rtx));
16153 emit_insn (gen_addsi3 (out, out, const1_rtx));
16155 emit_label (align_3_label);
16158 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16162 emit_insn (gen_adddi3 (out, out, const1_rtx));
16164 emit_insn (gen_addsi3 (out, out, const1_rtx));
16167 /* Generate loop to check 4 bytes at a time. It is not a good idea to
16168 align this loop. It gives only huge programs, but does not help to
16170 emit_label (align_4_label);
16172 mem = change_address (src, SImode, out);
16173 emit_move_insn (scratch, mem);
16175 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
16177 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
16179 /* This formula yields a nonzero result iff one of the bytes is zero.
16180 This saves three branches inside loop and many cycles. */
16182 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
16183 emit_insn (gen_one_cmplsi2 (scratch, scratch));
16184 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
16185 emit_insn (gen_andsi3 (tmpreg, tmpreg,
16186 gen_int_mode (0x80808080, SImode)));
16187 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
16192 rtx reg = gen_reg_rtx (SImode);
16193 rtx reg2 = gen_reg_rtx (Pmode);
16194 emit_move_insn (reg, tmpreg);
16195 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16197 /* If zero is not in the first two bytes, move two bytes forward. */
16198 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16199 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16200 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16201 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16202 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16205 /* Emit lea manually to avoid clobbering of flags. */
16206 emit_insn (gen_rtx_SET (SImode, reg2,
16207 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16209 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16210 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16211 emit_insn (gen_rtx_SET (VOIDmode, out,
16212 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16219 rtx end_2_label = gen_label_rtx ();
16220 /* Is zero in the first two bytes? */
16222 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16223 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16224 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16225 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16226 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16228 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16229 JUMP_LABEL (tmp) = end_2_label;
16231 /* Not in the first two. Move two bytes forward. */
16232 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16234 emit_insn (gen_adddi3 (out, out, const2_rtx));
16236 emit_insn (gen_addsi3 (out, out, const2_rtx));
16238 emit_label (end_2_label);
16242 /* Avoid branch in fixing the byte. */
16243 tmpreg = gen_lowpart (QImode, tmpreg);
16244 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16245 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16247 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16249 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16251 emit_label (end_0_label);
16254 /* Expand strlen. */
16257 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16259 rtx addr, scratch1, scratch2, scratch3, scratch4;
16261 /* The generic case of strlen expander is long. Avoid it's
16262 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16264 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16265 && !TARGET_INLINE_ALL_STRINGOPS
16267 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16270 addr = force_reg (Pmode, XEXP (src, 0));
16271 scratch1 = gen_reg_rtx (Pmode);
16273 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16276 /* Well it seems that some optimizer does not combine a call like
16277 foo(strlen(bar), strlen(bar));
16278 when the move and the subtraction is done here. It does calculate
16279 the length just once when these instructions are done inside of
16280 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16281 often used and I use one fewer register for the lifetime of
16282 output_strlen_unroll() this is better. */
16284 emit_move_insn (out, addr);
16286 ix86_expand_strlensi_unroll_1 (out, src, align);
16288 /* strlensi_unroll_1 returns the address of the zero at the end of
16289 the string, like memchr(), so compute the length by subtracting
16290 the start address. */
16292 emit_insn (gen_subdi3 (out, out, addr));
16294 emit_insn (gen_subsi3 (out, out, addr));
16300 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16301 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16304 scratch2 = gen_reg_rtx (Pmode);
16305 scratch3 = gen_reg_rtx (Pmode);
16306 scratch4 = force_reg (Pmode, constm1_rtx);
16308 emit_move_insn (scratch3, addr);
16309 eoschar = force_reg (QImode, eoschar);
16311 src = replace_equiv_address_nv (src, scratch3);
16313 /* If .md starts supporting :P, this can be done in .md. */
16314 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16315 scratch4), UNSPEC_SCAS);
16316 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16319 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16320 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16324 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16325 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16331 /* For given symbol (function) construct code to compute address of it's PLT
16332 entry in large x86-64 PIC model. */
16334 construct_plt_address (rtx symbol)
16336 rtx tmp = gen_reg_rtx (Pmode);
16337 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16339 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16340 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16342 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16343 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16348 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16349 rtx callarg2 ATTRIBUTE_UNUSED,
16350 rtx pop, int sibcall)
16352 rtx use = NULL, call;
16354 if (pop == const0_rtx)
16356 gcc_assert (!TARGET_64BIT || !pop);
16358 if (TARGET_MACHO && !TARGET_64BIT)
16361 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16362 fnaddr = machopic_indirect_call_target (fnaddr);
16367 /* Static functions and indirect calls don't need the pic register. */
16368 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16369 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16370 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16371 use_reg (&use, pic_offset_table_rtx);
16374 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16376 rtx al = gen_rtx_REG (QImode, AX_REG);
16377 emit_move_insn (al, callarg2);
16378 use_reg (&use, al);
16381 if (ix86_cmodel == CM_LARGE_PIC
16382 && GET_CODE (fnaddr) == MEM
16383 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16384 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16385 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16386 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16388 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16389 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16391 if (sibcall && TARGET_64BIT
16392 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16395 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16396 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16397 emit_move_insn (fnaddr, addr);
16398 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16401 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16403 call = gen_rtx_SET (VOIDmode, retval, call);
16406 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16407 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16408 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16411 call = emit_call_insn (call);
16413 CALL_INSN_FUNCTION_USAGE (call) = use;
16417 /* Clear stack slot assignments remembered from previous functions.
16418 This is called from INIT_EXPANDERS once before RTL is emitted for each
16421 static struct machine_function *
16422 ix86_init_machine_status (void)
16424 struct machine_function *f;
16426 f = GGC_CNEW (struct machine_function);
16427 f->use_fast_prologue_epilogue_nregs = -1;
16428 f->tls_descriptor_call_expanded_p = 0;
16433 /* Return a MEM corresponding to a stack slot with mode MODE.
16434 Allocate a new slot if necessary.
16436 The RTL for a function can have several slots available: N is
16437 which slot to use. */
16440 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16442 struct stack_local_entry *s;
16444 gcc_assert (n < MAX_386_STACK_LOCALS);
16446 /* Virtual slot is valid only before vregs are instantiated. */
16447 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16449 for (s = ix86_stack_locals; s; s = s->next)
16450 if (s->mode == mode && s->n == n)
16451 return copy_rtx (s->rtl);
16453 s = (struct stack_local_entry *)
16454 ggc_alloc (sizeof (struct stack_local_entry));
16457 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16459 s->next = ix86_stack_locals;
16460 ix86_stack_locals = s;
16464 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16466 static GTY(()) rtx ix86_tls_symbol;
16468 ix86_tls_get_addr (void)
16471 if (!ix86_tls_symbol)
16473 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16474 (TARGET_ANY_GNU_TLS
16476 ? "___tls_get_addr"
16477 : "__tls_get_addr");
16480 return ix86_tls_symbol;
16483 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16485 static GTY(()) rtx ix86_tls_module_base_symbol;
16487 ix86_tls_module_base (void)
16490 if (!ix86_tls_module_base_symbol)
16492 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16493 "_TLS_MODULE_BASE_");
16494 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16495 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16498 return ix86_tls_module_base_symbol;
16501 /* Calculate the length of the memory address in the instruction
16502 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16505 memory_address_length (rtx addr)
16507 struct ix86_address parts;
16508 rtx base, index, disp;
16512 if (GET_CODE (addr) == PRE_DEC
16513 || GET_CODE (addr) == POST_INC
16514 || GET_CODE (addr) == PRE_MODIFY
16515 || GET_CODE (addr) == POST_MODIFY)
16518 ok = ix86_decompose_address (addr, &parts);
16521 if (parts.base && GET_CODE (parts.base) == SUBREG)
16522 parts.base = SUBREG_REG (parts.base);
16523 if (parts.index && GET_CODE (parts.index) == SUBREG)
16524 parts.index = SUBREG_REG (parts.index);
16527 index = parts.index;
16532 - esp as the base always wants an index,
16533 - ebp as the base always wants a displacement. */
16535 /* Register Indirect. */
16536 if (base && !index && !disp)
16538 /* esp (for its index) and ebp (for its displacement) need
16539 the two-byte modrm form. */
16540 if (addr == stack_pointer_rtx
16541 || addr == arg_pointer_rtx
16542 || addr == frame_pointer_rtx
16543 || addr == hard_frame_pointer_rtx)
16547 /* Direct Addressing. */
16548 else if (disp && !base && !index)
16553 /* Find the length of the displacement constant. */
16556 if (base && satisfies_constraint_K (disp))
16561 /* ebp always wants a displacement. */
16562 else if (base == hard_frame_pointer_rtx)
16565 /* An index requires the two-byte modrm form.... */
16567 /* ...like esp, which always wants an index. */
16568 || base == stack_pointer_rtx
16569 || base == arg_pointer_rtx
16570 || base == frame_pointer_rtx)
16577 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16578 is set, expect that insn have 8bit immediate alternative. */
16580 ix86_attr_length_immediate_default (rtx insn, int shortform)
16584 extract_insn_cached (insn);
16585 for (i = recog_data.n_operands - 1; i >= 0; --i)
16586 if (CONSTANT_P (recog_data.operand[i]))
16589 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16593 switch (get_attr_mode (insn))
16604 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16609 fatal_insn ("unknown insn mode", insn);
16615 /* Compute default value for "length_address" attribute. */
16617 ix86_attr_length_address_default (rtx insn)
16621 if (get_attr_type (insn) == TYPE_LEA)
16623 rtx set = PATTERN (insn);
16625 if (GET_CODE (set) == PARALLEL)
16626 set = XVECEXP (set, 0, 0);
16628 gcc_assert (GET_CODE (set) == SET);
16630 return memory_address_length (SET_SRC (set));
16633 extract_insn_cached (insn);
16634 for (i = recog_data.n_operands - 1; i >= 0; --i)
16635 if (MEM_P (recog_data.operand[i]))
16637 return memory_address_length (XEXP (recog_data.operand[i], 0));
16643 /* Return the maximum number of instructions a cpu can issue. */
16646 ix86_issue_rate (void)
16650 case PROCESSOR_PENTIUM:
16654 case PROCESSOR_PENTIUMPRO:
16655 case PROCESSOR_PENTIUM4:
16656 case PROCESSOR_ATHLON:
16658 case PROCESSOR_AMDFAM10:
16659 case PROCESSOR_NOCONA:
16660 case PROCESSOR_GENERIC32:
16661 case PROCESSOR_GENERIC64:
16664 case PROCESSOR_CORE2:
16672 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16673 by DEP_INSN and nothing set by DEP_INSN. */
16676 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16680 /* Simplify the test for uninteresting insns. */
16681 if (insn_type != TYPE_SETCC
16682 && insn_type != TYPE_ICMOV
16683 && insn_type != TYPE_FCMOV
16684 && insn_type != TYPE_IBR)
16687 if ((set = single_set (dep_insn)) != 0)
16689 set = SET_DEST (set);
16692 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16693 && XVECLEN (PATTERN (dep_insn), 0) == 2
16694 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16695 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16697 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16698 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16703 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16706 /* This test is true if the dependent insn reads the flags but
16707 not any other potentially set register. */
16708 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16711 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16717 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16718 address with operands set by DEP_INSN. */
16721 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16725 if (insn_type == TYPE_LEA
16728 addr = PATTERN (insn);
16730 if (GET_CODE (addr) == PARALLEL)
16731 addr = XVECEXP (addr, 0, 0);
16733 gcc_assert (GET_CODE (addr) == SET);
16735 addr = SET_SRC (addr);
16740 extract_insn_cached (insn);
16741 for (i = recog_data.n_operands - 1; i >= 0; --i)
16742 if (MEM_P (recog_data.operand[i]))
16744 addr = XEXP (recog_data.operand[i], 0);
16751 return modified_in_p (addr, dep_insn);
16755 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16757 enum attr_type insn_type, dep_insn_type;
16758 enum attr_memory memory;
16760 int dep_insn_code_number;
16762 /* Anti and output dependencies have zero cost on all CPUs. */
16763 if (REG_NOTE_KIND (link) != 0)
16766 dep_insn_code_number = recog_memoized (dep_insn);
16768 /* If we can't recognize the insns, we can't really do anything. */
16769 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16772 insn_type = get_attr_type (insn);
16773 dep_insn_type = get_attr_type (dep_insn);
16777 case PROCESSOR_PENTIUM:
16778 /* Address Generation Interlock adds a cycle of latency. */
16779 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16782 /* ??? Compares pair with jump/setcc. */
16783 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16786 /* Floating point stores require value to be ready one cycle earlier. */
16787 if (insn_type == TYPE_FMOV
16788 && get_attr_memory (insn) == MEMORY_STORE
16789 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16793 case PROCESSOR_PENTIUMPRO:
16794 memory = get_attr_memory (insn);
16796 /* INT->FP conversion is expensive. */
16797 if (get_attr_fp_int_src (dep_insn))
16800 /* There is one cycle extra latency between an FP op and a store. */
16801 if (insn_type == TYPE_FMOV
16802 && (set = single_set (dep_insn)) != NULL_RTX
16803 && (set2 = single_set (insn)) != NULL_RTX
16804 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16805 && MEM_P (SET_DEST (set2)))
16808 /* Show ability of reorder buffer to hide latency of load by executing
16809 in parallel with previous instruction in case
16810 previous instruction is not needed to compute the address. */
16811 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16812 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16814 /* Claim moves to take one cycle, as core can issue one load
16815 at time and the next load can start cycle later. */
16816 if (dep_insn_type == TYPE_IMOV
16817 || dep_insn_type == TYPE_FMOV)
16825 memory = get_attr_memory (insn);
16827 /* The esp dependency is resolved before the instruction is really
16829 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16830 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16833 /* INT->FP conversion is expensive. */
16834 if (get_attr_fp_int_src (dep_insn))
16837 /* Show ability of reorder buffer to hide latency of load by executing
16838 in parallel with previous instruction in case
16839 previous instruction is not needed to compute the address. */
16840 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16841 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16843 /* Claim moves to take one cycle, as core can issue one load
16844 at time and the next load can start cycle later. */
16845 if (dep_insn_type == TYPE_IMOV
16846 || dep_insn_type == TYPE_FMOV)
16855 case PROCESSOR_ATHLON:
16857 case PROCESSOR_AMDFAM10:
16858 case PROCESSOR_GENERIC32:
16859 case PROCESSOR_GENERIC64:
16860 memory = get_attr_memory (insn);
16862 /* Show ability of reorder buffer to hide latency of load by executing
16863 in parallel with previous instruction in case
16864 previous instruction is not needed to compute the address. */
16865 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16866 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16868 enum attr_unit unit = get_attr_unit (insn);
16871 /* Because of the difference between the length of integer and
16872 floating unit pipeline preparation stages, the memory operands
16873 for floating point are cheaper.
16875 ??? For Athlon it the difference is most probably 2. */
16876 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16879 loadcost = TARGET_ATHLON ? 2 : 0;
16881 if (cost >= loadcost)
16894 /* How many alternative schedules to try. This should be as wide as the
16895 scheduling freedom in the DFA, but no wider. Making this value too
16896 large results extra work for the scheduler. */
16899 ia32_multipass_dfa_lookahead (void)
16903 case PROCESSOR_PENTIUM:
16906 case PROCESSOR_PENTIUMPRO:
16916 /* Compute the alignment given to a constant that is being placed in memory.
16917 EXP is the constant and ALIGN is the alignment that the object would
16919 The value of this function is used instead of that alignment to align
16923 ix86_constant_alignment (tree exp, int align)
16925 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16926 || TREE_CODE (exp) == INTEGER_CST)
16928 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16930 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16933 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16934 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16935 return BITS_PER_WORD;
16940 /* Compute the alignment for a static variable.
16941 TYPE is the data type, and ALIGN is the alignment that
16942 the object would ordinarily have. The value of this function is used
16943 instead of that alignment to align the object. */
16946 ix86_data_alignment (tree type, int align)
16948 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16950 if (AGGREGATE_TYPE_P (type)
16951 && TYPE_SIZE (type)
16952 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16953 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16954 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16955 && align < max_align)
16958 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16959 to 16byte boundary. */
16962 if (AGGREGATE_TYPE_P (type)
16963 && TYPE_SIZE (type)
16964 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16965 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16966 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16970 if (TREE_CODE (type) == ARRAY_TYPE)
16972 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16974 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16977 else if (TREE_CODE (type) == COMPLEX_TYPE)
16980 if (TYPE_MODE (type) == DCmode && align < 64)
16982 if (TYPE_MODE (type) == XCmode && align < 128)
16985 else if ((TREE_CODE (type) == RECORD_TYPE
16986 || TREE_CODE (type) == UNION_TYPE
16987 || TREE_CODE (type) == QUAL_UNION_TYPE)
16988 && TYPE_FIELDS (type))
16990 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16992 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16995 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16996 || TREE_CODE (type) == INTEGER_TYPE)
16998 if (TYPE_MODE (type) == DFmode && align < 64)
17000 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17007 /* Compute the alignment for a local variable.
17008 TYPE is the data type, and ALIGN is the alignment that
17009 the object would ordinarily have. The value of this macro is used
17010 instead of that alignment to align the object. */
17013 ix86_local_alignment (tree type, int align)
17015 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17016 to 16byte boundary. */
17019 if (AGGREGATE_TYPE_P (type)
17020 && TYPE_SIZE (type)
17021 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17022 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
17023 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
17026 if (TREE_CODE (type) == ARRAY_TYPE)
17028 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17030 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17033 else if (TREE_CODE (type) == COMPLEX_TYPE)
17035 if (TYPE_MODE (type) == DCmode && align < 64)
17037 if (TYPE_MODE (type) == XCmode && align < 128)
17040 else if ((TREE_CODE (type) == RECORD_TYPE
17041 || TREE_CODE (type) == UNION_TYPE
17042 || TREE_CODE (type) == QUAL_UNION_TYPE)
17043 && TYPE_FIELDS (type))
17045 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17047 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17050 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17051 || TREE_CODE (type) == INTEGER_TYPE)
17054 if (TYPE_MODE (type) == DFmode && align < 64)
17056 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17062 /* Emit RTL insns to initialize the variable parts of a trampoline.
17063 FNADDR is an RTX for the address of the function's pure code.
17064 CXT is an RTX for the static chain value for the function. */
17066 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
17070 /* Compute offset from the end of the jmp to the target function. */
17071 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
17072 plus_constant (tramp, 10),
17073 NULL_RTX, 1, OPTAB_DIRECT);
17074 emit_move_insn (gen_rtx_MEM (QImode, tramp),
17075 gen_int_mode (0xb9, QImode));
17076 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
17077 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
17078 gen_int_mode (0xe9, QImode));
17079 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
17084 /* Try to load address using shorter movl instead of movabs.
17085 We may want to support movq for kernel mode, but kernel does not use
17086 trampolines at the moment. */
17087 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17089 fnaddr = copy_to_mode_reg (DImode, fnaddr);
17090 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17091 gen_int_mode (0xbb41, HImode));
17092 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
17093 gen_lowpart (SImode, fnaddr));
17098 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17099 gen_int_mode (0xbb49, HImode));
17100 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17104 /* Load static chain using movabs to r10. */
17105 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17106 gen_int_mode (0xba49, HImode));
17107 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17110 /* Jump to the r11 */
17111 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17112 gen_int_mode (0xff49, HImode));
17113 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
17114 gen_int_mode (0xe3, QImode));
17116 gcc_assert (offset <= TRAMPOLINE_SIZE);
17119 #ifdef ENABLE_EXECUTE_STACK
17120 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17121 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
17125 /* Codes for all the SSE/MMX builtins. */
17128 IX86_BUILTIN_ADDPS,
17129 IX86_BUILTIN_ADDSS,
17130 IX86_BUILTIN_DIVPS,
17131 IX86_BUILTIN_DIVSS,
17132 IX86_BUILTIN_MULPS,
17133 IX86_BUILTIN_MULSS,
17134 IX86_BUILTIN_SUBPS,
17135 IX86_BUILTIN_SUBSS,
17137 IX86_BUILTIN_CMPEQPS,
17138 IX86_BUILTIN_CMPLTPS,
17139 IX86_BUILTIN_CMPLEPS,
17140 IX86_BUILTIN_CMPGTPS,
17141 IX86_BUILTIN_CMPGEPS,
17142 IX86_BUILTIN_CMPNEQPS,
17143 IX86_BUILTIN_CMPNLTPS,
17144 IX86_BUILTIN_CMPNLEPS,
17145 IX86_BUILTIN_CMPNGTPS,
17146 IX86_BUILTIN_CMPNGEPS,
17147 IX86_BUILTIN_CMPORDPS,
17148 IX86_BUILTIN_CMPUNORDPS,
17149 IX86_BUILTIN_CMPEQSS,
17150 IX86_BUILTIN_CMPLTSS,
17151 IX86_BUILTIN_CMPLESS,
17152 IX86_BUILTIN_CMPNEQSS,
17153 IX86_BUILTIN_CMPNLTSS,
17154 IX86_BUILTIN_CMPNLESS,
17155 IX86_BUILTIN_CMPNGTSS,
17156 IX86_BUILTIN_CMPNGESS,
17157 IX86_BUILTIN_CMPORDSS,
17158 IX86_BUILTIN_CMPUNORDSS,
17160 IX86_BUILTIN_COMIEQSS,
17161 IX86_BUILTIN_COMILTSS,
17162 IX86_BUILTIN_COMILESS,
17163 IX86_BUILTIN_COMIGTSS,
17164 IX86_BUILTIN_COMIGESS,
17165 IX86_BUILTIN_COMINEQSS,
17166 IX86_BUILTIN_UCOMIEQSS,
17167 IX86_BUILTIN_UCOMILTSS,
17168 IX86_BUILTIN_UCOMILESS,
17169 IX86_BUILTIN_UCOMIGTSS,
17170 IX86_BUILTIN_UCOMIGESS,
17171 IX86_BUILTIN_UCOMINEQSS,
17173 IX86_BUILTIN_CVTPI2PS,
17174 IX86_BUILTIN_CVTPS2PI,
17175 IX86_BUILTIN_CVTSI2SS,
17176 IX86_BUILTIN_CVTSI642SS,
17177 IX86_BUILTIN_CVTSS2SI,
17178 IX86_BUILTIN_CVTSS2SI64,
17179 IX86_BUILTIN_CVTTPS2PI,
17180 IX86_BUILTIN_CVTTSS2SI,
17181 IX86_BUILTIN_CVTTSS2SI64,
17183 IX86_BUILTIN_MAXPS,
17184 IX86_BUILTIN_MAXSS,
17185 IX86_BUILTIN_MINPS,
17186 IX86_BUILTIN_MINSS,
17188 IX86_BUILTIN_LOADUPS,
17189 IX86_BUILTIN_STOREUPS,
17190 IX86_BUILTIN_MOVSS,
17192 IX86_BUILTIN_MOVHLPS,
17193 IX86_BUILTIN_MOVLHPS,
17194 IX86_BUILTIN_LOADHPS,
17195 IX86_BUILTIN_LOADLPS,
17196 IX86_BUILTIN_STOREHPS,
17197 IX86_BUILTIN_STORELPS,
17199 IX86_BUILTIN_MASKMOVQ,
17200 IX86_BUILTIN_MOVMSKPS,
17201 IX86_BUILTIN_PMOVMSKB,
17203 IX86_BUILTIN_MOVNTPS,
17204 IX86_BUILTIN_MOVNTQ,
17206 IX86_BUILTIN_LOADDQU,
17207 IX86_BUILTIN_STOREDQU,
17209 IX86_BUILTIN_PACKSSWB,
17210 IX86_BUILTIN_PACKSSDW,
17211 IX86_BUILTIN_PACKUSWB,
17213 IX86_BUILTIN_PADDB,
17214 IX86_BUILTIN_PADDW,
17215 IX86_BUILTIN_PADDD,
17216 IX86_BUILTIN_PADDQ,
17217 IX86_BUILTIN_PADDSB,
17218 IX86_BUILTIN_PADDSW,
17219 IX86_BUILTIN_PADDUSB,
17220 IX86_BUILTIN_PADDUSW,
17221 IX86_BUILTIN_PSUBB,
17222 IX86_BUILTIN_PSUBW,
17223 IX86_BUILTIN_PSUBD,
17224 IX86_BUILTIN_PSUBQ,
17225 IX86_BUILTIN_PSUBSB,
17226 IX86_BUILTIN_PSUBSW,
17227 IX86_BUILTIN_PSUBUSB,
17228 IX86_BUILTIN_PSUBUSW,
17231 IX86_BUILTIN_PANDN,
17235 IX86_BUILTIN_PAVGB,
17236 IX86_BUILTIN_PAVGW,
17238 IX86_BUILTIN_PCMPEQB,
17239 IX86_BUILTIN_PCMPEQW,
17240 IX86_BUILTIN_PCMPEQD,
17241 IX86_BUILTIN_PCMPGTB,
17242 IX86_BUILTIN_PCMPGTW,
17243 IX86_BUILTIN_PCMPGTD,
17245 IX86_BUILTIN_PMADDWD,
17247 IX86_BUILTIN_PMAXSW,
17248 IX86_BUILTIN_PMAXUB,
17249 IX86_BUILTIN_PMINSW,
17250 IX86_BUILTIN_PMINUB,
17252 IX86_BUILTIN_PMULHUW,
17253 IX86_BUILTIN_PMULHW,
17254 IX86_BUILTIN_PMULLW,
17256 IX86_BUILTIN_PSADBW,
17257 IX86_BUILTIN_PSHUFW,
17259 IX86_BUILTIN_PSLLW,
17260 IX86_BUILTIN_PSLLD,
17261 IX86_BUILTIN_PSLLQ,
17262 IX86_BUILTIN_PSRAW,
17263 IX86_BUILTIN_PSRAD,
17264 IX86_BUILTIN_PSRLW,
17265 IX86_BUILTIN_PSRLD,
17266 IX86_BUILTIN_PSRLQ,
17267 IX86_BUILTIN_PSLLWI,
17268 IX86_BUILTIN_PSLLDI,
17269 IX86_BUILTIN_PSLLQI,
17270 IX86_BUILTIN_PSRAWI,
17271 IX86_BUILTIN_PSRADI,
17272 IX86_BUILTIN_PSRLWI,
17273 IX86_BUILTIN_PSRLDI,
17274 IX86_BUILTIN_PSRLQI,
17276 IX86_BUILTIN_PUNPCKHBW,
17277 IX86_BUILTIN_PUNPCKHWD,
17278 IX86_BUILTIN_PUNPCKHDQ,
17279 IX86_BUILTIN_PUNPCKLBW,
17280 IX86_BUILTIN_PUNPCKLWD,
17281 IX86_BUILTIN_PUNPCKLDQ,
17283 IX86_BUILTIN_SHUFPS,
17285 IX86_BUILTIN_RCPPS,
17286 IX86_BUILTIN_RCPSS,
17287 IX86_BUILTIN_RSQRTPS,
17288 IX86_BUILTIN_RSQRTPS_NR,
17289 IX86_BUILTIN_RSQRTSS,
17290 IX86_BUILTIN_RSQRTF,
17291 IX86_BUILTIN_SQRTPS,
17292 IX86_BUILTIN_SQRTPS_NR,
17293 IX86_BUILTIN_SQRTSS,
17295 IX86_BUILTIN_UNPCKHPS,
17296 IX86_BUILTIN_UNPCKLPS,
17298 IX86_BUILTIN_ANDPS,
17299 IX86_BUILTIN_ANDNPS,
17301 IX86_BUILTIN_XORPS,
17304 IX86_BUILTIN_LDMXCSR,
17305 IX86_BUILTIN_STMXCSR,
17306 IX86_BUILTIN_SFENCE,
17308 /* 3DNow! Original */
17309 IX86_BUILTIN_FEMMS,
17310 IX86_BUILTIN_PAVGUSB,
17311 IX86_BUILTIN_PF2ID,
17312 IX86_BUILTIN_PFACC,
17313 IX86_BUILTIN_PFADD,
17314 IX86_BUILTIN_PFCMPEQ,
17315 IX86_BUILTIN_PFCMPGE,
17316 IX86_BUILTIN_PFCMPGT,
17317 IX86_BUILTIN_PFMAX,
17318 IX86_BUILTIN_PFMIN,
17319 IX86_BUILTIN_PFMUL,
17320 IX86_BUILTIN_PFRCP,
17321 IX86_BUILTIN_PFRCPIT1,
17322 IX86_BUILTIN_PFRCPIT2,
17323 IX86_BUILTIN_PFRSQIT1,
17324 IX86_BUILTIN_PFRSQRT,
17325 IX86_BUILTIN_PFSUB,
17326 IX86_BUILTIN_PFSUBR,
17327 IX86_BUILTIN_PI2FD,
17328 IX86_BUILTIN_PMULHRW,
17330 /* 3DNow! Athlon Extensions */
17331 IX86_BUILTIN_PF2IW,
17332 IX86_BUILTIN_PFNACC,
17333 IX86_BUILTIN_PFPNACC,
17334 IX86_BUILTIN_PI2FW,
17335 IX86_BUILTIN_PSWAPDSI,
17336 IX86_BUILTIN_PSWAPDSF,
17339 IX86_BUILTIN_ADDPD,
17340 IX86_BUILTIN_ADDSD,
17341 IX86_BUILTIN_DIVPD,
17342 IX86_BUILTIN_DIVSD,
17343 IX86_BUILTIN_MULPD,
17344 IX86_BUILTIN_MULSD,
17345 IX86_BUILTIN_SUBPD,
17346 IX86_BUILTIN_SUBSD,
17348 IX86_BUILTIN_CMPEQPD,
17349 IX86_BUILTIN_CMPLTPD,
17350 IX86_BUILTIN_CMPLEPD,
17351 IX86_BUILTIN_CMPGTPD,
17352 IX86_BUILTIN_CMPGEPD,
17353 IX86_BUILTIN_CMPNEQPD,
17354 IX86_BUILTIN_CMPNLTPD,
17355 IX86_BUILTIN_CMPNLEPD,
17356 IX86_BUILTIN_CMPNGTPD,
17357 IX86_BUILTIN_CMPNGEPD,
17358 IX86_BUILTIN_CMPORDPD,
17359 IX86_BUILTIN_CMPUNORDPD,
17360 IX86_BUILTIN_CMPEQSD,
17361 IX86_BUILTIN_CMPLTSD,
17362 IX86_BUILTIN_CMPLESD,
17363 IX86_BUILTIN_CMPNEQSD,
17364 IX86_BUILTIN_CMPNLTSD,
17365 IX86_BUILTIN_CMPNLESD,
17366 IX86_BUILTIN_CMPORDSD,
17367 IX86_BUILTIN_CMPUNORDSD,
17369 IX86_BUILTIN_COMIEQSD,
17370 IX86_BUILTIN_COMILTSD,
17371 IX86_BUILTIN_COMILESD,
17372 IX86_BUILTIN_COMIGTSD,
17373 IX86_BUILTIN_COMIGESD,
17374 IX86_BUILTIN_COMINEQSD,
17375 IX86_BUILTIN_UCOMIEQSD,
17376 IX86_BUILTIN_UCOMILTSD,
17377 IX86_BUILTIN_UCOMILESD,
17378 IX86_BUILTIN_UCOMIGTSD,
17379 IX86_BUILTIN_UCOMIGESD,
17380 IX86_BUILTIN_UCOMINEQSD,
17382 IX86_BUILTIN_MAXPD,
17383 IX86_BUILTIN_MAXSD,
17384 IX86_BUILTIN_MINPD,
17385 IX86_BUILTIN_MINSD,
17387 IX86_BUILTIN_ANDPD,
17388 IX86_BUILTIN_ANDNPD,
17390 IX86_BUILTIN_XORPD,
17392 IX86_BUILTIN_SQRTPD,
17393 IX86_BUILTIN_SQRTSD,
17395 IX86_BUILTIN_UNPCKHPD,
17396 IX86_BUILTIN_UNPCKLPD,
17398 IX86_BUILTIN_SHUFPD,
17400 IX86_BUILTIN_LOADUPD,
17401 IX86_BUILTIN_STOREUPD,
17402 IX86_BUILTIN_MOVSD,
17404 IX86_BUILTIN_LOADHPD,
17405 IX86_BUILTIN_LOADLPD,
17407 IX86_BUILTIN_CVTDQ2PD,
17408 IX86_BUILTIN_CVTDQ2PS,
17410 IX86_BUILTIN_CVTPD2DQ,
17411 IX86_BUILTIN_CVTPD2PI,
17412 IX86_BUILTIN_CVTPD2PS,
17413 IX86_BUILTIN_CVTTPD2DQ,
17414 IX86_BUILTIN_CVTTPD2PI,
17416 IX86_BUILTIN_CVTPI2PD,
17417 IX86_BUILTIN_CVTSI2SD,
17418 IX86_BUILTIN_CVTSI642SD,
17420 IX86_BUILTIN_CVTSD2SI,
17421 IX86_BUILTIN_CVTSD2SI64,
17422 IX86_BUILTIN_CVTSD2SS,
17423 IX86_BUILTIN_CVTSS2SD,
17424 IX86_BUILTIN_CVTTSD2SI,
17425 IX86_BUILTIN_CVTTSD2SI64,
17427 IX86_BUILTIN_CVTPS2DQ,
17428 IX86_BUILTIN_CVTPS2PD,
17429 IX86_BUILTIN_CVTTPS2DQ,
17431 IX86_BUILTIN_MOVNTI,
17432 IX86_BUILTIN_MOVNTPD,
17433 IX86_BUILTIN_MOVNTDQ,
17436 IX86_BUILTIN_MASKMOVDQU,
17437 IX86_BUILTIN_MOVMSKPD,
17438 IX86_BUILTIN_PMOVMSKB128,
17440 IX86_BUILTIN_PACKSSWB128,
17441 IX86_BUILTIN_PACKSSDW128,
17442 IX86_BUILTIN_PACKUSWB128,
17444 IX86_BUILTIN_PADDB128,
17445 IX86_BUILTIN_PADDW128,
17446 IX86_BUILTIN_PADDD128,
17447 IX86_BUILTIN_PADDQ128,
17448 IX86_BUILTIN_PADDSB128,
17449 IX86_BUILTIN_PADDSW128,
17450 IX86_BUILTIN_PADDUSB128,
17451 IX86_BUILTIN_PADDUSW128,
17452 IX86_BUILTIN_PSUBB128,
17453 IX86_BUILTIN_PSUBW128,
17454 IX86_BUILTIN_PSUBD128,
17455 IX86_BUILTIN_PSUBQ128,
17456 IX86_BUILTIN_PSUBSB128,
17457 IX86_BUILTIN_PSUBSW128,
17458 IX86_BUILTIN_PSUBUSB128,
17459 IX86_BUILTIN_PSUBUSW128,
17461 IX86_BUILTIN_PAND128,
17462 IX86_BUILTIN_PANDN128,
17463 IX86_BUILTIN_POR128,
17464 IX86_BUILTIN_PXOR128,
17466 IX86_BUILTIN_PAVGB128,
17467 IX86_BUILTIN_PAVGW128,
17469 IX86_BUILTIN_PCMPEQB128,
17470 IX86_BUILTIN_PCMPEQW128,
17471 IX86_BUILTIN_PCMPEQD128,
17472 IX86_BUILTIN_PCMPGTB128,
17473 IX86_BUILTIN_PCMPGTW128,
17474 IX86_BUILTIN_PCMPGTD128,
17476 IX86_BUILTIN_PMADDWD128,
17478 IX86_BUILTIN_PMAXSW128,
17479 IX86_BUILTIN_PMAXUB128,
17480 IX86_BUILTIN_PMINSW128,
17481 IX86_BUILTIN_PMINUB128,
17483 IX86_BUILTIN_PMULUDQ,
17484 IX86_BUILTIN_PMULUDQ128,
17485 IX86_BUILTIN_PMULHUW128,
17486 IX86_BUILTIN_PMULHW128,
17487 IX86_BUILTIN_PMULLW128,
17489 IX86_BUILTIN_PSADBW128,
17490 IX86_BUILTIN_PSHUFHW,
17491 IX86_BUILTIN_PSHUFLW,
17492 IX86_BUILTIN_PSHUFD,
17494 IX86_BUILTIN_PSLLDQI128,
17495 IX86_BUILTIN_PSLLWI128,
17496 IX86_BUILTIN_PSLLDI128,
17497 IX86_BUILTIN_PSLLQI128,
17498 IX86_BUILTIN_PSRAWI128,
17499 IX86_BUILTIN_PSRADI128,
17500 IX86_BUILTIN_PSRLDQI128,
17501 IX86_BUILTIN_PSRLWI128,
17502 IX86_BUILTIN_PSRLDI128,
17503 IX86_BUILTIN_PSRLQI128,
17505 IX86_BUILTIN_PSLLDQ128,
17506 IX86_BUILTIN_PSLLW128,
17507 IX86_BUILTIN_PSLLD128,
17508 IX86_BUILTIN_PSLLQ128,
17509 IX86_BUILTIN_PSRAW128,
17510 IX86_BUILTIN_PSRAD128,
17511 IX86_BUILTIN_PSRLW128,
17512 IX86_BUILTIN_PSRLD128,
17513 IX86_BUILTIN_PSRLQ128,
17515 IX86_BUILTIN_PUNPCKHBW128,
17516 IX86_BUILTIN_PUNPCKHWD128,
17517 IX86_BUILTIN_PUNPCKHDQ128,
17518 IX86_BUILTIN_PUNPCKHQDQ128,
17519 IX86_BUILTIN_PUNPCKLBW128,
17520 IX86_BUILTIN_PUNPCKLWD128,
17521 IX86_BUILTIN_PUNPCKLDQ128,
17522 IX86_BUILTIN_PUNPCKLQDQ128,
17524 IX86_BUILTIN_CLFLUSH,
17525 IX86_BUILTIN_MFENCE,
17526 IX86_BUILTIN_LFENCE,
17529 IX86_BUILTIN_ADDSUBPS,
17530 IX86_BUILTIN_HADDPS,
17531 IX86_BUILTIN_HSUBPS,
17532 IX86_BUILTIN_MOVSHDUP,
17533 IX86_BUILTIN_MOVSLDUP,
17534 IX86_BUILTIN_ADDSUBPD,
17535 IX86_BUILTIN_HADDPD,
17536 IX86_BUILTIN_HSUBPD,
17537 IX86_BUILTIN_LDDQU,
17539 IX86_BUILTIN_MONITOR,
17540 IX86_BUILTIN_MWAIT,
17543 IX86_BUILTIN_PHADDW,
17544 IX86_BUILTIN_PHADDD,
17545 IX86_BUILTIN_PHADDSW,
17546 IX86_BUILTIN_PHSUBW,
17547 IX86_BUILTIN_PHSUBD,
17548 IX86_BUILTIN_PHSUBSW,
17549 IX86_BUILTIN_PMADDUBSW,
17550 IX86_BUILTIN_PMULHRSW,
17551 IX86_BUILTIN_PSHUFB,
17552 IX86_BUILTIN_PSIGNB,
17553 IX86_BUILTIN_PSIGNW,
17554 IX86_BUILTIN_PSIGND,
17555 IX86_BUILTIN_PALIGNR,
17556 IX86_BUILTIN_PABSB,
17557 IX86_BUILTIN_PABSW,
17558 IX86_BUILTIN_PABSD,
17560 IX86_BUILTIN_PHADDW128,
17561 IX86_BUILTIN_PHADDD128,
17562 IX86_BUILTIN_PHADDSW128,
17563 IX86_BUILTIN_PHSUBW128,
17564 IX86_BUILTIN_PHSUBD128,
17565 IX86_BUILTIN_PHSUBSW128,
17566 IX86_BUILTIN_PMADDUBSW128,
17567 IX86_BUILTIN_PMULHRSW128,
17568 IX86_BUILTIN_PSHUFB128,
17569 IX86_BUILTIN_PSIGNB128,
17570 IX86_BUILTIN_PSIGNW128,
17571 IX86_BUILTIN_PSIGND128,
17572 IX86_BUILTIN_PALIGNR128,
17573 IX86_BUILTIN_PABSB128,
17574 IX86_BUILTIN_PABSW128,
17575 IX86_BUILTIN_PABSD128,
17577 /* AMDFAM10 - SSE4A New Instructions. */
17578 IX86_BUILTIN_MOVNTSD,
17579 IX86_BUILTIN_MOVNTSS,
17580 IX86_BUILTIN_EXTRQI,
17581 IX86_BUILTIN_EXTRQ,
17582 IX86_BUILTIN_INSERTQI,
17583 IX86_BUILTIN_INSERTQ,
17586 IX86_BUILTIN_BLENDPD,
17587 IX86_BUILTIN_BLENDPS,
17588 IX86_BUILTIN_BLENDVPD,
17589 IX86_BUILTIN_BLENDVPS,
17590 IX86_BUILTIN_PBLENDVB128,
17591 IX86_BUILTIN_PBLENDW128,
17596 IX86_BUILTIN_INSERTPS128,
17598 IX86_BUILTIN_MOVNTDQA,
17599 IX86_BUILTIN_MPSADBW128,
17600 IX86_BUILTIN_PACKUSDW128,
17601 IX86_BUILTIN_PCMPEQQ,
17602 IX86_BUILTIN_PHMINPOSUW128,
17604 IX86_BUILTIN_PMAXSB128,
17605 IX86_BUILTIN_PMAXSD128,
17606 IX86_BUILTIN_PMAXUD128,
17607 IX86_BUILTIN_PMAXUW128,
17609 IX86_BUILTIN_PMINSB128,
17610 IX86_BUILTIN_PMINSD128,
17611 IX86_BUILTIN_PMINUD128,
17612 IX86_BUILTIN_PMINUW128,
17614 IX86_BUILTIN_PMOVSXBW128,
17615 IX86_BUILTIN_PMOVSXBD128,
17616 IX86_BUILTIN_PMOVSXBQ128,
17617 IX86_BUILTIN_PMOVSXWD128,
17618 IX86_BUILTIN_PMOVSXWQ128,
17619 IX86_BUILTIN_PMOVSXDQ128,
17621 IX86_BUILTIN_PMOVZXBW128,
17622 IX86_BUILTIN_PMOVZXBD128,
17623 IX86_BUILTIN_PMOVZXBQ128,
17624 IX86_BUILTIN_PMOVZXWD128,
17625 IX86_BUILTIN_PMOVZXWQ128,
17626 IX86_BUILTIN_PMOVZXDQ128,
17628 IX86_BUILTIN_PMULDQ128,
17629 IX86_BUILTIN_PMULLD128,
17631 IX86_BUILTIN_ROUNDPD,
17632 IX86_BUILTIN_ROUNDPS,
17633 IX86_BUILTIN_ROUNDSD,
17634 IX86_BUILTIN_ROUNDSS,
17636 IX86_BUILTIN_PTESTZ,
17637 IX86_BUILTIN_PTESTC,
17638 IX86_BUILTIN_PTESTNZC,
17640 IX86_BUILTIN_VEC_INIT_V2SI,
17641 IX86_BUILTIN_VEC_INIT_V4HI,
17642 IX86_BUILTIN_VEC_INIT_V8QI,
17643 IX86_BUILTIN_VEC_EXT_V2DF,
17644 IX86_BUILTIN_VEC_EXT_V2DI,
17645 IX86_BUILTIN_VEC_EXT_V4SF,
17646 IX86_BUILTIN_VEC_EXT_V4SI,
17647 IX86_BUILTIN_VEC_EXT_V8HI,
17648 IX86_BUILTIN_VEC_EXT_V2SI,
17649 IX86_BUILTIN_VEC_EXT_V4HI,
17650 IX86_BUILTIN_VEC_EXT_V16QI,
17651 IX86_BUILTIN_VEC_SET_V2DI,
17652 IX86_BUILTIN_VEC_SET_V4SF,
17653 IX86_BUILTIN_VEC_SET_V4SI,
17654 IX86_BUILTIN_VEC_SET_V8HI,
17655 IX86_BUILTIN_VEC_SET_V4HI,
17656 IX86_BUILTIN_VEC_SET_V16QI,
17658 IX86_BUILTIN_VEC_PACK_SFIX,
17661 IX86_BUILTIN_CRC32QI,
17662 IX86_BUILTIN_CRC32HI,
17663 IX86_BUILTIN_CRC32SI,
17664 IX86_BUILTIN_CRC32DI,
17666 IX86_BUILTIN_PCMPESTRI128,
17667 IX86_BUILTIN_PCMPESTRM128,
17668 IX86_BUILTIN_PCMPESTRA128,
17669 IX86_BUILTIN_PCMPESTRC128,
17670 IX86_BUILTIN_PCMPESTRO128,
17671 IX86_BUILTIN_PCMPESTRS128,
17672 IX86_BUILTIN_PCMPESTRZ128,
17673 IX86_BUILTIN_PCMPISTRI128,
17674 IX86_BUILTIN_PCMPISTRM128,
17675 IX86_BUILTIN_PCMPISTRA128,
17676 IX86_BUILTIN_PCMPISTRC128,
17677 IX86_BUILTIN_PCMPISTRO128,
17678 IX86_BUILTIN_PCMPISTRS128,
17679 IX86_BUILTIN_PCMPISTRZ128,
17681 IX86_BUILTIN_PCMPGTQ,
17683 /* AES instructions */
17684 IX86_BUILTIN_AESENC128,
17685 IX86_BUILTIN_AESENCLAST128,
17686 IX86_BUILTIN_AESDEC128,
17687 IX86_BUILTIN_AESDECLAST128,
17688 IX86_BUILTIN_AESIMC128,
17689 IX86_BUILTIN_AESKEYGENASSIST128,
17691 /* PCLMUL instruction */
17692 IX86_BUILTIN_PCLMULQDQ128,
17694 /* TFmode support builtins. */
17696 IX86_BUILTIN_FABSQ,
17697 IX86_BUILTIN_COPYSIGNQ,
17699 /* SSE5 instructions */
17700 IX86_BUILTIN_FMADDSS,
17701 IX86_BUILTIN_FMADDSD,
17702 IX86_BUILTIN_FMADDPS,
17703 IX86_BUILTIN_FMADDPD,
17704 IX86_BUILTIN_FMSUBSS,
17705 IX86_BUILTIN_FMSUBSD,
17706 IX86_BUILTIN_FMSUBPS,
17707 IX86_BUILTIN_FMSUBPD,
17708 IX86_BUILTIN_FNMADDSS,
17709 IX86_BUILTIN_FNMADDSD,
17710 IX86_BUILTIN_FNMADDPS,
17711 IX86_BUILTIN_FNMADDPD,
17712 IX86_BUILTIN_FNMSUBSS,
17713 IX86_BUILTIN_FNMSUBSD,
17714 IX86_BUILTIN_FNMSUBPS,
17715 IX86_BUILTIN_FNMSUBPD,
17716 IX86_BUILTIN_PCMOV_V2DI,
17717 IX86_BUILTIN_PCMOV_V4SI,
17718 IX86_BUILTIN_PCMOV_V8HI,
17719 IX86_BUILTIN_PCMOV_V16QI,
17720 IX86_BUILTIN_PCMOV_V4SF,
17721 IX86_BUILTIN_PCMOV_V2DF,
17722 IX86_BUILTIN_PPERM,
17723 IX86_BUILTIN_PERMPS,
17724 IX86_BUILTIN_PERMPD,
17725 IX86_BUILTIN_PMACSSWW,
17726 IX86_BUILTIN_PMACSWW,
17727 IX86_BUILTIN_PMACSSWD,
17728 IX86_BUILTIN_PMACSWD,
17729 IX86_BUILTIN_PMACSSDD,
17730 IX86_BUILTIN_PMACSDD,
17731 IX86_BUILTIN_PMACSSDQL,
17732 IX86_BUILTIN_PMACSSDQH,
17733 IX86_BUILTIN_PMACSDQL,
17734 IX86_BUILTIN_PMACSDQH,
17735 IX86_BUILTIN_PMADCSSWD,
17736 IX86_BUILTIN_PMADCSWD,
17737 IX86_BUILTIN_PHADDBW,
17738 IX86_BUILTIN_PHADDBD,
17739 IX86_BUILTIN_PHADDBQ,
17740 IX86_BUILTIN_PHADDWD,
17741 IX86_BUILTIN_PHADDWQ,
17742 IX86_BUILTIN_PHADDDQ,
17743 IX86_BUILTIN_PHADDUBW,
17744 IX86_BUILTIN_PHADDUBD,
17745 IX86_BUILTIN_PHADDUBQ,
17746 IX86_BUILTIN_PHADDUWD,
17747 IX86_BUILTIN_PHADDUWQ,
17748 IX86_BUILTIN_PHADDUDQ,
17749 IX86_BUILTIN_PHSUBBW,
17750 IX86_BUILTIN_PHSUBWD,
17751 IX86_BUILTIN_PHSUBDQ,
17752 IX86_BUILTIN_PROTB,
17753 IX86_BUILTIN_PROTW,
17754 IX86_BUILTIN_PROTD,
17755 IX86_BUILTIN_PROTQ,
17756 IX86_BUILTIN_PROTB_IMM,
17757 IX86_BUILTIN_PROTW_IMM,
17758 IX86_BUILTIN_PROTD_IMM,
17759 IX86_BUILTIN_PROTQ_IMM,
17760 IX86_BUILTIN_PSHLB,
17761 IX86_BUILTIN_PSHLW,
17762 IX86_BUILTIN_PSHLD,
17763 IX86_BUILTIN_PSHLQ,
17764 IX86_BUILTIN_PSHAB,
17765 IX86_BUILTIN_PSHAW,
17766 IX86_BUILTIN_PSHAD,
17767 IX86_BUILTIN_PSHAQ,
17768 IX86_BUILTIN_FRCZSS,
17769 IX86_BUILTIN_FRCZSD,
17770 IX86_BUILTIN_FRCZPS,
17771 IX86_BUILTIN_FRCZPD,
17772 IX86_BUILTIN_CVTPH2PS,
17773 IX86_BUILTIN_CVTPS2PH,
17775 IX86_BUILTIN_COMEQSS,
17776 IX86_BUILTIN_COMNESS,
17777 IX86_BUILTIN_COMLTSS,
17778 IX86_BUILTIN_COMLESS,
17779 IX86_BUILTIN_COMGTSS,
17780 IX86_BUILTIN_COMGESS,
17781 IX86_BUILTIN_COMUEQSS,
17782 IX86_BUILTIN_COMUNESS,
17783 IX86_BUILTIN_COMULTSS,
17784 IX86_BUILTIN_COMULESS,
17785 IX86_BUILTIN_COMUGTSS,
17786 IX86_BUILTIN_COMUGESS,
17787 IX86_BUILTIN_COMORDSS,
17788 IX86_BUILTIN_COMUNORDSS,
17789 IX86_BUILTIN_COMFALSESS,
17790 IX86_BUILTIN_COMTRUESS,
17792 IX86_BUILTIN_COMEQSD,
17793 IX86_BUILTIN_COMNESD,
17794 IX86_BUILTIN_COMLTSD,
17795 IX86_BUILTIN_COMLESD,
17796 IX86_BUILTIN_COMGTSD,
17797 IX86_BUILTIN_COMGESD,
17798 IX86_BUILTIN_COMUEQSD,
17799 IX86_BUILTIN_COMUNESD,
17800 IX86_BUILTIN_COMULTSD,
17801 IX86_BUILTIN_COMULESD,
17802 IX86_BUILTIN_COMUGTSD,
17803 IX86_BUILTIN_COMUGESD,
17804 IX86_BUILTIN_COMORDSD,
17805 IX86_BUILTIN_COMUNORDSD,
17806 IX86_BUILTIN_COMFALSESD,
17807 IX86_BUILTIN_COMTRUESD,
17809 IX86_BUILTIN_COMEQPS,
17810 IX86_BUILTIN_COMNEPS,
17811 IX86_BUILTIN_COMLTPS,
17812 IX86_BUILTIN_COMLEPS,
17813 IX86_BUILTIN_COMGTPS,
17814 IX86_BUILTIN_COMGEPS,
17815 IX86_BUILTIN_COMUEQPS,
17816 IX86_BUILTIN_COMUNEPS,
17817 IX86_BUILTIN_COMULTPS,
17818 IX86_BUILTIN_COMULEPS,
17819 IX86_BUILTIN_COMUGTPS,
17820 IX86_BUILTIN_COMUGEPS,
17821 IX86_BUILTIN_COMORDPS,
17822 IX86_BUILTIN_COMUNORDPS,
17823 IX86_BUILTIN_COMFALSEPS,
17824 IX86_BUILTIN_COMTRUEPS,
17826 IX86_BUILTIN_COMEQPD,
17827 IX86_BUILTIN_COMNEPD,
17828 IX86_BUILTIN_COMLTPD,
17829 IX86_BUILTIN_COMLEPD,
17830 IX86_BUILTIN_COMGTPD,
17831 IX86_BUILTIN_COMGEPD,
17832 IX86_BUILTIN_COMUEQPD,
17833 IX86_BUILTIN_COMUNEPD,
17834 IX86_BUILTIN_COMULTPD,
17835 IX86_BUILTIN_COMULEPD,
17836 IX86_BUILTIN_COMUGTPD,
17837 IX86_BUILTIN_COMUGEPD,
17838 IX86_BUILTIN_COMORDPD,
17839 IX86_BUILTIN_COMUNORDPD,
17840 IX86_BUILTIN_COMFALSEPD,
17841 IX86_BUILTIN_COMTRUEPD,
17843 IX86_BUILTIN_PCOMEQUB,
17844 IX86_BUILTIN_PCOMNEUB,
17845 IX86_BUILTIN_PCOMLTUB,
17846 IX86_BUILTIN_PCOMLEUB,
17847 IX86_BUILTIN_PCOMGTUB,
17848 IX86_BUILTIN_PCOMGEUB,
17849 IX86_BUILTIN_PCOMFALSEUB,
17850 IX86_BUILTIN_PCOMTRUEUB,
17851 IX86_BUILTIN_PCOMEQUW,
17852 IX86_BUILTIN_PCOMNEUW,
17853 IX86_BUILTIN_PCOMLTUW,
17854 IX86_BUILTIN_PCOMLEUW,
17855 IX86_BUILTIN_PCOMGTUW,
17856 IX86_BUILTIN_PCOMGEUW,
17857 IX86_BUILTIN_PCOMFALSEUW,
17858 IX86_BUILTIN_PCOMTRUEUW,
17859 IX86_BUILTIN_PCOMEQUD,
17860 IX86_BUILTIN_PCOMNEUD,
17861 IX86_BUILTIN_PCOMLTUD,
17862 IX86_BUILTIN_PCOMLEUD,
17863 IX86_BUILTIN_PCOMGTUD,
17864 IX86_BUILTIN_PCOMGEUD,
17865 IX86_BUILTIN_PCOMFALSEUD,
17866 IX86_BUILTIN_PCOMTRUEUD,
17867 IX86_BUILTIN_PCOMEQUQ,
17868 IX86_BUILTIN_PCOMNEUQ,
17869 IX86_BUILTIN_PCOMLTUQ,
17870 IX86_BUILTIN_PCOMLEUQ,
17871 IX86_BUILTIN_PCOMGTUQ,
17872 IX86_BUILTIN_PCOMGEUQ,
17873 IX86_BUILTIN_PCOMFALSEUQ,
17874 IX86_BUILTIN_PCOMTRUEUQ,
17876 IX86_BUILTIN_PCOMEQB,
17877 IX86_BUILTIN_PCOMNEB,
17878 IX86_BUILTIN_PCOMLTB,
17879 IX86_BUILTIN_PCOMLEB,
17880 IX86_BUILTIN_PCOMGTB,
17881 IX86_BUILTIN_PCOMGEB,
17882 IX86_BUILTIN_PCOMFALSEB,
17883 IX86_BUILTIN_PCOMTRUEB,
17884 IX86_BUILTIN_PCOMEQW,
17885 IX86_BUILTIN_PCOMNEW,
17886 IX86_BUILTIN_PCOMLTW,
17887 IX86_BUILTIN_PCOMLEW,
17888 IX86_BUILTIN_PCOMGTW,
17889 IX86_BUILTIN_PCOMGEW,
17890 IX86_BUILTIN_PCOMFALSEW,
17891 IX86_BUILTIN_PCOMTRUEW,
17892 IX86_BUILTIN_PCOMEQD,
17893 IX86_BUILTIN_PCOMNED,
17894 IX86_BUILTIN_PCOMLTD,
17895 IX86_BUILTIN_PCOMLED,
17896 IX86_BUILTIN_PCOMGTD,
17897 IX86_BUILTIN_PCOMGED,
17898 IX86_BUILTIN_PCOMFALSED,
17899 IX86_BUILTIN_PCOMTRUED,
17900 IX86_BUILTIN_PCOMEQQ,
17901 IX86_BUILTIN_PCOMNEQ,
17902 IX86_BUILTIN_PCOMLTQ,
17903 IX86_BUILTIN_PCOMLEQ,
17904 IX86_BUILTIN_PCOMGTQ,
17905 IX86_BUILTIN_PCOMGEQ,
17906 IX86_BUILTIN_PCOMFALSEQ,
17907 IX86_BUILTIN_PCOMTRUEQ,
17912 /* Table for the ix86 builtin decls. */
17913 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17915 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
17916 * if the target_flags include one of MASK. Stores the function decl
17917 * in the ix86_builtins array.
17918 * Returns the function decl or NULL_TREE, if the builtin was not added. */
17921 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17923 tree decl = NULL_TREE;
17925 if (mask & ix86_isa_flags
17926 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
17928 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17930 ix86_builtins[(int) code] = decl;
17936 /* Like def_builtin, but also marks the function decl "const". */
17939 def_builtin_const (int mask, const char *name, tree type,
17940 enum ix86_builtins code)
17942 tree decl = def_builtin (mask, name, type, code);
17944 TREE_READONLY (decl) = 1;
17948 /* Bits for builtin_description.flag. */
17950 /* Set when we don't support the comparison natively, and should
17951 swap_comparison in order to support it. */
17952 #define BUILTIN_DESC_SWAP_OPERANDS 1
17954 struct builtin_description
17956 const unsigned int mask;
17957 const enum insn_code icode;
17958 const char *const name;
17959 const enum ix86_builtins code;
17960 const enum rtx_code comparison;
17964 static const struct builtin_description bdesc_comi[] =
17966 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17967 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17968 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17969 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17970 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17971 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17972 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17973 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17974 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17975 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17976 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17977 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17981 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17982 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17983 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17984 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17985 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17986 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17987 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17989 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17992 static const struct builtin_description bdesc_pcmpestr[] =
17995 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17996 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17997 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17998 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17999 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
18000 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
18001 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
18004 static const struct builtin_description bdesc_pcmpistr[] =
18007 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
18008 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
18009 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
18010 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
18011 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
18012 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
18013 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
18016 /* Special builtin types */
18017 enum ix86_special_builtin_type
18019 SPECIAL_FTYPE_UNKNOWN,
18021 V16QI_FTYPE_PCCHAR,
18022 V4SF_FTYPE_PCFLOAT,
18023 V2DF_FTYPE_PCDOUBLE,
18024 V4SF_FTYPE_V4SF_PCV2SF,
18025 V2DF_FTYPE_V2DF_PCDOUBLE,
18027 VOID_FTYPE_PV2SF_V4SF,
18028 VOID_FTYPE_PV2DI_V2DI,
18029 VOID_FTYPE_PCHAR_V16QI,
18030 VOID_FTYPE_PFLOAT_V4SF,
18031 VOID_FTYPE_PDOUBLE_V2DF,
18033 VOID_FTYPE_PINT_INT
18036 /* Builtin types */
18037 enum ix86_builtin_type
18040 FLOAT128_FTYPE_FLOAT128,
18042 FLOAT128_FTYPE_FLOAT128_FLOAT128,
18043 INT_FTYPE_V2DI_V2DI_PTEST,
18061 V4SF_FTYPE_V4SF_VEC_MERGE,
18069 V2DF_FTYPE_V2DF_VEC_MERGE,
18079 V16QI_FTYPE_V16QI_V16QI,
18080 V16QI_FTYPE_V8HI_V8HI,
18081 V8QI_FTYPE_V8QI_V8QI,
18082 V8QI_FTYPE_V4HI_V4HI,
18083 V8HI_FTYPE_V8HI_V8HI,
18084 V8HI_FTYPE_V8HI_V8HI_COUNT,
18085 V8HI_FTYPE_V16QI_V16QI,
18086 V8HI_FTYPE_V4SI_V4SI,
18087 V8HI_FTYPE_V8HI_SI_COUNT,
18088 V4SI_FTYPE_V4SI_V4SI,
18089 V4SI_FTYPE_V4SI_V4SI_COUNT,
18090 V4SI_FTYPE_V8HI_V8HI,
18091 V4SI_FTYPE_V4SF_V4SF,
18092 V4SI_FTYPE_V2DF_V2DF,
18093 V4SI_FTYPE_V4SI_SI_COUNT,
18094 V4HI_FTYPE_V4HI_V4HI,
18095 V4HI_FTYPE_V4HI_V4HI_COUNT,
18096 V4HI_FTYPE_V8QI_V8QI,
18097 V4HI_FTYPE_V2SI_V2SI,
18098 V4HI_FTYPE_V4HI_SI_COUNT,
18099 V4SF_FTYPE_V4SF_V4SF,
18100 V4SF_FTYPE_V4SF_V4SF_SWAP,
18101 V4SF_FTYPE_V4SF_V2SI,
18102 V4SF_FTYPE_V4SF_V2DF,
18103 V4SF_FTYPE_V4SF_DI,
18104 V4SF_FTYPE_V4SF_SI,
18105 V2DI_FTYPE_V2DI_V2DI,
18106 V2DI_FTYPE_V2DI_V2DI_COUNT,
18107 V2DI_FTYPE_V16QI_V16QI,
18108 V2DI_FTYPE_V4SI_V4SI,
18109 V2DI_FTYPE_V2DI_V16QI,
18110 V2DI_FTYPE_V2DF_V2DF,
18111 V2DI_FTYPE_V2DI_SI_COUNT,
18112 V2SI_FTYPE_V2SI_V2SI,
18113 V2SI_FTYPE_V2SI_V2SI_COUNT,
18114 V2SI_FTYPE_V4HI_V4HI,
18115 V2SI_FTYPE_V2SF_V2SF,
18116 V2SI_FTYPE_V2SI_SI_COUNT,
18117 V2DF_FTYPE_V2DF_V2DF,
18118 V2DF_FTYPE_V2DF_V2DF_SWAP,
18119 V2DF_FTYPE_V2DF_V4SF,
18120 V2DF_FTYPE_V2DF_DI,
18121 V2DF_FTYPE_V2DF_SI,
18122 V2SF_FTYPE_V2SF_V2SF,
18123 V1DI_FTYPE_V1DI_V1DI,
18124 V1DI_FTYPE_V1DI_V1DI_COUNT,
18125 V1DI_FTYPE_V8QI_V8QI,
18126 V1DI_FTYPE_V2SI_V2SI,
18127 V1DI_FTYPE_V1DI_SI_COUNT,
18128 UINT64_FTYPE_UINT64_UINT64,
18129 UINT_FTYPE_UINT_UINT,
18130 UINT_FTYPE_UINT_USHORT,
18131 UINT_FTYPE_UINT_UCHAR,
18132 V8HI_FTYPE_V8HI_INT,
18133 V4SI_FTYPE_V4SI_INT,
18134 V4HI_FTYPE_V4HI_INT,
18135 V4SF_FTYPE_V4SF_INT,
18136 V2DI_FTYPE_V2DI_INT,
18137 V2DI2TI_FTYPE_V2DI_INT,
18138 V2DF_FTYPE_V2DF_INT,
18139 V16QI_FTYPE_V16QI_V16QI_V16QI,
18140 V4SF_FTYPE_V4SF_V4SF_V4SF,
18141 V2DF_FTYPE_V2DF_V2DF_V2DF,
18142 V16QI_FTYPE_V16QI_V16QI_INT,
18143 V8HI_FTYPE_V8HI_V8HI_INT,
18144 V4SI_FTYPE_V4SI_V4SI_INT,
18145 V4SF_FTYPE_V4SF_V4SF_INT,
18146 V2DI_FTYPE_V2DI_V2DI_INT,
18147 V2DI2TI_FTYPE_V2DI_V2DI_INT,
18148 V1DI2DI_FTYPE_V1DI_V1DI_INT,
18149 V2DF_FTYPE_V2DF_V2DF_INT,
18150 V2DI_FTYPE_V2DI_UINT_UINT,
18151 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
18154 /* Special builtins with variable number of arguments. */
18155 static const struct builtin_description bdesc_special_args[] =
18158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18161 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18164 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18165 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18166 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
18168 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18169 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18170 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18171 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18173 /* SSE or 3DNow!A */
18174 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18175 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
18178 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
18182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18183 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
18184 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
18185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
18186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18192 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18195 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
18198 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18199 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18202 /* Builtins with variable number of arguments. */
18203 static const struct builtin_description bdesc_args[] =
18206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
18242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
18244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
18246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
18250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18255 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18257 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18259 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18260 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18261 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18262 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18264 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18265 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18266 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18267 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18270 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18271 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18272 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18273 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18275 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18276 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18277 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18278 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18279 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18280 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18281 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18282 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18283 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18284 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18285 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18286 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18287 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18288 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18289 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18292 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18293 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18294 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18295 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18296 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18297 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
18301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18302 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18304 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18308 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18311 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18315 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18316 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18317 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18328 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18330 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18331 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18334 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
18335 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18336 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18339 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18340 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18344 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18347 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18348 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18352 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18353 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18354 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18355 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18357 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18358 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18359 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18360 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18361 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18363 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
18364 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
18365 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
18367 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
18369 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18370 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18371 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18373 /* SSE MMX or 3Dnow!A */
18374 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18375 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18376 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18378 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18379 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18380 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18381 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18383 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
18384 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
18386 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
18389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
18392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
18393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
18394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
18395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
18397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
18400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
18405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18407 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18408 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
18412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18414 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18415 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18416 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18417 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
18428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18445 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18449 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18451 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18452 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18458 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
18460 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18461 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18462 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18463 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18464 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18465 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18466 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18467 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18478 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18479 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
18481 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18483 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18484 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18496 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18497 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18498 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18514 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
18517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
18518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
18522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
18523 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
18524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
18525 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
18527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18528 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18529 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18530 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18531 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18532 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18533 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18536 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18537 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18538 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18539 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18540 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18541 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18543 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18544 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18545 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18546 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
18549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
18555 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18556 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18559 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
18560 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18562 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18563 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18564 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18565 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18566 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18567 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
18571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
18572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
18574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
18575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18587 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
18590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
18591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18593 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18594 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18595 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18596 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18597 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18598 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18599 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18600 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18603 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
18604 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
18607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
18610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
18611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
18615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
18616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
18618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18632 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18634 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18635 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18636 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18637 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18638 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18639 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18640 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18641 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18642 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18643 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18645 /* SSE4.1 and SSE5 */
18646 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
18647 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
18648 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18649 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18651 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18652 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18653 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18656 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18657 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
18658 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
18659 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
18660 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
18663 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
18664 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
18665 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
18666 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18669 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
18670 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
18672 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18673 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18674 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18675 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18678 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
18681 { OPTION_MASK_ISA_64BIT, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
18682 { OPTION_MASK_ISA_64BIT, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
18686 enum multi_arg_type {
18696 MULTI_ARG_3_PERMPS,
18697 MULTI_ARG_3_PERMPD,
18704 MULTI_ARG_2_DI_IMM,
18705 MULTI_ARG_2_SI_IMM,
18706 MULTI_ARG_2_HI_IMM,
18707 MULTI_ARG_2_QI_IMM,
18708 MULTI_ARG_2_SF_CMP,
18709 MULTI_ARG_2_DF_CMP,
18710 MULTI_ARG_2_DI_CMP,
18711 MULTI_ARG_2_SI_CMP,
18712 MULTI_ARG_2_HI_CMP,
18713 MULTI_ARG_2_QI_CMP,
18736 static const struct builtin_description bdesc_multi_arg[] =
18738 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18739 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18740 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18741 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18742 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18743 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18744 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18745 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18746 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18747 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18748 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18749 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18750 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18751 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18752 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18753 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18754 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18755 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18756 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18757 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18758 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18759 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18760 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18761 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18762 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18763 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18764 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18765 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18766 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18767 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18768 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18769 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18770 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18771 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18772 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18773 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18774 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18775 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18776 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18777 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18778 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18779 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18780 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18781 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18782 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18783 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18784 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18785 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18786 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18787 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18788 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18789 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18790 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18791 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18792 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18793 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18794 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18795 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18796 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18797 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18798 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18799 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18800 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18801 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18802 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18803 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18804 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18805 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18806 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18807 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18808 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18809 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18810 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18811 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18812 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18814 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18815 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18816 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18817 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18818 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18819 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18820 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18821 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18822 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18823 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18824 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18825 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18826 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18827 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18828 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18829 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18831 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18832 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18833 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18834 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18835 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18836 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18837 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18838 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18839 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18840 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18841 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18842 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18843 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18844 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18845 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18846 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18848 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18849 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18850 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18851 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18852 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18853 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18854 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18855 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18856 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18857 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18858 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18859 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18860 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18861 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18862 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18865 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18870 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18872 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18874 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
18891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
18894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
18895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
18896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
18898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
18899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
18902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
18903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
18904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
18906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18908 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
18910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
18911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
18912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
18914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
18915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
18918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
18919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
18920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
18922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
18923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
18926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
18927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
18928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
18930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
18931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
18934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
18935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
18936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
18938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
18942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
18943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
18944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
18946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18949 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18974 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18975 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18978 ix86_init_mmx_sse_builtins (void)
18980 const struct builtin_description * d;
18983 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18984 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18985 tree V1DI_type_node
18986 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
18987 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18988 tree V2DI_type_node
18989 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18990 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18991 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18992 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18993 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18994 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18995 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18997 tree pchar_type_node = build_pointer_type (char_type_node);
18998 tree pcchar_type_node
18999 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
19000 tree pfloat_type_node = build_pointer_type (float_type_node);
19001 tree pcfloat_type_node
19002 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
19003 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
19004 tree pcv2sf_type_node
19005 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
19006 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
19007 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
19010 tree int_ftype_v4sf_v4sf
19011 = build_function_type_list (integer_type_node,
19012 V4SF_type_node, V4SF_type_node, NULL_TREE);
19013 tree v4si_ftype_v4sf_v4sf
19014 = build_function_type_list (V4SI_type_node,
19015 V4SF_type_node, V4SF_type_node, NULL_TREE);
19016 /* MMX/SSE/integer conversions. */
19017 tree int_ftype_v4sf
19018 = build_function_type_list (integer_type_node,
19019 V4SF_type_node, NULL_TREE);
19020 tree int64_ftype_v4sf
19021 = build_function_type_list (long_long_integer_type_node,
19022 V4SF_type_node, NULL_TREE);
19023 tree int_ftype_v8qi
19024 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
19025 tree v4sf_ftype_v4sf_int
19026 = build_function_type_list (V4SF_type_node,
19027 V4SF_type_node, integer_type_node, NULL_TREE);
19028 tree v4sf_ftype_v4sf_int64
19029 = build_function_type_list (V4SF_type_node,
19030 V4SF_type_node, long_long_integer_type_node,
19032 tree v4sf_ftype_v4sf_v2si
19033 = build_function_type_list (V4SF_type_node,
19034 V4SF_type_node, V2SI_type_node, NULL_TREE);
19036 /* Miscellaneous. */
19037 tree v8qi_ftype_v4hi_v4hi
19038 = build_function_type_list (V8QI_type_node,
19039 V4HI_type_node, V4HI_type_node, NULL_TREE);
19040 tree v4hi_ftype_v2si_v2si
19041 = build_function_type_list (V4HI_type_node,
19042 V2SI_type_node, V2SI_type_node, NULL_TREE);
19043 tree v4sf_ftype_v4sf_v4sf_int
19044 = build_function_type_list (V4SF_type_node,
19045 V4SF_type_node, V4SF_type_node,
19046 integer_type_node, NULL_TREE);
19047 tree v2si_ftype_v4hi_v4hi
19048 = build_function_type_list (V2SI_type_node,
19049 V4HI_type_node, V4HI_type_node, NULL_TREE);
19050 tree v4hi_ftype_v4hi_int
19051 = build_function_type_list (V4HI_type_node,
19052 V4HI_type_node, integer_type_node, NULL_TREE);
19053 tree v2si_ftype_v2si_int
19054 = build_function_type_list (V2SI_type_node,
19055 V2SI_type_node, integer_type_node, NULL_TREE);
19056 tree v1di_ftype_v1di_int
19057 = build_function_type_list (V1DI_type_node,
19058 V1DI_type_node, integer_type_node, NULL_TREE);
19060 tree void_ftype_void
19061 = build_function_type (void_type_node, void_list_node);
19062 tree void_ftype_unsigned
19063 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
19064 tree void_ftype_unsigned_unsigned
19065 = build_function_type_list (void_type_node, unsigned_type_node,
19066 unsigned_type_node, NULL_TREE);
19067 tree void_ftype_pcvoid_unsigned_unsigned
19068 = build_function_type_list (void_type_node, const_ptr_type_node,
19069 unsigned_type_node, unsigned_type_node,
19071 tree unsigned_ftype_void
19072 = build_function_type (unsigned_type_node, void_list_node);
19073 tree v2si_ftype_v4sf
19074 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
19075 /* Loads/stores. */
19076 tree void_ftype_v8qi_v8qi_pchar
19077 = build_function_type_list (void_type_node,
19078 V8QI_type_node, V8QI_type_node,
19079 pchar_type_node, NULL_TREE);
19080 tree v4sf_ftype_pcfloat
19081 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
19082 tree v4sf_ftype_v4sf_pcv2sf
19083 = build_function_type_list (V4SF_type_node,
19084 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
19085 tree void_ftype_pv2sf_v4sf
19086 = build_function_type_list (void_type_node,
19087 pv2sf_type_node, V4SF_type_node, NULL_TREE);
19088 tree void_ftype_pfloat_v4sf
19089 = build_function_type_list (void_type_node,
19090 pfloat_type_node, V4SF_type_node, NULL_TREE);
19091 tree void_ftype_pdi_di
19092 = build_function_type_list (void_type_node,
19093 pdi_type_node, long_long_unsigned_type_node,
19095 tree void_ftype_pv2di_v2di
19096 = build_function_type_list (void_type_node,
19097 pv2di_type_node, V2DI_type_node, NULL_TREE);
19098 /* Normal vector unops. */
19099 tree v4sf_ftype_v4sf
19100 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
19101 tree v16qi_ftype_v16qi
19102 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
19103 tree v8hi_ftype_v8hi
19104 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
19105 tree v4si_ftype_v4si
19106 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
19107 tree v8qi_ftype_v8qi
19108 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
19109 tree v4hi_ftype_v4hi
19110 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
19112 /* Normal vector binops. */
19113 tree v4sf_ftype_v4sf_v4sf
19114 = build_function_type_list (V4SF_type_node,
19115 V4SF_type_node, V4SF_type_node, NULL_TREE);
19116 tree v8qi_ftype_v8qi_v8qi
19117 = build_function_type_list (V8QI_type_node,
19118 V8QI_type_node, V8QI_type_node, NULL_TREE);
19119 tree v4hi_ftype_v4hi_v4hi
19120 = build_function_type_list (V4HI_type_node,
19121 V4HI_type_node, V4HI_type_node, NULL_TREE);
19122 tree v2si_ftype_v2si_v2si
19123 = build_function_type_list (V2SI_type_node,
19124 V2SI_type_node, V2SI_type_node, NULL_TREE);
19125 tree v1di_ftype_v1di_v1di
19126 = build_function_type_list (V1DI_type_node,
19127 V1DI_type_node, V1DI_type_node, NULL_TREE);
19128 tree v1di_ftype_v1di_v1di_int
19129 = build_function_type_list (V1DI_type_node,
19130 V1DI_type_node, V1DI_type_node,
19131 integer_type_node, NULL_TREE);
19132 tree v2si_ftype_v2sf
19133 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
19134 tree v2sf_ftype_v2si
19135 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
19136 tree v2si_ftype_v2si
19137 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
19138 tree v2sf_ftype_v2sf
19139 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
19140 tree v2sf_ftype_v2sf_v2sf
19141 = build_function_type_list (V2SF_type_node,
19142 V2SF_type_node, V2SF_type_node, NULL_TREE);
19143 tree v2si_ftype_v2sf_v2sf
19144 = build_function_type_list (V2SI_type_node,
19145 V2SF_type_node, V2SF_type_node, NULL_TREE);
19146 tree pint_type_node = build_pointer_type (integer_type_node);
19147 tree pdouble_type_node = build_pointer_type (double_type_node);
19148 tree pcdouble_type_node = build_pointer_type (
19149 build_type_variant (double_type_node, 1, 0));
19150 tree int_ftype_v2df_v2df
19151 = build_function_type_list (integer_type_node,
19152 V2DF_type_node, V2DF_type_node, NULL_TREE);
19154 tree void_ftype_pcvoid
19155 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
19156 tree v4sf_ftype_v4si
19157 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
19158 tree v4si_ftype_v4sf
19159 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
19160 tree v2df_ftype_v4si
19161 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
19162 tree v4si_ftype_v2df
19163 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
19164 tree v4si_ftype_v2df_v2df
19165 = build_function_type_list (V4SI_type_node,
19166 V2DF_type_node, V2DF_type_node, NULL_TREE);
19167 tree v2si_ftype_v2df
19168 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
19169 tree v4sf_ftype_v2df
19170 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
19171 tree v2df_ftype_v2si
19172 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
19173 tree v2df_ftype_v4sf
19174 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
19175 tree int_ftype_v2df
19176 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
19177 tree int64_ftype_v2df
19178 = build_function_type_list (long_long_integer_type_node,
19179 V2DF_type_node, NULL_TREE);
19180 tree v2df_ftype_v2df_int
19181 = build_function_type_list (V2DF_type_node,
19182 V2DF_type_node, integer_type_node, NULL_TREE);
19183 tree v2df_ftype_v2df_int64
19184 = build_function_type_list (V2DF_type_node,
19185 V2DF_type_node, long_long_integer_type_node,
19187 tree v4sf_ftype_v4sf_v2df
19188 = build_function_type_list (V4SF_type_node,
19189 V4SF_type_node, V2DF_type_node, NULL_TREE);
19190 tree v2df_ftype_v2df_v4sf
19191 = build_function_type_list (V2DF_type_node,
19192 V2DF_type_node, V4SF_type_node, NULL_TREE);
19193 tree v2df_ftype_v2df_v2df_int
19194 = build_function_type_list (V2DF_type_node,
19195 V2DF_type_node, V2DF_type_node,
19198 tree v2df_ftype_v2df_pcdouble
19199 = build_function_type_list (V2DF_type_node,
19200 V2DF_type_node, pcdouble_type_node, NULL_TREE);
19201 tree void_ftype_pdouble_v2df
19202 = build_function_type_list (void_type_node,
19203 pdouble_type_node, V2DF_type_node, NULL_TREE);
19204 tree void_ftype_pint_int
19205 = build_function_type_list (void_type_node,
19206 pint_type_node, integer_type_node, NULL_TREE);
19207 tree void_ftype_v16qi_v16qi_pchar
19208 = build_function_type_list (void_type_node,
19209 V16QI_type_node, V16QI_type_node,
19210 pchar_type_node, NULL_TREE);
19211 tree v2df_ftype_pcdouble
19212 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
19213 tree v2df_ftype_v2df_v2df
19214 = build_function_type_list (V2DF_type_node,
19215 V2DF_type_node, V2DF_type_node, NULL_TREE);
19216 tree v16qi_ftype_v16qi_v16qi
19217 = build_function_type_list (V16QI_type_node,
19218 V16QI_type_node, V16QI_type_node, NULL_TREE);
19219 tree v8hi_ftype_v8hi_v8hi
19220 = build_function_type_list (V8HI_type_node,
19221 V8HI_type_node, V8HI_type_node, NULL_TREE);
19222 tree v4si_ftype_v4si_v4si
19223 = build_function_type_list (V4SI_type_node,
19224 V4SI_type_node, V4SI_type_node, NULL_TREE);
19225 tree v2di_ftype_v2di_v2di
19226 = build_function_type_list (V2DI_type_node,
19227 V2DI_type_node, V2DI_type_node, NULL_TREE);
19228 tree v2di_ftype_v2df_v2df
19229 = build_function_type_list (V2DI_type_node,
19230 V2DF_type_node, V2DF_type_node, NULL_TREE);
19231 tree v2df_ftype_v2df
19232 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
19233 tree v2di_ftype_v2di_int
19234 = build_function_type_list (V2DI_type_node,
19235 V2DI_type_node, integer_type_node, NULL_TREE);
19236 tree v2di_ftype_v2di_v2di_int
19237 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19238 V2DI_type_node, integer_type_node, NULL_TREE);
19239 tree v4si_ftype_v4si_int
19240 = build_function_type_list (V4SI_type_node,
19241 V4SI_type_node, integer_type_node, NULL_TREE);
19242 tree v8hi_ftype_v8hi_int
19243 = build_function_type_list (V8HI_type_node,
19244 V8HI_type_node, integer_type_node, NULL_TREE);
19245 tree v4si_ftype_v8hi_v8hi
19246 = build_function_type_list (V4SI_type_node,
19247 V8HI_type_node, V8HI_type_node, NULL_TREE);
19248 tree v1di_ftype_v8qi_v8qi
19249 = build_function_type_list (V1DI_type_node,
19250 V8QI_type_node, V8QI_type_node, NULL_TREE);
19251 tree v1di_ftype_v2si_v2si
19252 = build_function_type_list (V1DI_type_node,
19253 V2SI_type_node, V2SI_type_node, NULL_TREE);
19254 tree v2di_ftype_v16qi_v16qi
19255 = build_function_type_list (V2DI_type_node,
19256 V16QI_type_node, V16QI_type_node, NULL_TREE);
19257 tree v2di_ftype_v4si_v4si
19258 = build_function_type_list (V2DI_type_node,
19259 V4SI_type_node, V4SI_type_node, NULL_TREE);
19260 tree int_ftype_v16qi
19261 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
19262 tree v16qi_ftype_pcchar
19263 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
19264 tree void_ftype_pchar_v16qi
19265 = build_function_type_list (void_type_node,
19266 pchar_type_node, V16QI_type_node, NULL_TREE);
19268 tree v2di_ftype_v2di_unsigned_unsigned
19269 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19270 unsigned_type_node, unsigned_type_node,
19272 tree v2di_ftype_v2di_v2di_unsigned_unsigned
19273 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
19274 unsigned_type_node, unsigned_type_node,
19276 tree v2di_ftype_v2di_v16qi
19277 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
19279 tree v2df_ftype_v2df_v2df_v2df
19280 = build_function_type_list (V2DF_type_node,
19281 V2DF_type_node, V2DF_type_node,
19282 V2DF_type_node, NULL_TREE);
19283 tree v4sf_ftype_v4sf_v4sf_v4sf
19284 = build_function_type_list (V4SF_type_node,
19285 V4SF_type_node, V4SF_type_node,
19286 V4SF_type_node, NULL_TREE);
19287 tree v8hi_ftype_v16qi
19288 = build_function_type_list (V8HI_type_node, V16QI_type_node,
19290 tree v4si_ftype_v16qi
19291 = build_function_type_list (V4SI_type_node, V16QI_type_node,
19293 tree v2di_ftype_v16qi
19294 = build_function_type_list (V2DI_type_node, V16QI_type_node,
19296 tree v4si_ftype_v8hi
19297 = build_function_type_list (V4SI_type_node, V8HI_type_node,
19299 tree v2di_ftype_v8hi
19300 = build_function_type_list (V2DI_type_node, V8HI_type_node,
19302 tree v2di_ftype_v4si
19303 = build_function_type_list (V2DI_type_node, V4SI_type_node,
19305 tree v2di_ftype_pv2di
19306 = build_function_type_list (V2DI_type_node, pv2di_type_node,
19308 tree v16qi_ftype_v16qi_v16qi_int
19309 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19310 V16QI_type_node, integer_type_node,
19312 tree v16qi_ftype_v16qi_v16qi_v16qi
19313 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19314 V16QI_type_node, V16QI_type_node,
19316 tree v8hi_ftype_v8hi_v8hi_int
19317 = build_function_type_list (V8HI_type_node, V8HI_type_node,
19318 V8HI_type_node, integer_type_node,
19320 tree v4si_ftype_v4si_v4si_int
19321 = build_function_type_list (V4SI_type_node, V4SI_type_node,
19322 V4SI_type_node, integer_type_node,
19324 tree int_ftype_v2di_v2di
19325 = build_function_type_list (integer_type_node,
19326 V2DI_type_node, V2DI_type_node,
19328 tree int_ftype_v16qi_int_v16qi_int_int
19329 = build_function_type_list (integer_type_node,
19336 tree v16qi_ftype_v16qi_int_v16qi_int_int
19337 = build_function_type_list (V16QI_type_node,
19344 tree int_ftype_v16qi_v16qi_int
19345 = build_function_type_list (integer_type_node,
19351 /* SSE5 instructions */
19352 tree v2di_ftype_v2di_v2di_v2di
19353 = build_function_type_list (V2DI_type_node,
19359 tree v4si_ftype_v4si_v4si_v4si
19360 = build_function_type_list (V4SI_type_node,
19366 tree v4si_ftype_v4si_v4si_v2di
19367 = build_function_type_list (V4SI_type_node,
19373 tree v8hi_ftype_v8hi_v8hi_v8hi
19374 = build_function_type_list (V8HI_type_node,
19380 tree v8hi_ftype_v8hi_v8hi_v4si
19381 = build_function_type_list (V8HI_type_node,
19387 tree v2df_ftype_v2df_v2df_v16qi
19388 = build_function_type_list (V2DF_type_node,
19394 tree v4sf_ftype_v4sf_v4sf_v16qi
19395 = build_function_type_list (V4SF_type_node,
19401 tree v2di_ftype_v2di_si
19402 = build_function_type_list (V2DI_type_node,
19407 tree v4si_ftype_v4si_si
19408 = build_function_type_list (V4SI_type_node,
19413 tree v8hi_ftype_v8hi_si
19414 = build_function_type_list (V8HI_type_node,
19419 tree v16qi_ftype_v16qi_si
19420 = build_function_type_list (V16QI_type_node,
19424 tree v4sf_ftype_v4hi
19425 = build_function_type_list (V4SF_type_node,
19429 tree v4hi_ftype_v4sf
19430 = build_function_type_list (V4HI_type_node,
19434 tree v2di_ftype_v2di
19435 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
19437 tree v16qi_ftype_v8hi_v8hi
19438 = build_function_type_list (V16QI_type_node,
19439 V8HI_type_node, V8HI_type_node,
19441 tree v8hi_ftype_v4si_v4si
19442 = build_function_type_list (V8HI_type_node,
19443 V4SI_type_node, V4SI_type_node,
19445 tree v8hi_ftype_v16qi_v16qi
19446 = build_function_type_list (V8HI_type_node,
19447 V16QI_type_node, V16QI_type_node,
19449 tree v4hi_ftype_v8qi_v8qi
19450 = build_function_type_list (V4HI_type_node,
19451 V8QI_type_node, V8QI_type_node,
19453 tree unsigned_ftype_unsigned_uchar
19454 = build_function_type_list (unsigned_type_node,
19455 unsigned_type_node,
19456 unsigned_char_type_node,
19458 tree unsigned_ftype_unsigned_ushort
19459 = build_function_type_list (unsigned_type_node,
19460 unsigned_type_node,
19461 short_unsigned_type_node,
19463 tree unsigned_ftype_unsigned_unsigned
19464 = build_function_type_list (unsigned_type_node,
19465 unsigned_type_node,
19466 unsigned_type_node,
19468 tree uint64_ftype_uint64_uint64
19469 = build_function_type_list (long_long_unsigned_type_node,
19470 long_long_unsigned_type_node,
19471 long_long_unsigned_type_node,
19473 tree float_ftype_float
19474 = build_function_type_list (float_type_node,
19480 /* The __float80 type. */
19481 if (TYPE_MODE (long_double_type_node) == XFmode)
19482 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
19486 /* The __float80 type. */
19487 tree float80_type_node = make_node (REAL_TYPE);
19489 TYPE_PRECISION (float80_type_node) = 80;
19490 layout_type (float80_type_node);
19491 (*lang_hooks.types.register_builtin_type) (float80_type_node,
19497 tree float128_type_node = make_node (REAL_TYPE);
19499 TYPE_PRECISION (float128_type_node) = 128;
19500 layout_type (float128_type_node);
19501 (*lang_hooks.types.register_builtin_type) (float128_type_node,
19504 /* TFmode support builtins. */
19505 ftype = build_function_type (float128_type_node,
19507 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
19509 ftype = build_function_type_list (float128_type_node,
19510 float128_type_node,
19512 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
19514 ftype = build_function_type_list (float128_type_node,
19515 float128_type_node,
19516 float128_type_node,
19518 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19521 /* Add all special builtins with variable number of operands. */
19522 for (i = 0, d = bdesc_special_args;
19523 i < ARRAY_SIZE (bdesc_special_args);
19531 switch ((enum ix86_special_builtin_type) d->flag)
19533 case VOID_FTYPE_VOID:
19534 type = void_ftype_void;
19536 case V16QI_FTYPE_PCCHAR:
19537 type = v16qi_ftype_pcchar;
19539 case V4SF_FTYPE_PCFLOAT:
19540 type = v4sf_ftype_pcfloat;
19542 case V2DI_FTYPE_PV2DI:
19543 type = v2di_ftype_pv2di;
19545 case V2DF_FTYPE_PCDOUBLE:
19546 type = v2df_ftype_pcdouble;
19548 case V4SF_FTYPE_V4SF_PCV2SF:
19549 type = v4sf_ftype_v4sf_pcv2sf;
19551 case V2DF_FTYPE_V2DF_PCDOUBLE:
19552 type = v2df_ftype_v2df_pcdouble;
19554 case VOID_FTYPE_PV2SF_V4SF:
19555 type = void_ftype_pv2sf_v4sf;
19557 case VOID_FTYPE_PV2DI_V2DI:
19558 type = void_ftype_pv2di_v2di;
19560 case VOID_FTYPE_PCHAR_V16QI:
19561 type = void_ftype_pchar_v16qi;
19563 case VOID_FTYPE_PFLOAT_V4SF:
19564 type = void_ftype_pfloat_v4sf;
19566 case VOID_FTYPE_PDOUBLE_V2DF:
19567 type = void_ftype_pdouble_v2df;
19569 case VOID_FTYPE_PDI_DI:
19570 type = void_ftype_pdi_di;
19572 case VOID_FTYPE_PINT_INT:
19573 type = void_ftype_pint_int;
19576 gcc_unreachable ();
19579 def_builtin (d->mask, d->name, type, d->code);
19582 /* Add all builtins with variable number of operands. */
19583 for (i = 0, d = bdesc_args;
19584 i < ARRAY_SIZE (bdesc_args);
19592 switch ((enum ix86_builtin_type) d->flag)
19594 case FLOAT_FTYPE_FLOAT:
19595 type = float_ftype_float;
19597 case INT_FTYPE_V2DI_V2DI_PTEST:
19598 type = int_ftype_v2di_v2di;
19600 case INT64_FTYPE_V4SF:
19601 type = int64_ftype_v4sf;
19603 case INT64_FTYPE_V2DF:
19604 type = int64_ftype_v2df;
19606 case INT_FTYPE_V16QI:
19607 type = int_ftype_v16qi;
19609 case INT_FTYPE_V8QI:
19610 type = int_ftype_v8qi;
19612 case INT_FTYPE_V4SF:
19613 type = int_ftype_v4sf;
19615 case INT_FTYPE_V2DF:
19616 type = int_ftype_v2df;
19618 case V16QI_FTYPE_V16QI:
19619 type = v16qi_ftype_v16qi;
19621 case V8HI_FTYPE_V8HI:
19622 type = v8hi_ftype_v8hi;
19624 case V8HI_FTYPE_V16QI:
19625 type = v8hi_ftype_v16qi;
19627 case V8QI_FTYPE_V8QI:
19628 type = v8qi_ftype_v8qi;
19630 case V4SI_FTYPE_V4SI:
19631 type = v4si_ftype_v4si;
19633 case V4SI_FTYPE_V16QI:
19634 type = v4si_ftype_v16qi;
19636 case V4SI_FTYPE_V8HI:
19637 type = v4si_ftype_v8hi;
19639 case V4SI_FTYPE_V4SF:
19640 type = v4si_ftype_v4sf;
19642 case V4SI_FTYPE_V2DF:
19643 type = v4si_ftype_v2df;
19645 case V4HI_FTYPE_V4HI:
19646 type = v4hi_ftype_v4hi;
19648 case V4SF_FTYPE_V4SF:
19649 case V4SF_FTYPE_V4SF_VEC_MERGE:
19650 type = v4sf_ftype_v4sf;
19652 case V4SF_FTYPE_V4SI:
19653 type = v4sf_ftype_v4si;
19655 case V4SF_FTYPE_V2DF:
19656 type = v4sf_ftype_v2df;
19658 case V2DI_FTYPE_V2DI:
19659 type = v2di_ftype_v2di;
19661 case V2DI_FTYPE_V16QI:
19662 type = v2di_ftype_v16qi;
19664 case V2DI_FTYPE_V8HI:
19665 type = v2di_ftype_v8hi;
19667 case V2DI_FTYPE_V4SI:
19668 type = v2di_ftype_v4si;
19670 case V2SI_FTYPE_V2SI:
19671 type = v2si_ftype_v2si;
19673 case V2SI_FTYPE_V4SF:
19674 type = v2si_ftype_v4sf;
19676 case V2SI_FTYPE_V2DF:
19677 type = v2si_ftype_v2df;
19679 case V2SI_FTYPE_V2SF:
19680 type = v2si_ftype_v2sf;
19682 case V2DF_FTYPE_V4SF:
19683 type = v2df_ftype_v4sf;
19685 case V2DF_FTYPE_V2DF:
19686 case V2DF_FTYPE_V2DF_VEC_MERGE:
19687 type = v2df_ftype_v2df;
19689 case V2DF_FTYPE_V2SI:
19690 type = v2df_ftype_v2si;
19692 case V2DF_FTYPE_V4SI:
19693 type = v2df_ftype_v4si;
19695 case V2SF_FTYPE_V2SF:
19696 type = v2sf_ftype_v2sf;
19698 case V2SF_FTYPE_V2SI:
19699 type = v2sf_ftype_v2si;
19701 case V16QI_FTYPE_V16QI_V16QI:
19702 type = v16qi_ftype_v16qi_v16qi;
19704 case V16QI_FTYPE_V8HI_V8HI:
19705 type = v16qi_ftype_v8hi_v8hi;
19707 case V8QI_FTYPE_V8QI_V8QI:
19708 type = v8qi_ftype_v8qi_v8qi;
19710 case V8QI_FTYPE_V4HI_V4HI:
19711 type = v8qi_ftype_v4hi_v4hi;
19713 case V8HI_FTYPE_V8HI_V8HI:
19714 case V8HI_FTYPE_V8HI_V8HI_COUNT:
19715 type = v8hi_ftype_v8hi_v8hi;
19717 case V8HI_FTYPE_V16QI_V16QI:
19718 type = v8hi_ftype_v16qi_v16qi;
19720 case V8HI_FTYPE_V4SI_V4SI:
19721 type = v8hi_ftype_v4si_v4si;
19723 case V8HI_FTYPE_V8HI_SI_COUNT:
19724 type = v8hi_ftype_v8hi_int;
19726 case V4SI_FTYPE_V4SI_V4SI:
19727 case V4SI_FTYPE_V4SI_V4SI_COUNT:
19728 type = v4si_ftype_v4si_v4si;
19730 case V4SI_FTYPE_V8HI_V8HI:
19731 type = v4si_ftype_v8hi_v8hi;
19733 case V4SI_FTYPE_V4SF_V4SF:
19734 type = v4si_ftype_v4sf_v4sf;
19736 case V4SI_FTYPE_V2DF_V2DF:
19737 type = v4si_ftype_v2df_v2df;
19739 case V4SI_FTYPE_V4SI_SI_COUNT:
19740 type = v4si_ftype_v4si_int;
19742 case V4HI_FTYPE_V4HI_V4HI:
19743 case V4HI_FTYPE_V4HI_V4HI_COUNT:
19744 type = v4hi_ftype_v4hi_v4hi;
19746 case V4HI_FTYPE_V8QI_V8QI:
19747 type = v4hi_ftype_v8qi_v8qi;
19749 case V4HI_FTYPE_V2SI_V2SI:
19750 type = v4hi_ftype_v2si_v2si;
19752 case V4HI_FTYPE_V4HI_SI_COUNT:
19753 type = v4hi_ftype_v4hi_int;
19755 case V4SF_FTYPE_V4SF_V4SF:
19756 case V4SF_FTYPE_V4SF_V4SF_SWAP:
19757 type = v4sf_ftype_v4sf_v4sf;
19759 case V4SF_FTYPE_V4SF_V2SI:
19760 type = v4sf_ftype_v4sf_v2si;
19762 case V4SF_FTYPE_V4SF_V2DF:
19763 type = v4sf_ftype_v4sf_v2df;
19765 case V4SF_FTYPE_V4SF_DI:
19766 type = v4sf_ftype_v4sf_int64;
19768 case V4SF_FTYPE_V4SF_SI:
19769 type = v4sf_ftype_v4sf_int;
19771 case V2DI_FTYPE_V2DI_V2DI:
19772 case V2DI_FTYPE_V2DI_V2DI_COUNT:
19773 type = v2di_ftype_v2di_v2di;
19775 case V2DI_FTYPE_V16QI_V16QI:
19776 type = v2di_ftype_v16qi_v16qi;
19778 case V2DI_FTYPE_V4SI_V4SI:
19779 type = v2di_ftype_v4si_v4si;
19781 case V2DI_FTYPE_V2DI_V16QI:
19782 type = v2di_ftype_v2di_v16qi;
19784 case V2DI_FTYPE_V2DF_V2DF:
19785 type = v2di_ftype_v2df_v2df;
19787 case V2DI_FTYPE_V2DI_SI_COUNT:
19788 type = v2di_ftype_v2di_int;
19790 case V2SI_FTYPE_V2SI_V2SI:
19791 case V2SI_FTYPE_V2SI_V2SI_COUNT:
19792 type = v2si_ftype_v2si_v2si;
19794 case V2SI_FTYPE_V4HI_V4HI:
19795 type = v2si_ftype_v4hi_v4hi;
19797 case V2SI_FTYPE_V2SF_V2SF:
19798 type = v2si_ftype_v2sf_v2sf;
19800 case V2SI_FTYPE_V2SI_SI_COUNT:
19801 type = v2si_ftype_v2si_int;
19803 case V2DF_FTYPE_V2DF_V2DF:
19804 case V2DF_FTYPE_V2DF_V2DF_SWAP:
19805 type = v2df_ftype_v2df_v2df;
19807 case V2DF_FTYPE_V2DF_V4SF:
19808 type = v2df_ftype_v2df_v4sf;
19810 case V2DF_FTYPE_V2DF_DI:
19811 type = v2df_ftype_v2df_int64;
19813 case V2DF_FTYPE_V2DF_SI:
19814 type = v2df_ftype_v2df_int;
19816 case V2SF_FTYPE_V2SF_V2SF:
19817 type = v2sf_ftype_v2sf_v2sf;
19819 case V1DI_FTYPE_V1DI_V1DI:
19820 case V1DI_FTYPE_V1DI_V1DI_COUNT:
19821 type = v1di_ftype_v1di_v1di;
19823 case V1DI_FTYPE_V8QI_V8QI:
19824 type = v1di_ftype_v8qi_v8qi;
19826 case V1DI_FTYPE_V2SI_V2SI:
19827 type = v1di_ftype_v2si_v2si;
19829 case V1DI_FTYPE_V1DI_SI_COUNT:
19830 type = v1di_ftype_v1di_int;
19832 case UINT64_FTYPE_UINT64_UINT64:
19833 type = uint64_ftype_uint64_uint64;
19835 case UINT_FTYPE_UINT_UINT:
19836 type = unsigned_ftype_unsigned_unsigned;
19838 case UINT_FTYPE_UINT_USHORT:
19839 type = unsigned_ftype_unsigned_ushort;
19841 case UINT_FTYPE_UINT_UCHAR:
19842 type = unsigned_ftype_unsigned_uchar;
19844 case V8HI_FTYPE_V8HI_INT:
19845 type = v8hi_ftype_v8hi_int;
19847 case V4SI_FTYPE_V4SI_INT:
19848 type = v4si_ftype_v4si_int;
19850 case V4HI_FTYPE_V4HI_INT:
19851 type = v4hi_ftype_v4hi_int;
19853 case V4SF_FTYPE_V4SF_INT:
19854 type = v4sf_ftype_v4sf_int;
19856 case V2DI_FTYPE_V2DI_INT:
19857 case V2DI2TI_FTYPE_V2DI_INT:
19858 type = v2di_ftype_v2di_int;
19860 case V2DF_FTYPE_V2DF_INT:
19861 type = v2df_ftype_v2df_int;
19863 case V16QI_FTYPE_V16QI_V16QI_V16QI:
19864 type = v16qi_ftype_v16qi_v16qi_v16qi;
19866 case V4SF_FTYPE_V4SF_V4SF_V4SF:
19867 type = v4sf_ftype_v4sf_v4sf_v4sf;
19869 case V2DF_FTYPE_V2DF_V2DF_V2DF:
19870 type = v2df_ftype_v2df_v2df_v2df;
19872 case V16QI_FTYPE_V16QI_V16QI_INT:
19873 type = v16qi_ftype_v16qi_v16qi_int;
19875 case V8HI_FTYPE_V8HI_V8HI_INT:
19876 type = v8hi_ftype_v8hi_v8hi_int;
19878 case V4SI_FTYPE_V4SI_V4SI_INT:
19879 type = v4si_ftype_v4si_v4si_int;
19881 case V4SF_FTYPE_V4SF_V4SF_INT:
19882 type = v4sf_ftype_v4sf_v4sf_int;
19884 case V2DI_FTYPE_V2DI_V2DI_INT:
19885 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
19886 type = v2di_ftype_v2di_v2di_int;
19888 case V2DF_FTYPE_V2DF_V2DF_INT:
19889 type = v2df_ftype_v2df_v2df_int;
19891 case V2DI_FTYPE_V2DI_UINT_UINT:
19892 type = v2di_ftype_v2di_unsigned_unsigned;
19894 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
19895 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
19897 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
19898 type = v1di_ftype_v1di_v1di_int;
19901 gcc_unreachable ();
19904 def_builtin_const (d->mask, d->name, type, d->code);
19907 /* pcmpestr[im] insns. */
19908 for (i = 0, d = bdesc_pcmpestr;
19909 i < ARRAY_SIZE (bdesc_pcmpestr);
19912 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19913 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19915 ftype = int_ftype_v16qi_int_v16qi_int_int;
19916 def_builtin_const (d->mask, d->name, ftype, d->code);
19919 /* pcmpistr[im] insns. */
19920 for (i = 0, d = bdesc_pcmpistr;
19921 i < ARRAY_SIZE (bdesc_pcmpistr);
19924 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19925 ftype = v16qi_ftype_v16qi_v16qi_int;
19927 ftype = int_ftype_v16qi_v16qi_int;
19928 def_builtin_const (d->mask, d->name, ftype, d->code);
19931 /* comi/ucomi insns. */
19932 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19933 if (d->mask == OPTION_MASK_ISA_SSE2)
19934 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19936 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19939 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19940 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19942 /* SSE or 3DNow!A */
19943 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19946 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19948 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19949 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19952 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19953 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19958 /* Define AES built-in functions only if AES is enabled. */
19959 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
19960 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
19961 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
19962 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
19963 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
19964 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
19970 /* Define PCLMUL built-in function only if PCLMUL is enabled. */
19971 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
19974 /* Access to the vec_init patterns. */
19975 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19976 integer_type_node, NULL_TREE);
19977 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19979 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19980 short_integer_type_node,
19981 short_integer_type_node,
19982 short_integer_type_node, NULL_TREE);
19983 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19985 ftype = build_function_type_list (V8QI_type_node, char_type_node,
19986 char_type_node, char_type_node,
19987 char_type_node, char_type_node,
19988 char_type_node, char_type_node,
19989 char_type_node, NULL_TREE);
19990 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
19992 /* Access to the vec_extract patterns. */
19993 ftype = build_function_type_list (double_type_node, V2DF_type_node,
19994 integer_type_node, NULL_TREE);
19995 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19997 ftype = build_function_type_list (long_long_integer_type_node,
19998 V2DI_type_node, integer_type_node,
20000 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
20002 ftype = build_function_type_list (float_type_node, V4SF_type_node,
20003 integer_type_node, NULL_TREE);
20004 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
20006 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
20007 integer_type_node, NULL_TREE);
20008 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
20010 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
20011 integer_type_node, NULL_TREE);
20012 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
20014 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
20015 integer_type_node, NULL_TREE);
20016 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
20018 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
20019 integer_type_node, NULL_TREE);
20020 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
20022 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
20023 integer_type_node, NULL_TREE);
20024 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
20026 /* Access to the vec_set patterns. */
20027 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
20029 integer_type_node, NULL_TREE);
20030 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
20032 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
20034 integer_type_node, NULL_TREE);
20035 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
20037 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
20039 integer_type_node, NULL_TREE);
20040 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
20042 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
20044 integer_type_node, NULL_TREE);
20045 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
20047 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
20049 integer_type_node, NULL_TREE);
20050 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
20052 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
20054 integer_type_node, NULL_TREE);
20055 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
20057 /* Add SSE5 multi-arg argument instructions */
20058 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
20060 tree mtype = NULL_TREE;
20065 switch ((enum multi_arg_type)d->flag)
20067 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
20068 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
20069 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
20070 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
20071 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
20072 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
20073 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
20074 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
20075 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
20076 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
20077 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
20078 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
20079 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
20080 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
20081 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
20082 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
20083 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
20084 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
20085 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
20086 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
20087 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
20088 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
20089 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
20090 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
20091 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
20092 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
20093 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
20094 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
20095 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
20096 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
20097 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
20098 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
20099 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
20100 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
20101 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
20102 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
20103 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
20104 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
20105 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
20106 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
20107 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
20108 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
20109 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
20110 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
20111 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
20112 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
20113 case MULTI_ARG_UNKNOWN:
20115 gcc_unreachable ();
20119 def_builtin_const (d->mask, d->name, mtype, d->code);
20124 ix86_init_builtins (void)
20127 ix86_init_mmx_sse_builtins ();
20130 /* Errors in the source file can cause expand_expr to return const0_rtx
20131 where we expect a vector. To avoid crashing, use one of the vector
20132 clear instructions. */
20134 safe_vector_operand (rtx x, enum machine_mode mode)
20136 if (x == const0_rtx)
20137 x = CONST0_RTX (mode);
20141 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
20144 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
20147 tree arg0 = CALL_EXPR_ARG (exp, 0);
20148 tree arg1 = CALL_EXPR_ARG (exp, 1);
20149 rtx op0 = expand_normal (arg0);
20150 rtx op1 = expand_normal (arg1);
20151 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20152 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20153 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20155 if (VECTOR_MODE_P (mode0))
20156 op0 = safe_vector_operand (op0, mode0);
20157 if (VECTOR_MODE_P (mode1))
20158 op1 = safe_vector_operand (op1, mode1);
20160 if (optimize || !target
20161 || GET_MODE (target) != tmode
20162 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20163 target = gen_reg_rtx (tmode);
20165 if (GET_MODE (op1) == SImode && mode1 == TImode)
20167 rtx x = gen_reg_rtx (V4SImode);
20168 emit_insn (gen_sse2_loadd (x, op1));
20169 op1 = gen_lowpart (TImode, x);
20172 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20173 op0 = copy_to_mode_reg (mode0, op0);
20174 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20175 op1 = copy_to_mode_reg (mode1, op1);
20177 pat = GEN_FCN (icode) (target, op0, op1);
20186 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
20189 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
20190 enum multi_arg_type m_type,
20191 enum insn_code sub_code)
20196 bool comparison_p = false;
20198 bool last_arg_constant = false;
20199 int num_memory = 0;
20202 enum machine_mode mode;
20205 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20209 case MULTI_ARG_3_SF:
20210 case MULTI_ARG_3_DF:
20211 case MULTI_ARG_3_DI:
20212 case MULTI_ARG_3_SI:
20213 case MULTI_ARG_3_SI_DI:
20214 case MULTI_ARG_3_HI:
20215 case MULTI_ARG_3_HI_SI:
20216 case MULTI_ARG_3_QI:
20217 case MULTI_ARG_3_PERMPS:
20218 case MULTI_ARG_3_PERMPD:
20222 case MULTI_ARG_2_SF:
20223 case MULTI_ARG_2_DF:
20224 case MULTI_ARG_2_DI:
20225 case MULTI_ARG_2_SI:
20226 case MULTI_ARG_2_HI:
20227 case MULTI_ARG_2_QI:
20231 case MULTI_ARG_2_DI_IMM:
20232 case MULTI_ARG_2_SI_IMM:
20233 case MULTI_ARG_2_HI_IMM:
20234 case MULTI_ARG_2_QI_IMM:
20236 last_arg_constant = true;
20239 case MULTI_ARG_1_SF:
20240 case MULTI_ARG_1_DF:
20241 case MULTI_ARG_1_DI:
20242 case MULTI_ARG_1_SI:
20243 case MULTI_ARG_1_HI:
20244 case MULTI_ARG_1_QI:
20245 case MULTI_ARG_1_SI_DI:
20246 case MULTI_ARG_1_HI_DI:
20247 case MULTI_ARG_1_HI_SI:
20248 case MULTI_ARG_1_QI_DI:
20249 case MULTI_ARG_1_QI_SI:
20250 case MULTI_ARG_1_QI_HI:
20251 case MULTI_ARG_1_PH2PS:
20252 case MULTI_ARG_1_PS2PH:
20256 case MULTI_ARG_2_SF_CMP:
20257 case MULTI_ARG_2_DF_CMP:
20258 case MULTI_ARG_2_DI_CMP:
20259 case MULTI_ARG_2_SI_CMP:
20260 case MULTI_ARG_2_HI_CMP:
20261 case MULTI_ARG_2_QI_CMP:
20263 comparison_p = true;
20266 case MULTI_ARG_2_SF_TF:
20267 case MULTI_ARG_2_DF_TF:
20268 case MULTI_ARG_2_DI_TF:
20269 case MULTI_ARG_2_SI_TF:
20270 case MULTI_ARG_2_HI_TF:
20271 case MULTI_ARG_2_QI_TF:
20276 case MULTI_ARG_UNKNOWN:
20278 gcc_unreachable ();
20281 if (optimize || !target
20282 || GET_MODE (target) != tmode
20283 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20284 target = gen_reg_rtx (tmode);
20286 gcc_assert (nargs <= 4);
20288 for (i = 0; i < nargs; i++)
20290 tree arg = CALL_EXPR_ARG (exp, i);
20291 rtx op = expand_normal (arg);
20292 int adjust = (comparison_p) ? 1 : 0;
20293 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
20295 if (last_arg_constant && i == nargs-1)
20297 if (GET_CODE (op) != CONST_INT)
20299 error ("last argument must be an immediate");
20300 return gen_reg_rtx (tmode);
20305 if (VECTOR_MODE_P (mode))
20306 op = safe_vector_operand (op, mode);
20308 /* If we aren't optimizing, only allow one memory operand to be
20310 if (memory_operand (op, mode))
20313 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
20316 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
20318 op = force_reg (mode, op);
20322 args[i].mode = mode;
20328 pat = GEN_FCN (icode) (target, args[0].op);
20333 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
20334 GEN_INT ((int)sub_code));
20335 else if (! comparison_p)
20336 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
20339 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
20343 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
20348 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
20352 gcc_unreachable ();
20362 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
20363 insns with vec_merge. */
20366 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
20370 tree arg0 = CALL_EXPR_ARG (exp, 0);
20371 rtx op1, op0 = expand_normal (arg0);
20372 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20373 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20375 if (optimize || !target
20376 || GET_MODE (target) != tmode
20377 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20378 target = gen_reg_rtx (tmode);
20380 if (VECTOR_MODE_P (mode0))
20381 op0 = safe_vector_operand (op0, mode0);
20383 if ((optimize && !register_operand (op0, mode0))
20384 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20385 op0 = copy_to_mode_reg (mode0, op0);
20388 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20389 op1 = copy_to_mode_reg (mode0, op1);
20391 pat = GEN_FCN (icode) (target, op0, op1);
20398 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20401 ix86_expand_sse_compare (const struct builtin_description *d,
20402 tree exp, rtx target, bool swap)
20405 tree arg0 = CALL_EXPR_ARG (exp, 0);
20406 tree arg1 = CALL_EXPR_ARG (exp, 1);
20407 rtx op0 = expand_normal (arg0);
20408 rtx op1 = expand_normal (arg1);
20410 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20411 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20412 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20413 enum rtx_code comparison = d->comparison;
20415 if (VECTOR_MODE_P (mode0))
20416 op0 = safe_vector_operand (op0, mode0);
20417 if (VECTOR_MODE_P (mode1))
20418 op1 = safe_vector_operand (op1, mode1);
20420 /* Swap operands if we have a comparison that isn't available in
20424 rtx tmp = gen_reg_rtx (mode1);
20425 emit_move_insn (tmp, op1);
20430 if (optimize || !target
20431 || GET_MODE (target) != tmode
20432 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20433 target = gen_reg_rtx (tmode);
20435 if ((optimize && !register_operand (op0, mode0))
20436 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20437 op0 = copy_to_mode_reg (mode0, op0);
20438 if ((optimize && !register_operand (op1, mode1))
20439 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20440 op1 = copy_to_mode_reg (mode1, op1);
20442 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20443 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20450 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20453 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20457 tree arg0 = CALL_EXPR_ARG (exp, 0);
20458 tree arg1 = CALL_EXPR_ARG (exp, 1);
20459 rtx op0 = expand_normal (arg0);
20460 rtx op1 = expand_normal (arg1);
20461 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20462 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20463 enum rtx_code comparison = d->comparison;
20465 if (VECTOR_MODE_P (mode0))
20466 op0 = safe_vector_operand (op0, mode0);
20467 if (VECTOR_MODE_P (mode1))
20468 op1 = safe_vector_operand (op1, mode1);
20470 /* Swap operands if we have a comparison that isn't available in
20472 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20479 target = gen_reg_rtx (SImode);
20480 emit_move_insn (target, const0_rtx);
20481 target = gen_rtx_SUBREG (QImode, target, 0);
20483 if ((optimize && !register_operand (op0, mode0))
20484 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20485 op0 = copy_to_mode_reg (mode0, op0);
20486 if ((optimize && !register_operand (op1, mode1))
20487 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20488 op1 = copy_to_mode_reg (mode1, op1);
20490 pat = GEN_FCN (d->icode) (op0, op1);
20494 emit_insn (gen_rtx_SET (VOIDmode,
20495 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20496 gen_rtx_fmt_ee (comparison, QImode,
20500 return SUBREG_REG (target);
20503 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20506 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20510 tree arg0 = CALL_EXPR_ARG (exp, 0);
20511 tree arg1 = CALL_EXPR_ARG (exp, 1);
20512 rtx op0 = expand_normal (arg0);
20513 rtx op1 = expand_normal (arg1);
20514 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20515 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20516 enum rtx_code comparison = d->comparison;
20518 if (VECTOR_MODE_P (mode0))
20519 op0 = safe_vector_operand (op0, mode0);
20520 if (VECTOR_MODE_P (mode1))
20521 op1 = safe_vector_operand (op1, mode1);
20523 target = gen_reg_rtx (SImode);
20524 emit_move_insn (target, const0_rtx);
20525 target = gen_rtx_SUBREG (QImode, target, 0);
20527 if ((optimize && !register_operand (op0, mode0))
20528 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20529 op0 = copy_to_mode_reg (mode0, op0);
20530 if ((optimize && !register_operand (op1, mode1))
20531 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20532 op1 = copy_to_mode_reg (mode1, op1);
20534 pat = GEN_FCN (d->icode) (op0, op1);
20538 emit_insn (gen_rtx_SET (VOIDmode,
20539 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20540 gen_rtx_fmt_ee (comparison, QImode,
20544 return SUBREG_REG (target);
20547 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20550 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20551 tree exp, rtx target)
20554 tree arg0 = CALL_EXPR_ARG (exp, 0);
20555 tree arg1 = CALL_EXPR_ARG (exp, 1);
20556 tree arg2 = CALL_EXPR_ARG (exp, 2);
20557 tree arg3 = CALL_EXPR_ARG (exp, 3);
20558 tree arg4 = CALL_EXPR_ARG (exp, 4);
20559 rtx scratch0, scratch1;
20560 rtx op0 = expand_normal (arg0);
20561 rtx op1 = expand_normal (arg1);
20562 rtx op2 = expand_normal (arg2);
20563 rtx op3 = expand_normal (arg3);
20564 rtx op4 = expand_normal (arg4);
20565 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20567 tmode0 = insn_data[d->icode].operand[0].mode;
20568 tmode1 = insn_data[d->icode].operand[1].mode;
20569 modev2 = insn_data[d->icode].operand[2].mode;
20570 modei3 = insn_data[d->icode].operand[3].mode;
20571 modev4 = insn_data[d->icode].operand[4].mode;
20572 modei5 = insn_data[d->icode].operand[5].mode;
20573 modeimm = insn_data[d->icode].operand[6].mode;
20575 if (VECTOR_MODE_P (modev2))
20576 op0 = safe_vector_operand (op0, modev2);
20577 if (VECTOR_MODE_P (modev4))
20578 op2 = safe_vector_operand (op2, modev4);
20580 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20581 op0 = copy_to_mode_reg (modev2, op0);
20582 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20583 op1 = copy_to_mode_reg (modei3, op1);
20584 if ((optimize && !register_operand (op2, modev4))
20585 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20586 op2 = copy_to_mode_reg (modev4, op2);
20587 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20588 op3 = copy_to_mode_reg (modei5, op3);
20590 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20592 error ("the fifth argument must be a 8-bit immediate");
20596 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20598 if (optimize || !target
20599 || GET_MODE (target) != tmode0
20600 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20601 target = gen_reg_rtx (tmode0);
20603 scratch1 = gen_reg_rtx (tmode1);
20605 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20607 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20609 if (optimize || !target
20610 || GET_MODE (target) != tmode1
20611 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20612 target = gen_reg_rtx (tmode1);
20614 scratch0 = gen_reg_rtx (tmode0);
20616 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20620 gcc_assert (d->flag);
20622 scratch0 = gen_reg_rtx (tmode0);
20623 scratch1 = gen_reg_rtx (tmode1);
20625 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20635 target = gen_reg_rtx (SImode);
20636 emit_move_insn (target, const0_rtx);
20637 target = gen_rtx_SUBREG (QImode, target, 0);
20640 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20641 gen_rtx_fmt_ee (EQ, QImode,
20642 gen_rtx_REG ((enum machine_mode) d->flag,
20645 return SUBREG_REG (target);
20652 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20655 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20656 tree exp, rtx target)
20659 tree arg0 = CALL_EXPR_ARG (exp, 0);
20660 tree arg1 = CALL_EXPR_ARG (exp, 1);
20661 tree arg2 = CALL_EXPR_ARG (exp, 2);
20662 rtx scratch0, scratch1;
20663 rtx op0 = expand_normal (arg0);
20664 rtx op1 = expand_normal (arg1);
20665 rtx op2 = expand_normal (arg2);
20666 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20668 tmode0 = insn_data[d->icode].operand[0].mode;
20669 tmode1 = insn_data[d->icode].operand[1].mode;
20670 modev2 = insn_data[d->icode].operand[2].mode;
20671 modev3 = insn_data[d->icode].operand[3].mode;
20672 modeimm = insn_data[d->icode].operand[4].mode;
20674 if (VECTOR_MODE_P (modev2))
20675 op0 = safe_vector_operand (op0, modev2);
20676 if (VECTOR_MODE_P (modev3))
20677 op1 = safe_vector_operand (op1, modev3);
20679 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20680 op0 = copy_to_mode_reg (modev2, op0);
20681 if ((optimize && !register_operand (op1, modev3))
20682 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20683 op1 = copy_to_mode_reg (modev3, op1);
20685 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20687 error ("the third argument must be a 8-bit immediate");
20691 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20693 if (optimize || !target
20694 || GET_MODE (target) != tmode0
20695 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20696 target = gen_reg_rtx (tmode0);
20698 scratch1 = gen_reg_rtx (tmode1);
20700 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20702 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20704 if (optimize || !target
20705 || GET_MODE (target) != tmode1
20706 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20707 target = gen_reg_rtx (tmode1);
20709 scratch0 = gen_reg_rtx (tmode0);
20711 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20715 gcc_assert (d->flag);
20717 scratch0 = gen_reg_rtx (tmode0);
20718 scratch1 = gen_reg_rtx (tmode1);
20720 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20730 target = gen_reg_rtx (SImode);
20731 emit_move_insn (target, const0_rtx);
20732 target = gen_rtx_SUBREG (QImode, target, 0);
20735 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20736 gen_rtx_fmt_ee (EQ, QImode,
20737 gen_rtx_REG ((enum machine_mode) d->flag,
20740 return SUBREG_REG (target);
20746 /* Subroutine of ix86_expand_builtin to take care of insns with
20747 variable number of operands. */
20750 ix86_expand_args_builtin (const struct builtin_description *d,
20751 tree exp, rtx target)
20753 rtx pat, real_target;
20754 unsigned int i, nargs;
20755 unsigned int nargs_constant = 0;
20756 int num_memory = 0;
20760 enum machine_mode mode;
20762 bool last_arg_count = false;
20763 enum insn_code icode = d->icode;
20764 const struct insn_data *insn_p = &insn_data[icode];
20765 enum machine_mode tmode = insn_p->operand[0].mode;
20766 enum machine_mode rmode = VOIDmode;
20768 enum rtx_code comparison = d->comparison;
20770 switch ((enum ix86_builtin_type) d->flag)
20772 case INT_FTYPE_V2DI_V2DI_PTEST:
20773 return ix86_expand_sse_ptest (d, exp, target);
20774 case FLOAT128_FTYPE_FLOAT128:
20775 case FLOAT_FTYPE_FLOAT:
20776 case INT64_FTYPE_V4SF:
20777 case INT64_FTYPE_V2DF:
20778 case INT_FTYPE_V16QI:
20779 case INT_FTYPE_V8QI:
20780 case INT_FTYPE_V4SF:
20781 case INT_FTYPE_V2DF:
20782 case V16QI_FTYPE_V16QI:
20783 case V8HI_FTYPE_V8HI:
20784 case V8HI_FTYPE_V16QI:
20785 case V8QI_FTYPE_V8QI:
20786 case V4SI_FTYPE_V4SI:
20787 case V4SI_FTYPE_V16QI:
20788 case V4SI_FTYPE_V4SF:
20789 case V4SI_FTYPE_V8HI:
20790 case V4SI_FTYPE_V2DF:
20791 case V4HI_FTYPE_V4HI:
20792 case V4SF_FTYPE_V4SF:
20793 case V4SF_FTYPE_V4SI:
20794 case V4SF_FTYPE_V2DF:
20795 case V2DI_FTYPE_V2DI:
20796 case V2DI_FTYPE_V16QI:
20797 case V2DI_FTYPE_V8HI:
20798 case V2DI_FTYPE_V4SI:
20799 case V2DF_FTYPE_V2DF:
20800 case V2DF_FTYPE_V4SI:
20801 case V2DF_FTYPE_V4SF:
20802 case V2DF_FTYPE_V2SI:
20803 case V2SI_FTYPE_V2SI:
20804 case V2SI_FTYPE_V4SF:
20805 case V2SI_FTYPE_V2SF:
20806 case V2SI_FTYPE_V2DF:
20807 case V2SF_FTYPE_V2SF:
20808 case V2SF_FTYPE_V2SI:
20811 case V4SF_FTYPE_V4SF_VEC_MERGE:
20812 case V2DF_FTYPE_V2DF_VEC_MERGE:
20813 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
20814 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
20815 case V16QI_FTYPE_V16QI_V16QI:
20816 case V16QI_FTYPE_V8HI_V8HI:
20817 case V8QI_FTYPE_V8QI_V8QI:
20818 case V8QI_FTYPE_V4HI_V4HI:
20819 case V8HI_FTYPE_V8HI_V8HI:
20820 case V8HI_FTYPE_V16QI_V16QI:
20821 case V8HI_FTYPE_V4SI_V4SI:
20822 case V4SI_FTYPE_V4SI_V4SI:
20823 case V4SI_FTYPE_V8HI_V8HI:
20824 case V4SI_FTYPE_V4SF_V4SF:
20825 case V4SI_FTYPE_V2DF_V2DF:
20826 case V4HI_FTYPE_V4HI_V4HI:
20827 case V4HI_FTYPE_V8QI_V8QI:
20828 case V4HI_FTYPE_V2SI_V2SI:
20829 case V4SF_FTYPE_V4SF_V4SF:
20830 case V4SF_FTYPE_V4SF_V2SI:
20831 case V4SF_FTYPE_V4SF_V2DF:
20832 case V4SF_FTYPE_V4SF_DI:
20833 case V4SF_FTYPE_V4SF_SI:
20834 case V2DI_FTYPE_V2DI_V2DI:
20835 case V2DI_FTYPE_V16QI_V16QI:
20836 case V2DI_FTYPE_V4SI_V4SI:
20837 case V2DI_FTYPE_V2DI_V16QI:
20838 case V2DI_FTYPE_V2DF_V2DF:
20839 case V2SI_FTYPE_V2SI_V2SI:
20840 case V2SI_FTYPE_V4HI_V4HI:
20841 case V2SI_FTYPE_V2SF_V2SF:
20842 case V2DF_FTYPE_V2DF_V2DF:
20843 case V2DF_FTYPE_V2DF_V4SF:
20844 case V2DF_FTYPE_V2DF_DI:
20845 case V2DF_FTYPE_V2DF_SI:
20846 case V2SF_FTYPE_V2SF_V2SF:
20847 case V1DI_FTYPE_V1DI_V1DI:
20848 case V1DI_FTYPE_V8QI_V8QI:
20849 case V1DI_FTYPE_V2SI_V2SI:
20850 if (comparison == UNKNOWN)
20851 return ix86_expand_binop_builtin (icode, exp, target);
20854 case V4SF_FTYPE_V4SF_V4SF_SWAP:
20855 case V2DF_FTYPE_V2DF_V2DF_SWAP:
20856 gcc_assert (comparison != UNKNOWN);
20860 case V8HI_FTYPE_V8HI_V8HI_COUNT:
20861 case V8HI_FTYPE_V8HI_SI_COUNT:
20862 case V4SI_FTYPE_V4SI_V4SI_COUNT:
20863 case V4SI_FTYPE_V4SI_SI_COUNT:
20864 case V4HI_FTYPE_V4HI_V4HI_COUNT:
20865 case V4HI_FTYPE_V4HI_SI_COUNT:
20866 case V2DI_FTYPE_V2DI_V2DI_COUNT:
20867 case V2DI_FTYPE_V2DI_SI_COUNT:
20868 case V2SI_FTYPE_V2SI_V2SI_COUNT:
20869 case V2SI_FTYPE_V2SI_SI_COUNT:
20870 case V1DI_FTYPE_V1DI_V1DI_COUNT:
20871 case V1DI_FTYPE_V1DI_SI_COUNT:
20873 last_arg_count = true;
20875 case UINT64_FTYPE_UINT64_UINT64:
20876 case UINT_FTYPE_UINT_UINT:
20877 case UINT_FTYPE_UINT_USHORT:
20878 case UINT_FTYPE_UINT_UCHAR:
20881 case V2DI2TI_FTYPE_V2DI_INT:
20884 nargs_constant = 1;
20886 case V8HI_FTYPE_V8HI_INT:
20887 case V4SI_FTYPE_V4SI_INT:
20888 case V4HI_FTYPE_V4HI_INT:
20889 case V4SF_FTYPE_V4SF_INT:
20890 case V2DI_FTYPE_V2DI_INT:
20891 case V2DF_FTYPE_V2DF_INT:
20893 nargs_constant = 1;
20895 case V16QI_FTYPE_V16QI_V16QI_V16QI:
20896 case V4SF_FTYPE_V4SF_V4SF_V4SF:
20897 case V2DF_FTYPE_V2DF_V2DF_V2DF:
20900 case V16QI_FTYPE_V16QI_V16QI_INT:
20901 case V8HI_FTYPE_V8HI_V8HI_INT:
20902 case V4SI_FTYPE_V4SI_V4SI_INT:
20903 case V4SF_FTYPE_V4SF_V4SF_INT:
20904 case V2DI_FTYPE_V2DI_V2DI_INT:
20905 case V2DF_FTYPE_V2DF_V2DF_INT:
20907 nargs_constant = 1;
20909 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
20912 nargs_constant = 1;
20914 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
20917 nargs_constant = 1;
20919 case V2DI_FTYPE_V2DI_UINT_UINT:
20921 nargs_constant = 2;
20923 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
20925 nargs_constant = 2;
20928 gcc_unreachable ();
20931 gcc_assert (nargs <= ARRAY_SIZE (args));
20933 if (comparison != UNKNOWN)
20935 gcc_assert (nargs == 2);
20936 return ix86_expand_sse_compare (d, exp, target, swap);
20939 if (rmode == VOIDmode || rmode == tmode)
20943 || GET_MODE (target) != tmode
20944 || ! (*insn_p->operand[0].predicate) (target, tmode))
20945 target = gen_reg_rtx (tmode);
20946 real_target = target;
20950 target = gen_reg_rtx (rmode);
20951 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
20954 for (i = 0; i < nargs; i++)
20956 tree arg = CALL_EXPR_ARG (exp, i);
20957 rtx op = expand_normal (arg);
20958 enum machine_mode mode = insn_p->operand[i + 1].mode;
20959 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
20961 if (last_arg_count && (i + 1) == nargs)
20963 /* SIMD shift insns take either an 8-bit immediate or
20964 register as count. But builtin functions take int as
20965 count. If count doesn't match, we put it in register. */
20968 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
20969 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
20970 op = copy_to_reg (op);
20973 else if ((nargs - i) <= nargs_constant)
20978 case CODE_FOR_sse4_1_roundpd:
20979 case CODE_FOR_sse4_1_roundps:
20980 case CODE_FOR_sse4_1_roundsd:
20981 case CODE_FOR_sse4_1_roundss:
20982 case CODE_FOR_sse4_1_blendps:
20983 error ("the last argument must be a 4-bit immediate");
20986 case CODE_FOR_sse4_1_blendpd:
20987 error ("the last argument must be a 2-bit immediate");
20991 switch (nargs_constant)
20994 if ((nargs - i) == nargs_constant)
20996 error ("the next to last argument must be an 8-bit immediate");
21000 error ("the last argument must be an 8-bit immediate");
21003 gcc_unreachable ();
21010 if (VECTOR_MODE_P (mode))
21011 op = safe_vector_operand (op, mode);
21013 /* If we aren't optimizing, only allow one memory operand to
21015 if (memory_operand (op, mode))
21018 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
21020 if (optimize || !match || num_memory > 1)
21021 op = copy_to_mode_reg (mode, op);
21025 op = copy_to_reg (op);
21026 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
21031 args[i].mode = mode;
21037 pat = GEN_FCN (icode) (real_target, args[0].op);
21040 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
21043 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21047 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21048 args[2].op, args[3].op);
21051 gcc_unreachable ();
21061 /* Subroutine of ix86_expand_builtin to take care of special insns
21062 with variable number of operands. */
21065 ix86_expand_special_args_builtin (const struct builtin_description *d,
21066 tree exp, rtx target)
21070 unsigned int i, nargs, arg_adjust, memory;
21074 enum machine_mode mode;
21076 enum insn_code icode = d->icode;
21077 bool last_arg_constant = false;
21078 const struct insn_data *insn_p = &insn_data[icode];
21079 enum machine_mode tmode = insn_p->operand[0].mode;
21080 enum { load, store } class;
21082 switch ((enum ix86_special_builtin_type) d->flag)
21084 case VOID_FTYPE_VOID:
21085 emit_insn (GEN_FCN (icode) (target));
21087 case V2DI_FTYPE_PV2DI:
21088 case V16QI_FTYPE_PCCHAR:
21089 case V4SF_FTYPE_PCFLOAT:
21090 case V2DF_FTYPE_PCDOUBLE:
21095 case VOID_FTYPE_PV2SF_V4SF:
21096 case VOID_FTYPE_PV2DI_V2DI:
21097 case VOID_FTYPE_PCHAR_V16QI:
21098 case VOID_FTYPE_PFLOAT_V4SF:
21099 case VOID_FTYPE_PDOUBLE_V2DF:
21100 case VOID_FTYPE_PDI_DI:
21101 case VOID_FTYPE_PINT_INT:
21104 /* Reserve memory operand for target. */
21105 memory = ARRAY_SIZE (args);
21107 case V4SF_FTYPE_V4SF_PCV2SF:
21108 case V2DF_FTYPE_V2DF_PCDOUBLE:
21114 gcc_unreachable ();
21117 gcc_assert (nargs <= ARRAY_SIZE (args));
21119 if (class == store)
21121 arg = CALL_EXPR_ARG (exp, 0);
21122 op = expand_normal (arg);
21123 gcc_assert (target == 0);
21124 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
21132 || GET_MODE (target) != tmode
21133 || ! (*insn_p->operand[0].predicate) (target, tmode))
21134 target = gen_reg_rtx (tmode);
21137 for (i = 0; i < nargs; i++)
21139 enum machine_mode mode = insn_p->operand[i + 1].mode;
21142 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
21143 op = expand_normal (arg);
21144 match = (*insn_p->operand[i + 1].predicate) (op, mode);
21146 if (last_arg_constant && (i + 1) == nargs)
21152 error ("the last argument must be an 8-bit immediate");
21160 /* This must be the memory operand. */
21161 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
21162 gcc_assert (GET_MODE (op) == mode
21163 || GET_MODE (op) == VOIDmode);
21167 /* This must be register. */
21168 if (VECTOR_MODE_P (mode))
21169 op = safe_vector_operand (op, mode);
21171 gcc_assert (GET_MODE (op) == mode
21172 || GET_MODE (op) == VOIDmode);
21173 op = copy_to_mode_reg (mode, op);
21178 args[i].mode = mode;
21184 pat = GEN_FCN (icode) (target, args[0].op);
21187 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
21190 gcc_unreachable ();
21196 return class == store ? 0 : target;
21199 /* Return the integer constant in ARG. Constrain it to be in the range
21200 of the subparts of VEC_TYPE; issue an error if not. */
21203 get_element_number (tree vec_type, tree arg)
21205 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
21207 if (!host_integerp (arg, 1)
21208 || (elt = tree_low_cst (arg, 1), elt > max))
21210 error ("selector must be an integer constant in the range 0..%wi", max);
21217 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21218 ix86_expand_vector_init. We DO have language-level syntax for this, in
21219 the form of (type){ init-list }. Except that since we can't place emms
21220 instructions from inside the compiler, we can't allow the use of MMX
21221 registers unless the user explicitly asks for it. So we do *not* define
21222 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
21223 we have builtins invoked by mmintrin.h that gives us license to emit
21224 these sorts of instructions. */
21227 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
21229 enum machine_mode tmode = TYPE_MODE (type);
21230 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
21231 int i, n_elt = GET_MODE_NUNITS (tmode);
21232 rtvec v = rtvec_alloc (n_elt);
21234 gcc_assert (VECTOR_MODE_P (tmode));
21235 gcc_assert (call_expr_nargs (exp) == n_elt);
21237 for (i = 0; i < n_elt; ++i)
21239 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
21240 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
21243 if (!target || !register_operand (target, tmode))
21244 target = gen_reg_rtx (tmode);
21246 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
21250 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21251 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
21252 had a language-level syntax for referencing vector elements. */
21255 ix86_expand_vec_ext_builtin (tree exp, rtx target)
21257 enum machine_mode tmode, mode0;
21262 arg0 = CALL_EXPR_ARG (exp, 0);
21263 arg1 = CALL_EXPR_ARG (exp, 1);
21265 op0 = expand_normal (arg0);
21266 elt = get_element_number (TREE_TYPE (arg0), arg1);
21268 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21269 mode0 = TYPE_MODE (TREE_TYPE (arg0));
21270 gcc_assert (VECTOR_MODE_P (mode0));
21272 op0 = force_reg (mode0, op0);
21274 if (optimize || !target || !register_operand (target, tmode))
21275 target = gen_reg_rtx (tmode);
21277 ix86_expand_vector_extract (true, target, op0, elt);
21282 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21283 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
21284 a language-level syntax for referencing vector elements. */
21287 ix86_expand_vec_set_builtin (tree exp)
21289 enum machine_mode tmode, mode1;
21290 tree arg0, arg1, arg2;
21292 rtx op0, op1, target;
21294 arg0 = CALL_EXPR_ARG (exp, 0);
21295 arg1 = CALL_EXPR_ARG (exp, 1);
21296 arg2 = CALL_EXPR_ARG (exp, 2);
21298 tmode = TYPE_MODE (TREE_TYPE (arg0));
21299 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21300 gcc_assert (VECTOR_MODE_P (tmode));
21302 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
21303 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
21304 elt = get_element_number (TREE_TYPE (arg0), arg2);
21306 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
21307 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
21309 op0 = force_reg (tmode, op0);
21310 op1 = force_reg (mode1, op1);
21312 /* OP0 is the source of these builtin functions and shouldn't be
21313 modified. Create a copy, use it and return it as target. */
21314 target = gen_reg_rtx (tmode);
21315 emit_move_insn (target, op0);
21316 ix86_expand_vector_set (true, target, op1, elt);
21321 /* Expand an expression EXP that calls a built-in function,
21322 with result going to TARGET if that's convenient
21323 (and in mode MODE if that's convenient).
21324 SUBTARGET may be used as the target for computing one of EXP's operands.
21325 IGNORE is nonzero if the value is to be ignored. */
21328 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
21329 enum machine_mode mode ATTRIBUTE_UNUSED,
21330 int ignore ATTRIBUTE_UNUSED)
21332 const struct builtin_description *d;
21334 enum insn_code icode;
21335 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
21336 tree arg0, arg1, arg2;
21337 rtx op0, op1, op2, pat;
21338 enum machine_mode mode0, mode1, mode2;
21339 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
21343 case IX86_BUILTIN_MASKMOVQ:
21344 case IX86_BUILTIN_MASKMOVDQU:
21345 icode = (fcode == IX86_BUILTIN_MASKMOVQ
21346 ? CODE_FOR_mmx_maskmovq
21347 : CODE_FOR_sse2_maskmovdqu);
21348 /* Note the arg order is different from the operand order. */
21349 arg1 = CALL_EXPR_ARG (exp, 0);
21350 arg2 = CALL_EXPR_ARG (exp, 1);
21351 arg0 = CALL_EXPR_ARG (exp, 2);
21352 op0 = expand_normal (arg0);
21353 op1 = expand_normal (arg1);
21354 op2 = expand_normal (arg2);
21355 mode0 = insn_data[icode].operand[0].mode;
21356 mode1 = insn_data[icode].operand[1].mode;
21357 mode2 = insn_data[icode].operand[2].mode;
21359 op0 = force_reg (Pmode, op0);
21360 op0 = gen_rtx_MEM (mode1, op0);
21362 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
21363 op0 = copy_to_mode_reg (mode0, op0);
21364 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
21365 op1 = copy_to_mode_reg (mode1, op1);
21366 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
21367 op2 = copy_to_mode_reg (mode2, op2);
21368 pat = GEN_FCN (icode) (op0, op1, op2);
21374 case IX86_BUILTIN_LDMXCSR:
21375 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
21376 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21377 emit_move_insn (target, op0);
21378 emit_insn (gen_sse_ldmxcsr (target));
21381 case IX86_BUILTIN_STMXCSR:
21382 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21383 emit_insn (gen_sse_stmxcsr (target));
21384 return copy_to_mode_reg (SImode, target);
21386 case IX86_BUILTIN_CLFLUSH:
21387 arg0 = CALL_EXPR_ARG (exp, 0);
21388 op0 = expand_normal (arg0);
21389 icode = CODE_FOR_sse2_clflush;
21390 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
21391 op0 = copy_to_mode_reg (Pmode, op0);
21393 emit_insn (gen_sse2_clflush (op0));
21396 case IX86_BUILTIN_MONITOR:
21397 arg0 = CALL_EXPR_ARG (exp, 0);
21398 arg1 = CALL_EXPR_ARG (exp, 1);
21399 arg2 = CALL_EXPR_ARG (exp, 2);
21400 op0 = expand_normal (arg0);
21401 op1 = expand_normal (arg1);
21402 op2 = expand_normal (arg2);
21404 op0 = copy_to_mode_reg (Pmode, op0);
21406 op1 = copy_to_mode_reg (SImode, op1);
21408 op2 = copy_to_mode_reg (SImode, op2);
21410 emit_insn (gen_sse3_monitor (op0, op1, op2));
21412 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
21415 case IX86_BUILTIN_MWAIT:
21416 arg0 = CALL_EXPR_ARG (exp, 0);
21417 arg1 = CALL_EXPR_ARG (exp, 1);
21418 op0 = expand_normal (arg0);
21419 op1 = expand_normal (arg1);
21421 op0 = copy_to_mode_reg (SImode, op0);
21423 op1 = copy_to_mode_reg (SImode, op1);
21424 emit_insn (gen_sse3_mwait (op0, op1));
21427 case IX86_BUILTIN_VEC_INIT_V2SI:
21428 case IX86_BUILTIN_VEC_INIT_V4HI:
21429 case IX86_BUILTIN_VEC_INIT_V8QI:
21430 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21432 case IX86_BUILTIN_VEC_EXT_V2DF:
21433 case IX86_BUILTIN_VEC_EXT_V2DI:
21434 case IX86_BUILTIN_VEC_EXT_V4SF:
21435 case IX86_BUILTIN_VEC_EXT_V4SI:
21436 case IX86_BUILTIN_VEC_EXT_V8HI:
21437 case IX86_BUILTIN_VEC_EXT_V2SI:
21438 case IX86_BUILTIN_VEC_EXT_V4HI:
21439 case IX86_BUILTIN_VEC_EXT_V16QI:
21440 return ix86_expand_vec_ext_builtin (exp, target);
21442 case IX86_BUILTIN_VEC_SET_V2DI:
21443 case IX86_BUILTIN_VEC_SET_V4SF:
21444 case IX86_BUILTIN_VEC_SET_V4SI:
21445 case IX86_BUILTIN_VEC_SET_V8HI:
21446 case IX86_BUILTIN_VEC_SET_V4HI:
21447 case IX86_BUILTIN_VEC_SET_V16QI:
21448 return ix86_expand_vec_set_builtin (exp);
21450 case IX86_BUILTIN_INFQ:
21452 REAL_VALUE_TYPE inf;
21456 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21458 tmp = validize_mem (force_const_mem (mode, tmp));
21461 target = gen_reg_rtx (mode);
21463 emit_move_insn (target, tmp);
21471 for (i = 0, d = bdesc_special_args;
21472 i < ARRAY_SIZE (bdesc_special_args);
21474 if (d->code == fcode)
21475 return ix86_expand_special_args_builtin (d, exp, target);
21477 for (i = 0, d = bdesc_args;
21478 i < ARRAY_SIZE (bdesc_args);
21480 if (d->code == fcode)
21481 return ix86_expand_args_builtin (d, exp, target);
21483 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21484 if (d->code == fcode)
21485 return ix86_expand_sse_comi (d, exp, target);
21487 for (i = 0, d = bdesc_pcmpestr;
21488 i < ARRAY_SIZE (bdesc_pcmpestr);
21490 if (d->code == fcode)
21491 return ix86_expand_sse_pcmpestr (d, exp, target);
21493 for (i = 0, d = bdesc_pcmpistr;
21494 i < ARRAY_SIZE (bdesc_pcmpistr);
21496 if (d->code == fcode)
21497 return ix86_expand_sse_pcmpistr (d, exp, target);
21499 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21500 if (d->code == fcode)
21501 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21502 (enum multi_arg_type)d->flag,
21505 gcc_unreachable ();
21508 /* Returns a function decl for a vectorized version of the builtin function
21509 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21510 if it is not available. */
21513 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21516 enum machine_mode in_mode, out_mode;
21519 if (TREE_CODE (type_out) != VECTOR_TYPE
21520 || TREE_CODE (type_in) != VECTOR_TYPE)
21523 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21524 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21525 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21526 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21530 case BUILT_IN_SQRT:
21531 if (out_mode == DFmode && out_n == 2
21532 && in_mode == DFmode && in_n == 2)
21533 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21536 case BUILT_IN_SQRTF:
21537 if (out_mode == SFmode && out_n == 4
21538 && in_mode == SFmode && in_n == 4)
21539 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21542 case BUILT_IN_LRINT:
21543 if (out_mode == SImode && out_n == 4
21544 && in_mode == DFmode && in_n == 2)
21545 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21548 case BUILT_IN_LRINTF:
21549 if (out_mode == SImode && out_n == 4
21550 && in_mode == SFmode && in_n == 4)
21551 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21558 /* Dispatch to a handler for a vectorization library. */
21559 if (ix86_veclib_handler)
21560 return (*ix86_veclib_handler)(fn, type_out, type_in);
21565 /* Handler for an SVML-style interface to
21566 a library with vectorized intrinsics. */
21569 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
21572 tree fntype, new_fndecl, args;
21575 enum machine_mode el_mode, in_mode;
21578 /* The SVML is suitable for unsafe math only. */
21579 if (!flag_unsafe_math_optimizations)
21582 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21583 n = TYPE_VECTOR_SUBPARTS (type_out);
21584 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21585 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21586 if (el_mode != in_mode
21594 case BUILT_IN_LOG10:
21596 case BUILT_IN_TANH:
21598 case BUILT_IN_ATAN:
21599 case BUILT_IN_ATAN2:
21600 case BUILT_IN_ATANH:
21601 case BUILT_IN_CBRT:
21602 case BUILT_IN_SINH:
21604 case BUILT_IN_ASINH:
21605 case BUILT_IN_ASIN:
21606 case BUILT_IN_COSH:
21608 case BUILT_IN_ACOSH:
21609 case BUILT_IN_ACOS:
21610 if (el_mode != DFmode || n != 2)
21614 case BUILT_IN_EXPF:
21615 case BUILT_IN_LOGF:
21616 case BUILT_IN_LOG10F:
21617 case BUILT_IN_POWF:
21618 case BUILT_IN_TANHF:
21619 case BUILT_IN_TANF:
21620 case BUILT_IN_ATANF:
21621 case BUILT_IN_ATAN2F:
21622 case BUILT_IN_ATANHF:
21623 case BUILT_IN_CBRTF:
21624 case BUILT_IN_SINHF:
21625 case BUILT_IN_SINF:
21626 case BUILT_IN_ASINHF:
21627 case BUILT_IN_ASINF:
21628 case BUILT_IN_COSHF:
21629 case BUILT_IN_COSF:
21630 case BUILT_IN_ACOSHF:
21631 case BUILT_IN_ACOSF:
21632 if (el_mode != SFmode || n != 4)
21640 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21642 if (fn == BUILT_IN_LOGF)
21643 strcpy (name, "vmlsLn4");
21644 else if (fn == BUILT_IN_LOG)
21645 strcpy (name, "vmldLn2");
21648 sprintf (name, "vmls%s", bname+10);
21649 name[strlen (name)-1] = '4';
21652 sprintf (name, "vmld%s2", bname+10);
21654 /* Convert to uppercase. */
21658 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21659 args = TREE_CHAIN (args))
21663 fntype = build_function_type_list (type_out, type_in, NULL);
21665 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21667 /* Build a function declaration for the vectorized function. */
21668 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21669 TREE_PUBLIC (new_fndecl) = 1;
21670 DECL_EXTERNAL (new_fndecl) = 1;
21671 DECL_IS_NOVOPS (new_fndecl) = 1;
21672 TREE_READONLY (new_fndecl) = 1;
21677 /* Handler for an ACML-style interface to
21678 a library with vectorized intrinsics. */
21681 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21683 char name[20] = "__vr.._";
21684 tree fntype, new_fndecl, args;
21687 enum machine_mode el_mode, in_mode;
21690 /* The ACML is 64bits only and suitable for unsafe math only as
21691 it does not correctly support parts of IEEE with the required
21692 precision such as denormals. */
21694 || !flag_unsafe_math_optimizations)
21697 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21698 n = TYPE_VECTOR_SUBPARTS (type_out);
21699 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21700 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21701 if (el_mode != in_mode
21711 case BUILT_IN_LOG2:
21712 case BUILT_IN_LOG10:
21715 if (el_mode != DFmode
21720 case BUILT_IN_SINF:
21721 case BUILT_IN_COSF:
21722 case BUILT_IN_EXPF:
21723 case BUILT_IN_POWF:
21724 case BUILT_IN_LOGF:
21725 case BUILT_IN_LOG2F:
21726 case BUILT_IN_LOG10F:
21729 if (el_mode != SFmode
21738 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21739 sprintf (name + 7, "%s", bname+10);
21742 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21743 args = TREE_CHAIN (args))
21747 fntype = build_function_type_list (type_out, type_in, NULL);
21749 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21751 /* Build a function declaration for the vectorized function. */
21752 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21753 TREE_PUBLIC (new_fndecl) = 1;
21754 DECL_EXTERNAL (new_fndecl) = 1;
21755 DECL_IS_NOVOPS (new_fndecl) = 1;
21756 TREE_READONLY (new_fndecl) = 1;
21762 /* Returns a decl of a function that implements conversion of the
21763 input vector of type TYPE, or NULL_TREE if it is not available. */
21766 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21768 if (TREE_CODE (type) != VECTOR_TYPE)
21774 switch (TYPE_MODE (type))
21777 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21782 case FIX_TRUNC_EXPR:
21783 switch (TYPE_MODE (type))
21786 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21796 /* Returns a code for a target-specific builtin that implements
21797 reciprocal of the function, or NULL_TREE if not available. */
21800 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21801 bool sqrt ATTRIBUTE_UNUSED)
21803 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21804 && flag_finite_math_only && !flag_trapping_math
21805 && flag_unsafe_math_optimizations))
21809 /* Machine dependent builtins. */
21812 /* Vectorized version of sqrt to rsqrt conversion. */
21813 case IX86_BUILTIN_SQRTPS_NR:
21814 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21820 /* Normal builtins. */
21823 /* Sqrt to rsqrt conversion. */
21824 case BUILT_IN_SQRTF:
21825 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21832 /* Store OPERAND to the memory after reload is completed. This means
21833 that we can't easily use assign_stack_local. */
21835 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21839 gcc_assert (reload_completed);
21840 if (TARGET_RED_ZONE)
21842 result = gen_rtx_MEM (mode,
21843 gen_rtx_PLUS (Pmode,
21845 GEN_INT (-RED_ZONE_SIZE)));
21846 emit_move_insn (result, operand);
21848 else if (!TARGET_RED_ZONE && TARGET_64BIT)
21854 operand = gen_lowpart (DImode, operand);
21858 gen_rtx_SET (VOIDmode,
21859 gen_rtx_MEM (DImode,
21860 gen_rtx_PRE_DEC (DImode,
21861 stack_pointer_rtx)),
21865 gcc_unreachable ();
21867 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21876 split_di (&operand, 1, operands, operands + 1);
21878 gen_rtx_SET (VOIDmode,
21879 gen_rtx_MEM (SImode,
21880 gen_rtx_PRE_DEC (Pmode,
21881 stack_pointer_rtx)),
21884 gen_rtx_SET (VOIDmode,
21885 gen_rtx_MEM (SImode,
21886 gen_rtx_PRE_DEC (Pmode,
21887 stack_pointer_rtx)),
21892 /* Store HImodes as SImodes. */
21893 operand = gen_lowpart (SImode, operand);
21897 gen_rtx_SET (VOIDmode,
21898 gen_rtx_MEM (GET_MODE (operand),
21899 gen_rtx_PRE_DEC (SImode,
21900 stack_pointer_rtx)),
21904 gcc_unreachable ();
21906 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21911 /* Free operand from the memory. */
21913 ix86_free_from_memory (enum machine_mode mode)
21915 if (!TARGET_RED_ZONE)
21919 if (mode == DImode || TARGET_64BIT)
21923 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21924 to pop or add instruction if registers are available. */
21925 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21926 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21931 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21932 QImode must go into class Q_REGS.
21933 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21934 movdf to do mem-to-mem moves through integer regs. */
21936 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21938 enum machine_mode mode = GET_MODE (x);
21940 /* We're only allowed to return a subclass of CLASS. Many of the
21941 following checks fail for NO_REGS, so eliminate that early. */
21942 if (regclass == NO_REGS)
21945 /* All classes can load zeros. */
21946 if (x == CONST0_RTX (mode))
21949 /* Force constants into memory if we are loading a (nonzero) constant into
21950 an MMX or SSE register. This is because there are no MMX/SSE instructions
21951 to load from a constant. */
21953 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21956 /* Prefer SSE regs only, if we can use them for math. */
21957 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21958 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21960 /* Floating-point constants need more complex checks. */
21961 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21963 /* General regs can load everything. */
21964 if (reg_class_subset_p (regclass, GENERAL_REGS))
21967 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21968 zero above. We only want to wind up preferring 80387 registers if
21969 we plan on doing computation with them. */
21971 && standard_80387_constant_p (x))
21973 /* Limit class to non-sse. */
21974 if (regclass == FLOAT_SSE_REGS)
21976 if (regclass == FP_TOP_SSE_REGS)
21978 if (regclass == FP_SECOND_SSE_REGS)
21979 return FP_SECOND_REG;
21980 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21987 /* Generally when we see PLUS here, it's the function invariant
21988 (plus soft-fp const_int). Which can only be computed into general
21990 if (GET_CODE (x) == PLUS)
21991 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
21993 /* QImode constants are easy to load, but non-constant QImode data
21994 must go into Q_REGS. */
21995 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21997 if (reg_class_subset_p (regclass, Q_REGS))
21999 if (reg_class_subset_p (Q_REGS, regclass))
22007 /* Discourage putting floating-point values in SSE registers unless
22008 SSE math is being used, and likewise for the 387 registers. */
22010 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
22012 enum machine_mode mode = GET_MODE (x);
22014 /* Restrict the output reload class to the register bank that we are doing
22015 math on. If we would like not to return a subset of CLASS, reject this
22016 alternative: if reload cannot do this, it will still use its choice. */
22017 mode = GET_MODE (x);
22018 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22019 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
22021 if (X87_FLOAT_MODE_P (mode))
22023 if (regclass == FP_TOP_SSE_REGS)
22025 else if (regclass == FP_SECOND_SSE_REGS)
22026 return FP_SECOND_REG;
22028 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
22034 /* If we are copying between general and FP registers, we need a memory
22035 location. The same is true for SSE and MMX registers.
22037 To optimize register_move_cost performance, allow inline variant.
22039 The macro can't work reliably when one of the CLASSES is class containing
22040 registers from multiple units (SSE, MMX, integer). We avoid this by never
22041 combining those units in single alternative in the machine description.
22042 Ensure that this constraint holds to avoid unexpected surprises.
22044 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
22045 enforce these sanity checks. */
22048 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22049 enum machine_mode mode, int strict)
22051 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
22052 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
22053 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
22054 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
22055 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
22056 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
22058 gcc_assert (!strict);
22062 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
22065 /* ??? This is a lie. We do have moves between mmx/general, and for
22066 mmx/sse2. But by saying we need secondary memory we discourage the
22067 register allocator from using the mmx registers unless needed. */
22068 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
22071 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22073 /* SSE1 doesn't have any direct moves from other classes. */
22077 /* If the target says that inter-unit moves are more expensive
22078 than moving through memory, then don't generate them. */
22079 if (!TARGET_INTER_UNIT_MOVES)
22082 /* Between SSE and general, we have moves no larger than word size. */
22083 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22091 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22092 enum machine_mode mode, int strict)
22094 return inline_secondary_memory_needed (class1, class2, mode, strict);
22097 /* Return true if the registers in CLASS cannot represent the change from
22098 modes FROM to TO. */
22101 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
22102 enum reg_class regclass)
22107 /* x87 registers can't do subreg at all, as all values are reformatted
22108 to extended precision. */
22109 if (MAYBE_FLOAT_CLASS_P (regclass))
22112 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
22114 /* Vector registers do not support QI or HImode loads. If we don't
22115 disallow a change to these modes, reload will assume it's ok to
22116 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
22117 the vec_dupv4hi pattern. */
22118 if (GET_MODE_SIZE (from) < 4)
22121 /* Vector registers do not support subreg with nonzero offsets, which
22122 are otherwise valid for integer registers. Since we can't see
22123 whether we have a nonzero offset from here, prohibit all
22124 nonparadoxical subregs changing size. */
22125 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
22132 /* Return the cost of moving data of mode M between a
22133 register and memory. A value of 2 is the default; this cost is
22134 relative to those in `REGISTER_MOVE_COST'.
22136 This function is used extensively by register_move_cost that is used to
22137 build tables at startup. Make it inline in this case.
22138 When IN is 2, return maximum of in and out move cost.
22140 If moving between registers and memory is more expensive than
22141 between two registers, you should define this macro to express the
22144 Model also increased moving costs of QImode registers in non
22148 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
22152 if (FLOAT_CLASS_P (regclass))
22170 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
22171 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
22173 if (SSE_CLASS_P (regclass))
22176 switch (GET_MODE_SIZE (mode))
22191 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
22192 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
22194 if (MMX_CLASS_P (regclass))
22197 switch (GET_MODE_SIZE (mode))
22209 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
22210 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
22212 switch (GET_MODE_SIZE (mode))
22215 if (Q_CLASS_P (regclass) || TARGET_64BIT)
22218 return ix86_cost->int_store[0];
22219 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
22220 cost = ix86_cost->movzbl_load;
22222 cost = ix86_cost->int_load[0];
22224 return MAX (cost, ix86_cost->int_store[0]);
22230 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
22232 return ix86_cost->movzbl_load;
22234 return ix86_cost->int_store[0] + 4;
22239 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
22240 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
22242 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
22243 if (mode == TFmode)
22246 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
22248 cost = ix86_cost->int_load[2];
22250 cost = ix86_cost->int_store[2];
22251 return (cost * (((int) GET_MODE_SIZE (mode)
22252 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
22257 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
22259 return inline_memory_move_cost (mode, regclass, in);
22263 /* Return the cost of moving data from a register in class CLASS1 to
22264 one in class CLASS2.
22266 It is not required that the cost always equal 2 when FROM is the same as TO;
22267 on some machines it is expensive to move between registers if they are not
22268 general registers. */
22271 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
22272 enum reg_class class2)
22274 /* In case we require secondary memory, compute cost of the store followed
22275 by load. In order to avoid bad register allocation choices, we need
22276 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
22278 if (inline_secondary_memory_needed (class1, class2, mode, 0))
22282 cost += inline_memory_move_cost (mode, class1, 2);
22283 cost += inline_memory_move_cost (mode, class2, 2);
22285 /* In case of copying from general_purpose_register we may emit multiple
22286 stores followed by single load causing memory size mismatch stall.
22287 Count this as arbitrarily high cost of 20. */
22288 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
22291 /* In the case of FP/MMX moves, the registers actually overlap, and we
22292 have to switch modes in order to treat them differently. */
22293 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
22294 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22300 /* Moves between SSE/MMX and integer unit are expensive. */
22301 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22302 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22304 /* ??? By keeping returned value relatively high, we limit the number
22305 of moves between integer and MMX/SSE registers for all targets.
22306 Additionally, high value prevents problem with x86_modes_tieable_p(),
22307 where integer modes in MMX/SSE registers are not tieable
22308 because of missing QImode and HImode moves to, from or between
22309 MMX/SSE registers. */
22310 return MAX (8, ix86_cost->mmxsse_to_integer);
22312 if (MAYBE_FLOAT_CLASS_P (class1))
22313 return ix86_cost->fp_move;
22314 if (MAYBE_SSE_CLASS_P (class1))
22315 return ix86_cost->sse_move;
22316 if (MAYBE_MMX_CLASS_P (class1))
22317 return ix86_cost->mmx_move;
22321 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
22324 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
22326 /* Flags and only flags can only hold CCmode values. */
22327 if (CC_REGNO_P (regno))
22328 return GET_MODE_CLASS (mode) == MODE_CC;
22329 if (GET_MODE_CLASS (mode) == MODE_CC
22330 || GET_MODE_CLASS (mode) == MODE_RANDOM
22331 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22333 if (FP_REGNO_P (regno))
22334 return VALID_FP_MODE_P (mode);
22335 if (SSE_REGNO_P (regno))
22337 /* We implement the move patterns for all vector modes into and
22338 out of SSE registers, even when no operation instructions
22340 return (VALID_SSE_REG_MODE (mode)
22341 || VALID_SSE2_REG_MODE (mode)
22342 || VALID_MMX_REG_MODE (mode)
22343 || VALID_MMX_REG_MODE_3DNOW (mode));
22345 if (MMX_REGNO_P (regno))
22347 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22348 so if the register is available at all, then we can move data of
22349 the given mode into or out of it. */
22350 return (VALID_MMX_REG_MODE (mode)
22351 || VALID_MMX_REG_MODE_3DNOW (mode));
22354 if (mode == QImode)
22356 /* Take care for QImode values - they can be in non-QI regs,
22357 but then they do cause partial register stalls. */
22358 if (regno < 4 || TARGET_64BIT)
22360 if (!TARGET_PARTIAL_REG_STALL)
22362 return reload_in_progress || reload_completed;
22364 /* We handle both integer and floats in the general purpose registers. */
22365 else if (VALID_INT_MODE_P (mode))
22367 else if (VALID_FP_MODE_P (mode))
22369 else if (VALID_DFP_MODE_P (mode))
22371 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22372 on to use that value in smaller contexts, this can easily force a
22373 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22374 supporting DImode, allow it. */
22375 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22381 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22382 tieable integer mode. */
22385 ix86_tieable_integer_mode_p (enum machine_mode mode)
22394 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22397 return TARGET_64BIT;
22404 /* Return true if MODE1 is accessible in a register that can hold MODE2
22405 without copying. That is, all register classes that can hold MODE2
22406 can also hold MODE1. */
22409 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22411 if (mode1 == mode2)
22414 if (ix86_tieable_integer_mode_p (mode1)
22415 && ix86_tieable_integer_mode_p (mode2))
22418 /* MODE2 being XFmode implies fp stack or general regs, which means we
22419 can tie any smaller floating point modes to it. Note that we do not
22420 tie this with TFmode. */
22421 if (mode2 == XFmode)
22422 return mode1 == SFmode || mode1 == DFmode;
22424 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22425 that we can tie it with SFmode. */
22426 if (mode2 == DFmode)
22427 return mode1 == SFmode;
22429 /* If MODE2 is only appropriate for an SSE register, then tie with
22430 any other mode acceptable to SSE registers. */
22431 if (GET_MODE_SIZE (mode2) == 16
22432 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22433 return (GET_MODE_SIZE (mode1) == 16
22434 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22436 /* If MODE2 is appropriate for an MMX register, then tie
22437 with any other mode acceptable to MMX registers. */
22438 if (GET_MODE_SIZE (mode2) == 8
22439 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22440 return (GET_MODE_SIZE (mode1) == 8
22441 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22446 /* Compute a (partial) cost for rtx X. Return true if the complete
22447 cost has been computed, and false if subexpressions should be
22448 scanned. In either case, *TOTAL contains the cost result. */
22451 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22453 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22454 enum machine_mode mode = GET_MODE (x);
22462 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22464 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22466 else if (flag_pic && SYMBOLIC_CONST (x)
22468 || (!GET_CODE (x) != LABEL_REF
22469 && (GET_CODE (x) != SYMBOL_REF
22470 || !SYMBOL_REF_LOCAL_P (x)))))
22477 if (mode == VOIDmode)
22480 switch (standard_80387_constant_p (x))
22485 default: /* Other constants */
22490 /* Start with (MEM (SYMBOL_REF)), since that's where
22491 it'll probably end up. Add a penalty for size. */
22492 *total = (COSTS_N_INSNS (1)
22493 + (flag_pic != 0 && !TARGET_64BIT)
22494 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22500 /* The zero extensions is often completely free on x86_64, so make
22501 it as cheap as possible. */
22502 if (TARGET_64BIT && mode == DImode
22503 && GET_MODE (XEXP (x, 0)) == SImode)
22505 else if (TARGET_ZERO_EXTEND_WITH_AND)
22506 *total = ix86_cost->add;
22508 *total = ix86_cost->movzx;
22512 *total = ix86_cost->movsx;
22516 if (CONST_INT_P (XEXP (x, 1))
22517 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22519 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22522 *total = ix86_cost->add;
22525 if ((value == 2 || value == 3)
22526 && ix86_cost->lea <= ix86_cost->shift_const)
22528 *total = ix86_cost->lea;
22538 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22540 if (CONST_INT_P (XEXP (x, 1)))
22542 if (INTVAL (XEXP (x, 1)) > 32)
22543 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22545 *total = ix86_cost->shift_const * 2;
22549 if (GET_CODE (XEXP (x, 1)) == AND)
22550 *total = ix86_cost->shift_var * 2;
22552 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22557 if (CONST_INT_P (XEXP (x, 1)))
22558 *total = ix86_cost->shift_const;
22560 *total = ix86_cost->shift_var;
22565 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22567 /* ??? SSE scalar cost should be used here. */
22568 *total = ix86_cost->fmul;
22571 else if (X87_FLOAT_MODE_P (mode))
22573 *total = ix86_cost->fmul;
22576 else if (FLOAT_MODE_P (mode))
22578 /* ??? SSE vector cost should be used here. */
22579 *total = ix86_cost->fmul;
22584 rtx op0 = XEXP (x, 0);
22585 rtx op1 = XEXP (x, 1);
22587 if (CONST_INT_P (XEXP (x, 1)))
22589 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22590 for (nbits = 0; value != 0; value &= value - 1)
22594 /* This is arbitrary. */
22597 /* Compute costs correctly for widening multiplication. */
22598 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22599 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22600 == GET_MODE_SIZE (mode))
22602 int is_mulwiden = 0;
22603 enum machine_mode inner_mode = GET_MODE (op0);
22605 if (GET_CODE (op0) == GET_CODE (op1))
22606 is_mulwiden = 1, op1 = XEXP (op1, 0);
22607 else if (CONST_INT_P (op1))
22609 if (GET_CODE (op0) == SIGN_EXTEND)
22610 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22613 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22617 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22620 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22621 + nbits * ix86_cost->mult_bit
22622 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22631 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22632 /* ??? SSE cost should be used here. */
22633 *total = ix86_cost->fdiv;
22634 else if (X87_FLOAT_MODE_P (mode))
22635 *total = ix86_cost->fdiv;
22636 else if (FLOAT_MODE_P (mode))
22637 /* ??? SSE vector cost should be used here. */
22638 *total = ix86_cost->fdiv;
22640 *total = ix86_cost->divide[MODE_INDEX (mode)];
22644 if (GET_MODE_CLASS (mode) == MODE_INT
22645 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22647 if (GET_CODE (XEXP (x, 0)) == PLUS
22648 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22649 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22650 && CONSTANT_P (XEXP (x, 1)))
22652 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22653 if (val == 2 || val == 4 || val == 8)
22655 *total = ix86_cost->lea;
22656 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22657 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22659 *total += rtx_cost (XEXP (x, 1), outer_code);
22663 else if (GET_CODE (XEXP (x, 0)) == MULT
22664 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22666 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22667 if (val == 2 || val == 4 || val == 8)
22669 *total = ix86_cost->lea;
22670 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22671 *total += rtx_cost (XEXP (x, 1), outer_code);
22675 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22677 *total = ix86_cost->lea;
22678 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22679 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22680 *total += rtx_cost (XEXP (x, 1), outer_code);
22687 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22689 /* ??? SSE cost should be used here. */
22690 *total = ix86_cost->fadd;
22693 else if (X87_FLOAT_MODE_P (mode))
22695 *total = ix86_cost->fadd;
22698 else if (FLOAT_MODE_P (mode))
22700 /* ??? SSE vector cost should be used here. */
22701 *total = ix86_cost->fadd;
22709 if (!TARGET_64BIT && mode == DImode)
22711 *total = (ix86_cost->add * 2
22712 + (rtx_cost (XEXP (x, 0), outer_code)
22713 << (GET_MODE (XEXP (x, 0)) != DImode))
22714 + (rtx_cost (XEXP (x, 1), outer_code)
22715 << (GET_MODE (XEXP (x, 1)) != DImode)));
22721 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22723 /* ??? SSE cost should be used here. */
22724 *total = ix86_cost->fchs;
22727 else if (X87_FLOAT_MODE_P (mode))
22729 *total = ix86_cost->fchs;
22732 else if (FLOAT_MODE_P (mode))
22734 /* ??? SSE vector cost should be used here. */
22735 *total = ix86_cost->fchs;
22741 if (!TARGET_64BIT && mode == DImode)
22742 *total = ix86_cost->add * 2;
22744 *total = ix86_cost->add;
22748 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22749 && XEXP (XEXP (x, 0), 1) == const1_rtx
22750 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22751 && XEXP (x, 1) == const0_rtx)
22753 /* This kind of construct is implemented using test[bwl].
22754 Treat it as if we had an AND. */
22755 *total = (ix86_cost->add
22756 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22757 + rtx_cost (const1_rtx, outer_code));
22763 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22768 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22769 /* ??? SSE cost should be used here. */
22770 *total = ix86_cost->fabs;
22771 else if (X87_FLOAT_MODE_P (mode))
22772 *total = ix86_cost->fabs;
22773 else if (FLOAT_MODE_P (mode))
22774 /* ??? SSE vector cost should be used here. */
22775 *total = ix86_cost->fabs;
22779 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22780 /* ??? SSE cost should be used here. */
22781 *total = ix86_cost->fsqrt;
22782 else if (X87_FLOAT_MODE_P (mode))
22783 *total = ix86_cost->fsqrt;
22784 else if (FLOAT_MODE_P (mode))
22785 /* ??? SSE vector cost should be used here. */
22786 *total = ix86_cost->fsqrt;
22790 if (XINT (x, 1) == UNSPEC_TP)
22801 static int current_machopic_label_num;
22803 /* Given a symbol name and its associated stub, write out the
22804 definition of the stub. */
22807 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22809 unsigned int length;
22810 char *binder_name, *symbol_name, lazy_ptr_name[32];
22811 int label = ++current_machopic_label_num;
22813 /* For 64-bit we shouldn't get here. */
22814 gcc_assert (!TARGET_64BIT);
22816 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22817 symb = (*targetm.strip_name_encoding) (symb);
22819 length = strlen (stub);
22820 binder_name = alloca (length + 32);
22821 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22823 length = strlen (symb);
22824 symbol_name = alloca (length + 32);
22825 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22827 sprintf (lazy_ptr_name, "L%d$lz", label);
22830 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22832 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22834 fprintf (file, "%s:\n", stub);
22835 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22839 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22840 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22841 fprintf (file, "\tjmp\t*%%edx\n");
22844 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22846 fprintf (file, "%s:\n", binder_name);
22850 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22851 fprintf (file, "\tpushl\t%%eax\n");
22854 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22856 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
22858 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22859 fprintf (file, "%s:\n", lazy_ptr_name);
22860 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22861 fprintf (file, "\t.long %s\n", binder_name);
22865 darwin_x86_file_end (void)
22867 darwin_file_end ();
22870 #endif /* TARGET_MACHO */
22872 /* Order the registers for register allocator. */
22875 x86_order_regs_for_local_alloc (void)
22880 /* First allocate the local general purpose registers. */
22881 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22882 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22883 reg_alloc_order [pos++] = i;
22885 /* Global general purpose registers. */
22886 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22887 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22888 reg_alloc_order [pos++] = i;
22890 /* x87 registers come first in case we are doing FP math
22892 if (!TARGET_SSE_MATH)
22893 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22894 reg_alloc_order [pos++] = i;
22896 /* SSE registers. */
22897 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22898 reg_alloc_order [pos++] = i;
22899 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22900 reg_alloc_order [pos++] = i;
22902 /* x87 registers. */
22903 if (TARGET_SSE_MATH)
22904 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22905 reg_alloc_order [pos++] = i;
22907 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22908 reg_alloc_order [pos++] = i;
22910 /* Initialize the rest of array as we do not allocate some registers
22912 while (pos < FIRST_PSEUDO_REGISTER)
22913 reg_alloc_order [pos++] = 0;
22916 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22917 struct attribute_spec.handler. */
22919 ix86_handle_struct_attribute (tree *node, tree name,
22920 tree args ATTRIBUTE_UNUSED,
22921 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
22924 if (DECL_P (*node))
22926 if (TREE_CODE (*node) == TYPE_DECL)
22927 type = &TREE_TYPE (*node);
22932 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22933 || TREE_CODE (*type) == UNION_TYPE)))
22935 warning (OPT_Wattributes, "%qs attribute ignored",
22936 IDENTIFIER_POINTER (name));
22937 *no_add_attrs = true;
22940 else if ((is_attribute_p ("ms_struct", name)
22941 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22942 || ((is_attribute_p ("gcc_struct", name)
22943 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22945 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22946 IDENTIFIER_POINTER (name));
22947 *no_add_attrs = true;
22954 ix86_ms_bitfield_layout_p (const_tree record_type)
22956 return (TARGET_MS_BITFIELD_LAYOUT &&
22957 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22958 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22961 /* Returns an expression indicating where the this parameter is
22962 located on entry to the FUNCTION. */
22965 x86_this_parameter (tree function)
22967 tree type = TREE_TYPE (function);
22968 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22973 const int *parm_regs;
22975 if (TARGET_64BIT_MS_ABI)
22976 parm_regs = x86_64_ms_abi_int_parameter_registers;
22978 parm_regs = x86_64_int_parameter_registers;
22979 return gen_rtx_REG (DImode, parm_regs[aggr]);
22982 nregs = ix86_function_regparm (type, function);
22984 if (nregs > 0 && !stdarg_p (type))
22988 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
22989 regno = aggr ? DX_REG : CX_REG;
22997 return gen_rtx_MEM (SImode,
22998 plus_constant (stack_pointer_rtx, 4));
23001 return gen_rtx_REG (SImode, regno);
23004 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
23007 /* Determine whether x86_output_mi_thunk can succeed. */
23010 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
23011 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
23012 HOST_WIDE_INT vcall_offset, const_tree function)
23014 /* 64-bit can handle anything. */
23018 /* For 32-bit, everything's fine if we have one free register. */
23019 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23022 /* Need a free register for vcall_offset. */
23026 /* Need a free register for GOT references. */
23027 if (flag_pic && !(*targetm.binds_local_p) (function))
23030 /* Otherwise ok. */
23034 /* Output the assembler code for a thunk function. THUNK_DECL is the
23035 declaration for the thunk function itself, FUNCTION is the decl for
23036 the target function. DELTA is an immediate constant offset to be
23037 added to THIS. If VCALL_OFFSET is nonzero, the word at
23038 *(*this + vcall_offset) should be added to THIS. */
23041 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
23042 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
23043 HOST_WIDE_INT vcall_offset, tree function)
23046 rtx this_param = x86_this_parameter (function);
23049 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23050 pull it in now and let DELTA benefit. */
23051 if (REG_P (this_param))
23052 this_reg = this_param;
23053 else if (vcall_offset)
23055 /* Put the this parameter into %eax. */
23056 xops[0] = this_param;
23057 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
23059 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23061 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23064 this_reg = NULL_RTX;
23066 /* Adjust the this parameter by a fixed constant. */
23069 xops[0] = GEN_INT (delta);
23070 xops[1] = this_reg ? this_reg : this_param;
23073 if (!x86_64_general_operand (xops[0], DImode))
23075 tmp = gen_rtx_REG (DImode, R10_REG);
23077 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
23079 xops[1] = this_param;
23081 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23084 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23087 /* Adjust the this parameter by a value stored in the vtable. */
23091 tmp = gen_rtx_REG (DImode, R10_REG);
23094 int tmp_regno = CX_REG;
23095 if (lookup_attribute ("fastcall",
23096 TYPE_ATTRIBUTES (TREE_TYPE (function))))
23097 tmp_regno = AX_REG;
23098 tmp = gen_rtx_REG (SImode, tmp_regno);
23101 xops[0] = gen_rtx_MEM (Pmode, this_reg);
23104 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23106 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23108 /* Adjust the this parameter. */
23109 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
23110 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
23112 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
23113 xops[0] = GEN_INT (vcall_offset);
23115 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23116 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
23118 xops[1] = this_reg;
23120 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23122 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23125 /* If necessary, drop THIS back to its stack slot. */
23126 if (this_reg && this_reg != this_param)
23128 xops[0] = this_reg;
23129 xops[1] = this_param;
23131 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23133 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23136 xops[0] = XEXP (DECL_RTL (function), 0);
23139 if (!flag_pic || (*targetm.binds_local_p) (function))
23140 output_asm_insn ("jmp\t%P0", xops);
23141 /* All thunks should be in the same object as their target,
23142 and thus binds_local_p should be true. */
23143 else if (TARGET_64BIT_MS_ABI)
23144 gcc_unreachable ();
23147 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
23148 tmp = gen_rtx_CONST (Pmode, tmp);
23149 tmp = gen_rtx_MEM (QImode, tmp);
23151 output_asm_insn ("jmp\t%A0", xops);
23156 if (!flag_pic || (*targetm.binds_local_p) (function))
23157 output_asm_insn ("jmp\t%P0", xops);
23162 rtx sym_ref = XEXP (DECL_RTL (function), 0);
23163 tmp = (gen_rtx_SYMBOL_REF
23165 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
23166 tmp = gen_rtx_MEM (QImode, tmp);
23168 output_asm_insn ("jmp\t%0", xops);
23171 #endif /* TARGET_MACHO */
23173 tmp = gen_rtx_REG (SImode, CX_REG);
23174 output_set_got (tmp, NULL_RTX);
23177 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
23178 output_asm_insn ("jmp\t{*}%1", xops);
23184 x86_file_start (void)
23186 default_file_start ();
23188 darwin_file_start ();
23190 if (X86_FILE_START_VERSION_DIRECTIVE)
23191 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23192 if (X86_FILE_START_FLTUSED)
23193 fputs ("\t.global\t__fltused\n", asm_out_file);
23194 if (ix86_asm_dialect == ASM_INTEL)
23195 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
23199 x86_field_alignment (tree field, int computed)
23201 enum machine_mode mode;
23202 tree type = TREE_TYPE (field);
23204 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
23206 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
23207 ? get_inner_array_type (type) : type);
23208 if (mode == DFmode || mode == DCmode
23209 || GET_MODE_CLASS (mode) == MODE_INT
23210 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
23211 return MIN (32, computed);
23215 /* Output assembler code to FILE to increment profiler label # LABELNO
23216 for profiling a function entry. */
23218 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
23222 #ifndef NO_PROFILE_COUNTERS
23223 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
23226 if (!TARGET_64BIT_MS_ABI && flag_pic)
23227 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
23229 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23233 #ifndef NO_PROFILE_COUNTERS
23234 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
23235 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
23237 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
23241 #ifndef NO_PROFILE_COUNTERS
23242 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
23243 PROFILE_COUNT_REGISTER);
23245 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23249 /* We don't have exact information about the insn sizes, but we may assume
23250 quite safely that we are informed about all 1 byte insns and memory
23251 address sizes. This is enough to eliminate unnecessary padding in
23255 min_insn_size (rtx insn)
23259 if (!INSN_P (insn) || !active_insn_p (insn))
23262 /* Discard alignments we've emit and jump instructions. */
23263 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23264 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23267 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
23268 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
23271 /* Important case - calls are always 5 bytes.
23272 It is common to have many calls in the row. */
23274 && symbolic_reference_mentioned_p (PATTERN (insn))
23275 && !SIBLING_CALL_P (insn))
23277 if (get_attr_length (insn) <= 1)
23280 /* For normal instructions we may rely on the sizes of addresses
23281 and the presence of symbol to require 4 bytes of encoding.
23282 This is not the case for jumps where references are PC relative. */
23283 if (!JUMP_P (insn))
23285 l = get_attr_length_address (insn);
23286 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23295 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
23299 ix86_avoid_jump_misspredicts (void)
23301 rtx insn, start = get_insns ();
23302 int nbytes = 0, njumps = 0;
23305 /* Look for all minimal intervals of instructions containing 4 jumps.
23306 The intervals are bounded by START and INSN. NBYTES is the total
23307 size of instructions in the interval including INSN and not including
23308 START. When the NBYTES is smaller than 16 bytes, it is possible
23309 that the end of START and INSN ends up in the same 16byte page.
23311 The smallest offset in the page INSN can start is the case where START
23312 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
23313 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
23315 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23318 nbytes += min_insn_size (insn);
23320 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
23321 INSN_UID (insn), min_insn_size (insn));
23323 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23324 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
23332 start = NEXT_INSN (start);
23333 if ((JUMP_P (start)
23334 && GET_CODE (PATTERN (start)) != ADDR_VEC
23335 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
23337 njumps--, isjump = 1;
23340 nbytes -= min_insn_size (start);
23342 gcc_assert (njumps >= 0);
23344 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23345 INSN_UID (start), INSN_UID (insn), nbytes);
23347 if (njumps == 3 && isjump && nbytes < 16)
23349 int padsize = 15 - nbytes + min_insn_size (insn);
23352 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23353 INSN_UID (insn), padsize);
23354 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23359 /* AMD Athlon works faster
23360 when RET is not destination of conditional jump or directly preceded
23361 by other jump instruction. We avoid the penalty by inserting NOP just
23362 before the RET instructions in such cases. */
23364 ix86_pad_returns (void)
23369 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23371 basic_block bb = e->src;
23372 rtx ret = BB_END (bb);
23374 bool replace = false;
23376 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23377 || !maybe_hot_bb_p (bb))
23379 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23380 if (active_insn_p (prev) || LABEL_P (prev))
23382 if (prev && LABEL_P (prev))
23387 FOR_EACH_EDGE (e, ei, bb->preds)
23388 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23389 && !(e->flags & EDGE_FALLTHRU))
23394 prev = prev_active_insn (ret);
23396 && ((JUMP_P (prev) && any_condjump_p (prev))
23399 /* Empty functions get branch mispredict even when the jump destination
23400 is not visible to us. */
23401 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23406 emit_insn_before (gen_return_internal_long (), ret);
23412 /* Implement machine specific optimizations. We implement padding of returns
23413 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23417 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23418 ix86_pad_returns ();
23419 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23420 ix86_avoid_jump_misspredicts ();
23423 /* Return nonzero when QImode register that must be represented via REX prefix
23426 x86_extended_QIreg_mentioned_p (rtx insn)
23429 extract_insn_cached (insn);
23430 for (i = 0; i < recog_data.n_operands; i++)
23431 if (REG_P (recog_data.operand[i])
23432 && REGNO (recog_data.operand[i]) >= 4)
23437 /* Return nonzero when P points to register encoded via REX prefix.
23438 Called via for_each_rtx. */
23440 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23442 unsigned int regno;
23445 regno = REGNO (*p);
23446 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23449 /* Return true when INSN mentions register that must be encoded using REX
23452 x86_extended_reg_mentioned_p (rtx insn)
23454 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23457 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23458 optabs would emit if we didn't have TFmode patterns. */
23461 x86_emit_floatuns (rtx operands[2])
23463 rtx neglab, donelab, i0, i1, f0, in, out;
23464 enum machine_mode mode, inmode;
23466 inmode = GET_MODE (operands[1]);
23467 gcc_assert (inmode == SImode || inmode == DImode);
23470 in = force_reg (inmode, operands[1]);
23471 mode = GET_MODE (out);
23472 neglab = gen_label_rtx ();
23473 donelab = gen_label_rtx ();
23474 f0 = gen_reg_rtx (mode);
23476 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23478 expand_float (out, in, 0);
23480 emit_jump_insn (gen_jump (donelab));
23483 emit_label (neglab);
23485 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23487 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23489 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23491 expand_float (f0, i0, 0);
23493 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23495 emit_label (donelab);
23498 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23499 with all elements equal to VAR. Return true if successful. */
23502 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23503 rtx target, rtx val)
23505 enum machine_mode smode, wsmode, wvmode;
23520 val = force_reg (GET_MODE_INNER (mode), val);
23521 x = gen_rtx_VEC_DUPLICATE (mode, val);
23522 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23528 if (TARGET_SSE || TARGET_3DNOW_A)
23530 val = gen_lowpart (SImode, val);
23531 x = gen_rtx_TRUNCATE (HImode, val);
23532 x = gen_rtx_VEC_DUPLICATE (mode, x);
23533 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23555 /* Extend HImode to SImode using a paradoxical SUBREG. */
23556 tmp1 = gen_reg_rtx (SImode);
23557 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23558 /* Insert the SImode value as low element of V4SImode vector. */
23559 tmp2 = gen_reg_rtx (V4SImode);
23560 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23561 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23562 CONST0_RTX (V4SImode),
23564 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23565 /* Cast the V4SImode vector back to a V8HImode vector. */
23566 tmp1 = gen_reg_rtx (V8HImode);
23567 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23568 /* Duplicate the low short through the whole low SImode word. */
23569 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23570 /* Cast the V8HImode vector back to a V4SImode vector. */
23571 tmp2 = gen_reg_rtx (V4SImode);
23572 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23573 /* Replicate the low element of the V4SImode vector. */
23574 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23575 /* Cast the V2SImode back to V8HImode, and store in target. */
23576 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23587 /* Extend QImode to SImode using a paradoxical SUBREG. */
23588 tmp1 = gen_reg_rtx (SImode);
23589 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23590 /* Insert the SImode value as low element of V4SImode vector. */
23591 tmp2 = gen_reg_rtx (V4SImode);
23592 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23593 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23594 CONST0_RTX (V4SImode),
23596 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23597 /* Cast the V4SImode vector back to a V16QImode vector. */
23598 tmp1 = gen_reg_rtx (V16QImode);
23599 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23600 /* Duplicate the low byte through the whole low SImode word. */
23601 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23602 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23603 /* Cast the V16QImode vector back to a V4SImode vector. */
23604 tmp2 = gen_reg_rtx (V4SImode);
23605 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23606 /* Replicate the low element of the V4SImode vector. */
23607 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23608 /* Cast the V2SImode back to V16QImode, and store in target. */
23609 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23617 /* Replicate the value once into the next wider mode and recurse. */
23618 val = convert_modes (wsmode, smode, val, true);
23619 x = expand_simple_binop (wsmode, ASHIFT, val,
23620 GEN_INT (GET_MODE_BITSIZE (smode)),
23621 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23622 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23624 x = gen_reg_rtx (wvmode);
23625 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23626 gcc_unreachable ();
23627 emit_move_insn (target, gen_lowpart (mode, x));
23635 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23636 whose ONE_VAR element is VAR, and other elements are zero. Return true
23640 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23641 rtx target, rtx var, int one_var)
23643 enum machine_mode vsimode;
23646 bool use_vector_set = false;
23651 use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
23656 use_vector_set = TARGET_SSE4_1;
23659 use_vector_set = TARGET_SSE2;
23662 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
23667 if (use_vector_set)
23669 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
23670 var = force_reg (GET_MODE_INNER (mode), var);
23671 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23687 var = force_reg (GET_MODE_INNER (mode), var);
23688 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23689 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23694 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23695 new_target = gen_reg_rtx (mode);
23697 new_target = target;
23698 var = force_reg (GET_MODE_INNER (mode), var);
23699 x = gen_rtx_VEC_DUPLICATE (mode, var);
23700 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23701 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23704 /* We need to shuffle the value to the correct position, so
23705 create a new pseudo to store the intermediate result. */
23707 /* With SSE2, we can use the integer shuffle insns. */
23708 if (mode != V4SFmode && TARGET_SSE2)
23710 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23712 GEN_INT (one_var == 1 ? 0 : 1),
23713 GEN_INT (one_var == 2 ? 0 : 1),
23714 GEN_INT (one_var == 3 ? 0 : 1)));
23715 if (target != new_target)
23716 emit_move_insn (target, new_target);
23720 /* Otherwise convert the intermediate result to V4SFmode and
23721 use the SSE1 shuffle instructions. */
23722 if (mode != V4SFmode)
23724 tmp = gen_reg_rtx (V4SFmode);
23725 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23730 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23732 GEN_INT (one_var == 1 ? 0 : 1),
23733 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23734 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23736 if (mode != V4SFmode)
23737 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23738 else if (tmp != target)
23739 emit_move_insn (target, tmp);
23741 else if (target != new_target)
23742 emit_move_insn (target, new_target);
23747 vsimode = V4SImode;
23753 vsimode = V2SImode;
23759 /* Zero extend the variable element to SImode and recurse. */
23760 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23762 x = gen_reg_rtx (vsimode);
23763 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23765 gcc_unreachable ();
23767 emit_move_insn (target, gen_lowpart (mode, x));
23775 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23776 consisting of the values in VALS. It is known that all elements
23777 except ONE_VAR are constants. Return true if successful. */
23780 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23781 rtx target, rtx vals, int one_var)
23783 rtx var = XVECEXP (vals, 0, one_var);
23784 enum machine_mode wmode;
23787 const_vec = copy_rtx (vals);
23788 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23789 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23797 /* For the two element vectors, it's just as easy to use
23798 the general case. */
23814 /* There's no way to set one QImode entry easily. Combine
23815 the variable value with its adjacent constant value, and
23816 promote to an HImode set. */
23817 x = XVECEXP (vals, 0, one_var ^ 1);
23820 var = convert_modes (HImode, QImode, var, true);
23821 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23822 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23823 x = GEN_INT (INTVAL (x) & 0xff);
23827 var = convert_modes (HImode, QImode, var, true);
23828 x = gen_int_mode (INTVAL (x) << 8, HImode);
23830 if (x != const0_rtx)
23831 var = expand_simple_binop (HImode, IOR, var, x, var,
23832 1, OPTAB_LIB_WIDEN);
23834 x = gen_reg_rtx (wmode);
23835 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23836 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23838 emit_move_insn (target, gen_lowpart (mode, x));
23845 emit_move_insn (target, const_vec);
23846 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23850 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23851 all values variable, and none identical. */
23854 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
23855 rtx target, rtx vals)
23857 enum machine_mode half_mode = GET_MODE_INNER (mode);
23858 rtx op0 = NULL, op1 = NULL;
23859 bool use_vec_concat = false;
23865 if (!mmx_ok && !TARGET_SSE)
23871 /* For the two element vectors, we always implement VEC_CONCAT. */
23872 op0 = XVECEXP (vals, 0, 0);
23873 op1 = XVECEXP (vals, 0, 1);
23874 use_vec_concat = true;
23878 half_mode = V2SFmode;
23881 half_mode = V2SImode;
23887 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23888 Recurse to load the two halves. */
23890 op0 = gen_reg_rtx (half_mode);
23891 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
23892 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
23894 op1 = gen_reg_rtx (half_mode);
23895 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
23896 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
23898 use_vec_concat = true;
23909 gcc_unreachable ();
23912 if (use_vec_concat)
23914 if (!register_operand (op0, half_mode))
23915 op0 = force_reg (half_mode, op0);
23916 if (!register_operand (op1, half_mode))
23917 op1 = force_reg (half_mode, op1);
23919 emit_insn (gen_rtx_SET (VOIDmode, target,
23920 gen_rtx_VEC_CONCAT (mode, op0, op1)));
23924 int i, j, n_elts, n_words, n_elt_per_word;
23925 enum machine_mode inner_mode;
23926 rtx words[4], shift;
23928 inner_mode = GET_MODE_INNER (mode);
23929 n_elts = GET_MODE_NUNITS (mode);
23930 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23931 n_elt_per_word = n_elts / n_words;
23932 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
23934 for (i = 0; i < n_words; ++i)
23936 rtx word = NULL_RTX;
23938 for (j = 0; j < n_elt_per_word; ++j)
23940 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
23941 elt = convert_modes (word_mode, inner_mode, elt, true);
23947 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
23948 word, 1, OPTAB_LIB_WIDEN);
23949 word = expand_simple_binop (word_mode, IOR, word, elt,
23950 word, 1, OPTAB_LIB_WIDEN);
23958 emit_move_insn (target, gen_lowpart (mode, words[0]));
23959 else if (n_words == 2)
23961 rtx tmp = gen_reg_rtx (mode);
23962 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
23963 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
23964 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
23965 emit_move_insn (target, tmp);
23967 else if (n_words == 4)
23969 rtx tmp = gen_reg_rtx (V4SImode);
23970 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
23971 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
23972 emit_move_insn (target, gen_lowpart (mode, tmp));
23975 gcc_unreachable ();
23979 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23980 instructions unless MMX_OK is true. */
23983 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
23985 enum machine_mode mode = GET_MODE (target);
23986 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23987 int n_elts = GET_MODE_NUNITS (mode);
23988 int n_var = 0, one_var = -1;
23989 bool all_same = true, all_const_zero = true;
23993 for (i = 0; i < n_elts; ++i)
23995 x = XVECEXP (vals, 0, i);
23996 if (!(CONST_INT_P (x)
23997 || GET_CODE (x) == CONST_DOUBLE
23998 || GET_CODE (x) == CONST_FIXED))
23999 n_var++, one_var = i;
24000 else if (x != CONST0_RTX (inner_mode))
24001 all_const_zero = false;
24002 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
24006 /* Constants are best loaded from the constant pool. */
24009 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
24013 /* If all values are identical, broadcast the value. */
24015 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
24016 XVECEXP (vals, 0, 0)))
24019 /* Values where only one field is non-constant are best loaded from
24020 the pool and overwritten via move later. */
24024 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
24025 XVECEXP (vals, 0, one_var),
24029 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
24033 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
24037 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
24039 enum machine_mode mode = GET_MODE (target);
24040 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24041 bool use_vec_merge = false;
24050 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
24051 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
24053 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
24055 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
24056 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24062 use_vec_merge = TARGET_SSE4_1;
24070 /* For the two element vectors, we implement a VEC_CONCAT with
24071 the extraction of the other element. */
24073 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
24074 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
24077 op0 = val, op1 = tmp;
24079 op0 = tmp, op1 = val;
24081 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
24082 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24087 use_vec_merge = TARGET_SSE4_1;
24094 use_vec_merge = true;
24098 /* tmp = target = A B C D */
24099 tmp = copy_to_reg (target);
24100 /* target = A A B B */
24101 emit_insn (gen_sse_unpcklps (target, target, target));
24102 /* target = X A B B */
24103 ix86_expand_vector_set (false, target, val, 0);
24104 /* target = A X C D */
24105 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24106 GEN_INT (1), GEN_INT (0),
24107 GEN_INT (2+4), GEN_INT (3+4)));
24111 /* tmp = target = A B C D */
24112 tmp = copy_to_reg (target);
24113 /* tmp = X B C D */
24114 ix86_expand_vector_set (false, tmp, val, 0);
24115 /* target = A B X D */
24116 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24117 GEN_INT (0), GEN_INT (1),
24118 GEN_INT (0+4), GEN_INT (3+4)));
24122 /* tmp = target = A B C D */
24123 tmp = copy_to_reg (target);
24124 /* tmp = X B C D */
24125 ix86_expand_vector_set (false, tmp, val, 0);
24126 /* target = A B X D */
24127 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24128 GEN_INT (0), GEN_INT (1),
24129 GEN_INT (2+4), GEN_INT (0+4)));
24133 gcc_unreachable ();
24138 use_vec_merge = TARGET_SSE4_1;
24142 /* Element 0 handled by vec_merge below. */
24145 use_vec_merge = true;
24151 /* With SSE2, use integer shuffles to swap element 0 and ELT,
24152 store into element 0, then shuffle them back. */
24156 order[0] = GEN_INT (elt);
24157 order[1] = const1_rtx;
24158 order[2] = const2_rtx;
24159 order[3] = GEN_INT (3);
24160 order[elt] = const0_rtx;
24162 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24163 order[1], order[2], order[3]));
24165 ix86_expand_vector_set (false, target, val, 0);
24167 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24168 order[1], order[2], order[3]));
24172 /* For SSE1, we have to reuse the V4SF code. */
24173 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
24174 gen_lowpart (SFmode, val), elt);
24179 use_vec_merge = TARGET_SSE2;
24182 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24186 use_vec_merge = TARGET_SSE4_1;
24196 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
24197 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
24198 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24202 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24204 emit_move_insn (mem, target);
24206 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24207 emit_move_insn (tmp, val);
24209 emit_move_insn (target, mem);
24214 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
24216 enum machine_mode mode = GET_MODE (vec);
24217 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24218 bool use_vec_extr = false;
24231 use_vec_extr = true;
24235 use_vec_extr = TARGET_SSE4_1;
24247 tmp = gen_reg_rtx (mode);
24248 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
24249 GEN_INT (elt), GEN_INT (elt),
24250 GEN_INT (elt+4), GEN_INT (elt+4)));
24254 tmp = gen_reg_rtx (mode);
24255 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
24259 gcc_unreachable ();
24262 use_vec_extr = true;
24267 use_vec_extr = TARGET_SSE4_1;
24281 tmp = gen_reg_rtx (mode);
24282 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
24283 GEN_INT (elt), GEN_INT (elt),
24284 GEN_INT (elt), GEN_INT (elt)));
24288 tmp = gen_reg_rtx (mode);
24289 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
24293 gcc_unreachable ();
24296 use_vec_extr = true;
24301 /* For SSE1, we have to reuse the V4SF code. */
24302 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
24303 gen_lowpart (V4SFmode, vec), elt);
24309 use_vec_extr = TARGET_SSE2;
24312 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24316 use_vec_extr = TARGET_SSE4_1;
24320 /* ??? Could extract the appropriate HImode element and shift. */
24327 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
24328 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
24330 /* Let the rtl optimizers know about the zero extension performed. */
24331 if (inner_mode == QImode || inner_mode == HImode)
24333 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
24334 target = gen_lowpart (SImode, target);
24337 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24341 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24343 emit_move_insn (mem, vec);
24345 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24346 emit_move_insn (target, tmp);
24350 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
24351 pattern to reduce; DEST is the destination; IN is the input vector. */
24354 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
24356 rtx tmp1, tmp2, tmp3;
24358 tmp1 = gen_reg_rtx (V4SFmode);
24359 tmp2 = gen_reg_rtx (V4SFmode);
24360 tmp3 = gen_reg_rtx (V4SFmode);
24362 emit_insn (gen_sse_movhlps (tmp1, in, in));
24363 emit_insn (fn (tmp2, tmp1, in));
24365 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
24366 GEN_INT (1), GEN_INT (1),
24367 GEN_INT (1+4), GEN_INT (1+4)));
24368 emit_insn (fn (dest, tmp2, tmp3));
24371 /* Target hook for scalar_mode_supported_p. */
24373 ix86_scalar_mode_supported_p (enum machine_mode mode)
24375 if (DECIMAL_FLOAT_MODE_P (mode))
24377 else if (mode == TFmode)
24378 return TARGET_64BIT;
24380 return default_scalar_mode_supported_p (mode);
24383 /* Implements target hook vector_mode_supported_p. */
24385 ix86_vector_mode_supported_p (enum machine_mode mode)
24387 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24389 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24391 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24393 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24398 /* Target hook for c_mode_for_suffix. */
24399 static enum machine_mode
24400 ix86_c_mode_for_suffix (char suffix)
24402 if (TARGET_64BIT && suffix == 'q')
24404 if (TARGET_MMX && suffix == 'w')
24410 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24412 We do this in the new i386 backend to maintain source compatibility
24413 with the old cc0-based compiler. */
24416 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24417 tree inputs ATTRIBUTE_UNUSED,
24420 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24422 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24427 /* Implements target vector targetm.asm.encode_section_info. This
24428 is not used by netware. */
24430 static void ATTRIBUTE_UNUSED
24431 ix86_encode_section_info (tree decl, rtx rtl, int first)
24433 default_encode_section_info (decl, rtl, first);
24435 if (TREE_CODE (decl) == VAR_DECL
24436 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24437 && ix86_in_large_data_p (decl))
24438 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24441 /* Worker function for REVERSE_CONDITION. */
24444 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24446 return (mode != CCFPmode && mode != CCFPUmode
24447 ? reverse_condition (code)
24448 : reverse_condition_maybe_unordered (code));
24451 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24455 output_387_reg_move (rtx insn, rtx *operands)
24457 if (REG_P (operands[0]))
24459 if (REG_P (operands[1])
24460 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24462 if (REGNO (operands[0]) == FIRST_STACK_REG)
24463 return output_387_ffreep (operands, 0);
24464 return "fstp\t%y0";
24466 if (STACK_TOP_P (operands[0]))
24467 return "fld%z1\t%y1";
24470 else if (MEM_P (operands[0]))
24472 gcc_assert (REG_P (operands[1]));
24473 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24474 return "fstp%z0\t%y0";
24477 /* There is no non-popping store to memory for XFmode.
24478 So if we need one, follow the store with a load. */
24479 if (GET_MODE (operands[0]) == XFmode)
24480 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24482 return "fst%z0\t%y0";
24489 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24490 FP status register is set. */
24493 ix86_emit_fp_unordered_jump (rtx label)
24495 rtx reg = gen_reg_rtx (HImode);
24498 emit_insn (gen_x86_fnstsw_1 (reg));
24500 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24502 emit_insn (gen_x86_sahf_1 (reg));
24504 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24505 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24509 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24511 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24512 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24515 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24516 gen_rtx_LABEL_REF (VOIDmode, label),
24518 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24520 emit_jump_insn (temp);
24521 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24524 /* Output code to perform a log1p XFmode calculation. */
24526 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24528 rtx label1 = gen_label_rtx ();
24529 rtx label2 = gen_label_rtx ();
24531 rtx tmp = gen_reg_rtx (XFmode);
24532 rtx tmp2 = gen_reg_rtx (XFmode);
24534 emit_insn (gen_absxf2 (tmp, op1));
24535 emit_insn (gen_cmpxf (tmp,
24536 CONST_DOUBLE_FROM_REAL_VALUE (
24537 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24539 emit_jump_insn (gen_bge (label1));
24541 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24542 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24543 emit_jump (label2);
24545 emit_label (label1);
24546 emit_move_insn (tmp, CONST1_RTX (XFmode));
24547 emit_insn (gen_addxf3 (tmp, op1, tmp));
24548 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24549 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24551 emit_label (label2);
24554 /* Output code to perform a Newton-Rhapson approximation of a single precision
24555 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24557 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24559 rtx x0, x1, e0, e1, two;
24561 x0 = gen_reg_rtx (mode);
24562 e0 = gen_reg_rtx (mode);
24563 e1 = gen_reg_rtx (mode);
24564 x1 = gen_reg_rtx (mode);
24566 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24568 if (VECTOR_MODE_P (mode))
24569 two = ix86_build_const_vector (SFmode, true, two);
24571 two = force_reg (mode, two);
24573 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24575 /* x0 = rcp(b) estimate */
24576 emit_insn (gen_rtx_SET (VOIDmode, x0,
24577 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24580 emit_insn (gen_rtx_SET (VOIDmode, e0,
24581 gen_rtx_MULT (mode, x0, b)));
24583 emit_insn (gen_rtx_SET (VOIDmode, e1,
24584 gen_rtx_MINUS (mode, two, e0)));
24586 emit_insn (gen_rtx_SET (VOIDmode, x1,
24587 gen_rtx_MULT (mode, x0, e1)));
24589 emit_insn (gen_rtx_SET (VOIDmode, res,
24590 gen_rtx_MULT (mode, a, x1)));
24593 /* Output code to perform a Newton-Rhapson approximation of a
24594 single precision floating point [reciprocal] square root. */
24596 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24599 rtx x0, e0, e1, e2, e3, mthree, mhalf;
24602 x0 = gen_reg_rtx (mode);
24603 e0 = gen_reg_rtx (mode);
24604 e1 = gen_reg_rtx (mode);
24605 e2 = gen_reg_rtx (mode);
24606 e3 = gen_reg_rtx (mode);
24608 real_from_integer (&r, VOIDmode, -3, -1, 0);
24609 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24611 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24612 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24614 if (VECTOR_MODE_P (mode))
24616 mthree = ix86_build_const_vector (SFmode, true, mthree);
24617 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
24620 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24621 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
24623 /* x0 = rsqrt(a) estimate */
24624 emit_insn (gen_rtx_SET (VOIDmode, x0,
24625 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24628 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
24633 zero = gen_reg_rtx (mode);
24634 mask = gen_reg_rtx (mode);
24636 zero = force_reg (mode, CONST0_RTX(mode));
24637 emit_insn (gen_rtx_SET (VOIDmode, mask,
24638 gen_rtx_NE (mode, zero, a)));
24640 emit_insn (gen_rtx_SET (VOIDmode, x0,
24641 gen_rtx_AND (mode, x0, mask)));
24645 emit_insn (gen_rtx_SET (VOIDmode, e0,
24646 gen_rtx_MULT (mode, x0, a)));
24648 emit_insn (gen_rtx_SET (VOIDmode, e1,
24649 gen_rtx_MULT (mode, e0, x0)));
24652 mthree = force_reg (mode, mthree);
24653 emit_insn (gen_rtx_SET (VOIDmode, e2,
24654 gen_rtx_PLUS (mode, e1, mthree)));
24656 mhalf = force_reg (mode, mhalf);
24658 /* e3 = -.5 * x0 */
24659 emit_insn (gen_rtx_SET (VOIDmode, e3,
24660 gen_rtx_MULT (mode, x0, mhalf)));
24662 /* e3 = -.5 * e0 */
24663 emit_insn (gen_rtx_SET (VOIDmode, e3,
24664 gen_rtx_MULT (mode, e0, mhalf)));
24665 /* ret = e2 * e3 */
24666 emit_insn (gen_rtx_SET (VOIDmode, res,
24667 gen_rtx_MULT (mode, e2, e3)));
24670 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24672 static void ATTRIBUTE_UNUSED
24673 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24676 /* With Binutils 2.15, the "@unwind" marker must be specified on
24677 every occurrence of the ".eh_frame" section, not just the first
24680 && strcmp (name, ".eh_frame") == 0)
24682 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24683 flags & SECTION_WRITE ? "aw" : "a");
24686 default_elf_asm_named_section (name, flags, decl);
24689 /* Return the mangling of TYPE if it is an extended fundamental type. */
24691 static const char *
24692 ix86_mangle_type (const_tree type)
24694 type = TYPE_MAIN_VARIANT (type);
24696 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24697 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24700 switch (TYPE_MODE (type))
24703 /* __float128 is "g". */
24706 /* "long double" or __float80 is "e". */
24713 /* For 32-bit code we can save PIC register setup by using
24714 __stack_chk_fail_local hidden function instead of calling
24715 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
24716 register, so it is better to call __stack_chk_fail directly. */
24719 ix86_stack_protect_fail (void)
24721 return TARGET_64BIT
24722 ? default_external_stack_protect_fail ()
24723 : default_hidden_stack_protect_fail ();
24726 /* Select a format to encode pointers in exception handling data. CODE
24727 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24728 true if the symbol may be affected by dynamic relocations.
24730 ??? All x86 object file formats are capable of representing this.
24731 After all, the relocation needed is the same as for the call insn.
24732 Whether or not a particular assembler allows us to enter such, I
24733 guess we'll have to see. */
24735 asm_preferred_eh_data_format (int code, int global)
24739 int type = DW_EH_PE_sdata8;
24741 || ix86_cmodel == CM_SMALL_PIC
24742 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24743 type = DW_EH_PE_sdata4;
24744 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24746 if (ix86_cmodel == CM_SMALL
24747 || (ix86_cmodel == CM_MEDIUM && code))
24748 return DW_EH_PE_udata4;
24749 return DW_EH_PE_absptr;
24752 /* Expand copysign from SIGN to the positive value ABS_VALUE
24753 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
24756 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24758 enum machine_mode mode = GET_MODE (sign);
24759 rtx sgn = gen_reg_rtx (mode);
24760 if (mask == NULL_RTX)
24762 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24763 if (!VECTOR_MODE_P (mode))
24765 /* We need to generate a scalar mode mask in this case. */
24766 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24767 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24768 mask = gen_reg_rtx (mode);
24769 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24773 mask = gen_rtx_NOT (mode, mask);
24774 emit_insn (gen_rtx_SET (VOIDmode, sgn,
24775 gen_rtx_AND (mode, mask, sign)));
24776 emit_insn (gen_rtx_SET (VOIDmode, result,
24777 gen_rtx_IOR (mode, abs_value, sgn)));
24780 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24781 mask for masking out the sign-bit is stored in *SMASK, if that is
24784 ix86_expand_sse_fabs (rtx op0, rtx *smask)
24786 enum machine_mode mode = GET_MODE (op0);
24789 xa = gen_reg_rtx (mode);
24790 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
24791 if (!VECTOR_MODE_P (mode))
24793 /* We need to generate a scalar mode mask in this case. */
24794 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24795 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24796 mask = gen_reg_rtx (mode);
24797 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24799 emit_insn (gen_rtx_SET (VOIDmode, xa,
24800 gen_rtx_AND (mode, op0, mask)));
24808 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24809 swapping the operands if SWAP_OPERANDS is true. The expanded
24810 code is a forward jump to a newly created label in case the
24811 comparison is true. The generated label rtx is returned. */
24813 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
24814 bool swap_operands)
24825 label = gen_label_rtx ();
24826 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
24827 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24828 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
24829 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
24830 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24831 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
24832 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24833 JUMP_LABEL (tmp) = label;
24838 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24839 using comparison code CODE. Operands are swapped for the comparison if
24840 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24842 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
24843 bool swap_operands)
24845 enum machine_mode mode = GET_MODE (op0);
24846 rtx mask = gen_reg_rtx (mode);
24855 if (mode == DFmode)
24856 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
24857 gen_rtx_fmt_ee (code, mode, op0, op1)));
24859 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
24860 gen_rtx_fmt_ee (code, mode, op0, op1)));
24865 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24866 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
24868 ix86_gen_TWO52 (enum machine_mode mode)
24870 REAL_VALUE_TYPE TWO52r;
24873 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
24874 TWO52 = const_double_from_real_value (TWO52r, mode);
24875 TWO52 = force_reg (mode, TWO52);
24880 /* Expand SSE sequence for computing lround from OP1 storing
24883 ix86_expand_lround (rtx op0, rtx op1)
24885 /* C code for the stuff we're doing below:
24886 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
24889 enum machine_mode mode = GET_MODE (op1);
24890 const struct real_format *fmt;
24891 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24894 /* load nextafter (0.5, 0.0) */
24895 fmt = REAL_MODE_FORMAT (mode);
24896 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24897 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24899 /* adj = copysign (0.5, op1) */
24900 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
24901 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
24903 /* adj = op1 + adj */
24904 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
24906 /* op0 = (imode)adj */
24907 expand_fix (op0, adj, 0);
24910 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
24913 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
24915 /* C code for the stuff we're doing below (for do_floor):
24917 xi -= (double)xi > op1 ? 1 : 0;
24920 enum machine_mode fmode = GET_MODE (op1);
24921 enum machine_mode imode = GET_MODE (op0);
24922 rtx ireg, freg, label, tmp;
24924 /* reg = (long)op1 */
24925 ireg = gen_reg_rtx (imode);
24926 expand_fix (ireg, op1, 0);
24928 /* freg = (double)reg */
24929 freg = gen_reg_rtx (fmode);
24930 expand_float (freg, ireg, 0);
24932 /* ireg = (freg > op1) ? ireg - 1 : ireg */
24933 label = ix86_expand_sse_compare_and_jump (UNLE,
24934 freg, op1, !do_floor);
24935 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
24936 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
24937 emit_move_insn (ireg, tmp);
24939 emit_label (label);
24940 LABEL_NUSES (label) = 1;
24942 emit_move_insn (op0, ireg);
24945 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24946 result in OPERAND0. */
24948 ix86_expand_rint (rtx operand0, rtx operand1)
24950 /* C code for the stuff we're doing below:
24951 xa = fabs (operand1);
24952 if (!isless (xa, 2**52))
24954 xa = xa + 2**52 - 2**52;
24955 return copysign (xa, operand1);
24957 enum machine_mode mode = GET_MODE (operand0);
24958 rtx res, xa, label, TWO52, mask;
24960 res = gen_reg_rtx (mode);
24961 emit_move_insn (res, operand1);
24963 /* xa = abs (operand1) */
24964 xa = ix86_expand_sse_fabs (res, &mask);
24966 /* if (!isless (xa, TWO52)) goto label; */
24967 TWO52 = ix86_gen_TWO52 (mode);
24968 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24970 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24971 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24973 ix86_sse_copysign_to_positive (res, xa, res, mask);
24975 emit_label (label);
24976 LABEL_NUSES (label) = 1;
24978 emit_move_insn (operand0, res);
24981 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24984 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
24986 /* C code for the stuff we expand below.
24987 double xa = fabs (x), x2;
24988 if (!isless (xa, TWO52))
24990 xa = xa + TWO52 - TWO52;
24991 x2 = copysign (xa, x);
25000 enum machine_mode mode = GET_MODE (operand0);
25001 rtx xa, TWO52, tmp, label, one, res, mask;
25003 TWO52 = ix86_gen_TWO52 (mode);
25005 /* Temporary for holding the result, initialized to the input
25006 operand to ease control flow. */
25007 res = gen_reg_rtx (mode);
25008 emit_move_insn (res, operand1);
25010 /* xa = abs (operand1) */
25011 xa = ix86_expand_sse_fabs (res, &mask);
25013 /* if (!isless (xa, TWO52)) goto label; */
25014 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25016 /* xa = xa + TWO52 - TWO52; */
25017 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25018 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25020 /* xa = copysign (xa, operand1) */
25021 ix86_sse_copysign_to_positive (xa, xa, res, mask);
25023 /* generate 1.0 or -1.0 */
25024 one = force_reg (mode,
25025 const_double_from_real_value (do_floor
25026 ? dconst1 : dconstm1, mode));
25028 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25029 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25030 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25031 gen_rtx_AND (mode, one, tmp)));
25032 /* We always need to subtract here to preserve signed zero. */
25033 tmp = expand_simple_binop (mode, MINUS,
25034 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25035 emit_move_insn (res, tmp);
25037 emit_label (label);
25038 LABEL_NUSES (label) = 1;
25040 emit_move_insn (operand0, res);
25043 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
25046 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
25048 /* C code for the stuff we expand below.
25049 double xa = fabs (x), x2;
25050 if (!isless (xa, TWO52))
25052 x2 = (double)(long)x;
25059 if (HONOR_SIGNED_ZEROS (mode))
25060 return copysign (x2, x);
25063 enum machine_mode mode = GET_MODE (operand0);
25064 rtx xa, xi, TWO52, tmp, label, one, res, mask;
25066 TWO52 = ix86_gen_TWO52 (mode);
25068 /* Temporary for holding the result, initialized to the input
25069 operand to ease control flow. */
25070 res = gen_reg_rtx (mode);
25071 emit_move_insn (res, operand1);
25073 /* xa = abs (operand1) */
25074 xa = ix86_expand_sse_fabs (res, &mask);
25076 /* if (!isless (xa, TWO52)) goto label; */
25077 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25079 /* xa = (double)(long)x */
25080 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25081 expand_fix (xi, res, 0);
25082 expand_float (xa, xi, 0);
25085 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25087 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25088 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25089 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25090 gen_rtx_AND (mode, one, tmp)));
25091 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
25092 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25093 emit_move_insn (res, tmp);
25095 if (HONOR_SIGNED_ZEROS (mode))
25096 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25098 emit_label (label);
25099 LABEL_NUSES (label) = 1;
25101 emit_move_insn (operand0, res);
25104 /* Expand SSE sequence for computing round from OPERAND1 storing
25105 into OPERAND0. Sequence that works without relying on DImode truncation
25106 via cvttsd2siq that is only available on 64bit targets. */
25108 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
25110 /* C code for the stuff we expand below.
25111 double xa = fabs (x), xa2, x2;
25112 if (!isless (xa, TWO52))
25114 Using the absolute value and copying back sign makes
25115 -0.0 -> -0.0 correct.
25116 xa2 = xa + TWO52 - TWO52;
25121 else if (dxa > 0.5)
25123 x2 = copysign (xa2, x);
25126 enum machine_mode mode = GET_MODE (operand0);
25127 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
25129 TWO52 = ix86_gen_TWO52 (mode);
25131 /* Temporary for holding the result, initialized to the input
25132 operand to ease control flow. */
25133 res = gen_reg_rtx (mode);
25134 emit_move_insn (res, operand1);
25136 /* xa = abs (operand1) */
25137 xa = ix86_expand_sse_fabs (res, &mask);
25139 /* if (!isless (xa, TWO52)) goto label; */
25140 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25142 /* xa2 = xa + TWO52 - TWO52; */
25143 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25144 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
25146 /* dxa = xa2 - xa; */
25147 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
25149 /* generate 0.5, 1.0 and -0.5 */
25150 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
25151 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
25152 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
25156 tmp = gen_reg_rtx (mode);
25157 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
25158 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
25159 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25160 gen_rtx_AND (mode, one, tmp)));
25161 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25162 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
25163 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
25164 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25165 gen_rtx_AND (mode, one, tmp)));
25166 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25168 /* res = copysign (xa2, operand1) */
25169 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
25171 emit_label (label);
25172 LABEL_NUSES (label) = 1;
25174 emit_move_insn (operand0, res);
25177 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25180 ix86_expand_trunc (rtx operand0, rtx operand1)
25182 /* C code for SSE variant we expand below.
25183 double xa = fabs (x), x2;
25184 if (!isless (xa, TWO52))
25186 x2 = (double)(long)x;
25187 if (HONOR_SIGNED_ZEROS (mode))
25188 return copysign (x2, x);
25191 enum machine_mode mode = GET_MODE (operand0);
25192 rtx xa, xi, TWO52, label, res, mask;
25194 TWO52 = ix86_gen_TWO52 (mode);
25196 /* Temporary for holding the result, initialized to the input
25197 operand to ease control flow. */
25198 res = gen_reg_rtx (mode);
25199 emit_move_insn (res, operand1);
25201 /* xa = abs (operand1) */
25202 xa = ix86_expand_sse_fabs (res, &mask);
25204 /* if (!isless (xa, TWO52)) goto label; */
25205 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25207 /* x = (double)(long)x */
25208 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25209 expand_fix (xi, res, 0);
25210 expand_float (res, xi, 0);
25212 if (HONOR_SIGNED_ZEROS (mode))
25213 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25215 emit_label (label);
25216 LABEL_NUSES (label) = 1;
25218 emit_move_insn (operand0, res);
25221 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25224 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
25226 enum machine_mode mode = GET_MODE (operand0);
25227 rtx xa, mask, TWO52, label, one, res, smask, tmp;
25229 /* C code for SSE variant we expand below.
25230 double xa = fabs (x), x2;
25231 if (!isless (xa, TWO52))
25233 xa2 = xa + TWO52 - TWO52;
25237 x2 = copysign (xa2, x);
25241 TWO52 = ix86_gen_TWO52 (mode);
25243 /* Temporary for holding the result, initialized to the input
25244 operand to ease control flow. */
25245 res = gen_reg_rtx (mode);
25246 emit_move_insn (res, operand1);
25248 /* xa = abs (operand1) */
25249 xa = ix86_expand_sse_fabs (res, &smask);
25251 /* if (!isless (xa, TWO52)) goto label; */
25252 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25254 /* res = xa + TWO52 - TWO52; */
25255 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25256 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
25257 emit_move_insn (res, tmp);
25260 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25262 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
25263 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
25264 emit_insn (gen_rtx_SET (VOIDmode, mask,
25265 gen_rtx_AND (mode, mask, one)));
25266 tmp = expand_simple_binop (mode, MINUS,
25267 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
25268 emit_move_insn (res, tmp);
25270 /* res = copysign (res, operand1) */
25271 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
25273 emit_label (label);
25274 LABEL_NUSES (label) = 1;
25276 emit_move_insn (operand0, res);
25279 /* Expand SSE sequence for computing round from OPERAND1 storing
25282 ix86_expand_round (rtx operand0, rtx operand1)
25284 /* C code for the stuff we're doing below:
25285 double xa = fabs (x);
25286 if (!isless (xa, TWO52))
25288 xa = (double)(long)(xa + nextafter (0.5, 0.0));
25289 return copysign (xa, x);
25291 enum machine_mode mode = GET_MODE (operand0);
25292 rtx res, TWO52, xa, label, xi, half, mask;
25293 const struct real_format *fmt;
25294 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25296 /* Temporary for holding the result, initialized to the input
25297 operand to ease control flow. */
25298 res = gen_reg_rtx (mode);
25299 emit_move_insn (res, operand1);
25301 TWO52 = ix86_gen_TWO52 (mode);
25302 xa = ix86_expand_sse_fabs (res, &mask);
25303 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25305 /* load nextafter (0.5, 0.0) */
25306 fmt = REAL_MODE_FORMAT (mode);
25307 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25308 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25310 /* xa = xa + 0.5 */
25311 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
25312 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
25314 /* xa = (double)(int64_t)xa */
25315 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25316 expand_fix (xi, xa, 0);
25317 expand_float (xa, xi, 0);
25319 /* res = copysign (xa, operand1) */
25320 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
25322 emit_label (label);
25323 LABEL_NUSES (label) = 1;
25325 emit_move_insn (operand0, res);
25329 /* Validate whether a SSE5 instruction is valid or not.
25330 OPERANDS is the array of operands.
25331 NUM is the number of operands.
25332 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
25333 NUM_MEMORY is the maximum number of memory operands to accept. */
25335 ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
25341 /* Count the number of memory arguments */
25344 for (i = 0; i < num; i++)
25346 enum machine_mode mode = GET_MODE (operands[i]);
25347 if (register_operand (operands[i], mode))
25350 else if (memory_operand (operands[i], mode))
25352 mem_mask |= (1 << i);
25358 rtx pattern = PATTERN (insn);
25360 /* allow 0 for pcmov */
25361 if (GET_CODE (pattern) != SET
25362 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
25364 || operands[i] != CONST0_RTX (mode))
25369 /* If there were no memory operations, allow the insn */
25373 /* Do not allow the destination register to be a memory operand. */
25374 else if (mem_mask & (1 << 0))
25377 /* If there are too many memory operations, disallow the instruction. While
25378 the hardware only allows 1 memory reference, before register allocation
25379 for some insns, we allow two memory operations sometimes in order to allow
25380 code like the following to be optimized:
25382 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
25384 or similar cases that are vectorized into using the fmaddss
25386 else if (mem_count > num_memory)
25389 /* Don't allow more than one memory operation if not optimizing. */
25390 else if (mem_count > 1 && !optimize)
25393 else if (num == 4 && mem_count == 1)
25395 /* formats (destination is the first argument), example fmaddss:
25396 xmm1, xmm1, xmm2, xmm3/mem
25397 xmm1, xmm1, xmm2/mem, xmm3
25398 xmm1, xmm2, xmm3/mem, xmm1
25399 xmm1, xmm2/mem, xmm3, xmm1 */
25401 return ((mem_mask == (1 << 1))
25402 || (mem_mask == (1 << 2))
25403 || (mem_mask == (1 << 3)));
25405 /* format, example pmacsdd:
25406 xmm1, xmm2, xmm3/mem, xmm1 */
25408 return (mem_mask == (1 << 2));
25411 else if (num == 4 && num_memory == 2)
25413 /* If there are two memory operations, we can load one of the memory ops
25414 into the destination register. This is for optimizing the
25415 multiply/add ops, which the combiner has optimized both the multiply
25416 and the add insns to have a memory operation. We have to be careful
25417 that the destination doesn't overlap with the inputs. */
25418 rtx op0 = operands[0];
25420 if (reg_mentioned_p (op0, operands[1])
25421 || reg_mentioned_p (op0, operands[2])
25422 || reg_mentioned_p (op0, operands[3]))
25425 /* formats (destination is the first argument), example fmaddss:
25426 xmm1, xmm1, xmm2, xmm3/mem
25427 xmm1, xmm1, xmm2/mem, xmm3
25428 xmm1, xmm2, xmm3/mem, xmm1
25429 xmm1, xmm2/mem, xmm3, xmm1
25431 For the oc0 case, we will load either operands[1] or operands[3] into
25432 operands[0], so any combination of 2 memory operands is ok. */
25436 /* format, example pmacsdd:
25437 xmm1, xmm2, xmm3/mem, xmm1
25439 For the integer multiply/add instructions be more restrictive and
25440 require operands[2] and operands[3] to be the memory operands. */
25442 return (mem_mask == ((1 << 2) | (1 << 3)));
25445 else if (num == 3 && num_memory == 1)
25447 /* formats, example protb:
25448 xmm1, xmm2, xmm3/mem
25449 xmm1, xmm2/mem, xmm3 */
25451 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25453 /* format, example comeq:
25454 xmm1, xmm2, xmm3/mem */
25456 return (mem_mask == (1 << 2));
25460 gcc_unreachable ();
25466 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
25467 hardware will allow by using the destination register to load one of the
25468 memory operations. Presently this is used by the multiply/add routines to
25469 allow 2 memory references. */
25472 ix86_expand_sse5_multiple_memory (rtx operands[],
25474 enum machine_mode mode)
25476 rtx op0 = operands[0];
25478 || memory_operand (op0, mode)
25479 || reg_mentioned_p (op0, operands[1])
25480 || reg_mentioned_p (op0, operands[2])
25481 || reg_mentioned_p (op0, operands[3]))
25482 gcc_unreachable ();
25484 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25485 the destination register. */
25486 if (memory_operand (operands[1], mode))
25488 emit_move_insn (op0, operands[1]);
25491 else if (memory_operand (operands[3], mode))
25493 emit_move_insn (op0, operands[3]);
25497 gcc_unreachable ();
25503 /* Table of valid machine attributes. */
25504 static const struct attribute_spec ix86_attribute_table[] =
25506 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25507 /* Stdcall attribute says callee is responsible for popping arguments
25508 if they are not variable. */
25509 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25510 /* Fastcall attribute says callee is responsible for popping arguments
25511 if they are not variable. */
25512 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25513 /* Cdecl attribute says the callee is a normal C declaration */
25514 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25515 /* Regparm attribute specifies how many integer arguments are to be
25516 passed in registers. */
25517 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25518 /* Sseregparm attribute says we are using x86_64 calling conventions
25519 for FP arguments. */
25520 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25521 /* force_align_arg_pointer says this function realigns the stack at entry. */
25522 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25523 false, true, true, ix86_handle_cconv_attribute },
25524 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25525 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25526 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25527 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25529 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25530 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25531 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25532 SUBTARGET_ATTRIBUTE_TABLE,
25534 { NULL, 0, 0, false, false, false, NULL }
25537 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25539 x86_builtin_vectorization_cost (bool runtime_test)
25541 /* If the branch of the runtime test is taken - i.e. - the vectorized
25542 version is skipped - this incurs a misprediction cost (because the
25543 vectorized version is expected to be the fall-through). So we subtract
25544 the latency of a mispredicted branch from the costs that are incured
25545 when the vectorized version is executed.
25547 TODO: The values in individual target tables have to be tuned or new
25548 fields may be needed. For eg. on K8, the default branch path is the
25549 not-taken path. If the taken path is predicted correctly, the minimum
25550 penalty of going down the taken-path is 1 cycle. If the taken-path is
25551 not predicted correctly, then the minimum penalty is 10 cycles. */
25555 return (-(ix86_cost->cond_taken_branch_cost));
25561 /* Initialize the GCC target structure. */
25562 #undef TARGET_ATTRIBUTE_TABLE
25563 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25564 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25565 # undef TARGET_MERGE_DECL_ATTRIBUTES
25566 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25569 #undef TARGET_COMP_TYPE_ATTRIBUTES
25570 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25572 #undef TARGET_INIT_BUILTINS
25573 #define TARGET_INIT_BUILTINS ix86_init_builtins
25574 #undef TARGET_EXPAND_BUILTIN
25575 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25577 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25578 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25579 ix86_builtin_vectorized_function
25581 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25582 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25584 #undef TARGET_BUILTIN_RECIPROCAL
25585 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25587 #undef TARGET_ASM_FUNCTION_EPILOGUE
25588 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25590 #undef TARGET_ENCODE_SECTION_INFO
25591 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25592 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25594 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25597 #undef TARGET_ASM_OPEN_PAREN
25598 #define TARGET_ASM_OPEN_PAREN ""
25599 #undef TARGET_ASM_CLOSE_PAREN
25600 #define TARGET_ASM_CLOSE_PAREN ""
25602 #undef TARGET_ASM_ALIGNED_HI_OP
25603 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25604 #undef TARGET_ASM_ALIGNED_SI_OP
25605 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25607 #undef TARGET_ASM_ALIGNED_DI_OP
25608 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25611 #undef TARGET_ASM_UNALIGNED_HI_OP
25612 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25613 #undef TARGET_ASM_UNALIGNED_SI_OP
25614 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25615 #undef TARGET_ASM_UNALIGNED_DI_OP
25616 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25618 #undef TARGET_SCHED_ADJUST_COST
25619 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25620 #undef TARGET_SCHED_ISSUE_RATE
25621 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25622 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25623 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25624 ia32_multipass_dfa_lookahead
25626 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25627 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25630 #undef TARGET_HAVE_TLS
25631 #define TARGET_HAVE_TLS true
25633 #undef TARGET_CANNOT_FORCE_CONST_MEM
25634 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25635 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25636 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25638 #undef TARGET_DELEGITIMIZE_ADDRESS
25639 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25641 #undef TARGET_MS_BITFIELD_LAYOUT_P
25642 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25645 #undef TARGET_BINDS_LOCAL_P
25646 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25648 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25649 #undef TARGET_BINDS_LOCAL_P
25650 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25653 #undef TARGET_ASM_OUTPUT_MI_THUNK
25654 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25655 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25656 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25658 #undef TARGET_ASM_FILE_START
25659 #define TARGET_ASM_FILE_START x86_file_start
25661 #undef TARGET_DEFAULT_TARGET_FLAGS
25662 #define TARGET_DEFAULT_TARGET_FLAGS \
25664 | TARGET_SUBTARGET_DEFAULT \
25665 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25667 #undef TARGET_HANDLE_OPTION
25668 #define TARGET_HANDLE_OPTION ix86_handle_option
25670 #undef TARGET_RTX_COSTS
25671 #define TARGET_RTX_COSTS ix86_rtx_costs
25672 #undef TARGET_ADDRESS_COST
25673 #define TARGET_ADDRESS_COST ix86_address_cost
25675 #undef TARGET_FIXED_CONDITION_CODE_REGS
25676 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25677 #undef TARGET_CC_MODES_COMPATIBLE
25678 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25680 #undef TARGET_MACHINE_DEPENDENT_REORG
25681 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25683 #undef TARGET_BUILD_BUILTIN_VA_LIST
25684 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25686 #undef TARGET_EXPAND_BUILTIN_VA_START
25687 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25689 #undef TARGET_MD_ASM_CLOBBERS
25690 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25692 #undef TARGET_PROMOTE_PROTOTYPES
25693 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25694 #undef TARGET_STRUCT_VALUE_RTX
25695 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25696 #undef TARGET_SETUP_INCOMING_VARARGS
25697 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25698 #undef TARGET_MUST_PASS_IN_STACK
25699 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25700 #undef TARGET_PASS_BY_REFERENCE
25701 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25702 #undef TARGET_INTERNAL_ARG_POINTER
25703 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25704 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25705 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25706 #undef TARGET_STRICT_ARGUMENT_NAMING
25707 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25709 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25710 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25712 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25713 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25715 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25716 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25718 #undef TARGET_C_MODE_FOR_SUFFIX
25719 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25722 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25723 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25726 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25727 #undef TARGET_INSERT_ATTRIBUTES
25728 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25731 #undef TARGET_MANGLE_TYPE
25732 #define TARGET_MANGLE_TYPE ix86_mangle_type
25734 #undef TARGET_STACK_PROTECT_FAIL
25735 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25737 #undef TARGET_FUNCTION_VALUE
25738 #define TARGET_FUNCTION_VALUE ix86_function_value
25740 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25741 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
25743 struct gcc_target targetm = TARGET_INITIALIZER;
25745 #include "gt-i386.h"