1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
51 #include "tree-gimple.h"
54 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
60 #ifndef CHECK_STACK_LIMIT
61 #define CHECK_STACK_LIMIT (-1)
64 /* Return index of given mode in mult and division cost tables. */
65 #define MODE_INDEX(mode) \
66 ((mode) == QImode ? 0 \
67 : (mode) == HImode ? 1 \
68 : (mode) == SImode ? 2 \
69 : (mode) == DImode ? 3 \
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
74 #define COSTS_N_BYTES(N) ((N) * 2)
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs size_cost = { /* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of loading integer registers */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Generic64 should produce code tuned for Nocona and K8. */
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea is 2 cycles and more. With
1042 this cost however our current implementation of synth_mult results in
1043 use of unnecessary temporary registers causing regression on several
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1089 is increased to perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
1185 const struct processor_costs *ix86_cost = &pentium_cost;
1187 /* Processor feature/optimization bitmasks. */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1208 /* Generic instruction choice should be common subset of supported CPUs
1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1212 /* Feature tests against the various tunings. */
1213 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1214 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1215 negatively, so enabling for Generic64 seems like good code size
1216 tradeoff. We can't enable it for 32bit generic because it does not
1217 work well with PPro base chips. */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1220 /* X86_TUNE_PUSH_MEMORY */
1221 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1222 | m_NOCONA | m_CORE2 | m_GENERIC,
1224 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1227 /* X86_TUNE_USE_BIT_TEST */
1230 /* X86_TUNE_UNROLL_STRLEN */
1231 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1233 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1234 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1236 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1237 on simulation result. But after P4 was made, no performance benefit
1238 was observed with branch hints. It also increases the code size.
1239 As a result, icc never generates branch hints. */
1242 /* X86_TUNE_DOUBLE_WITH_ADD */
1245 /* X86_TUNE_USE_SAHF */
1246 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1247 | m_NOCONA | m_CORE2 | m_GENERIC,
1249 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1250 partial dependencies. */
1251 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1252 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1254 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1255 register stalls on Generic32 compilation setting as well. However
1256 in current implementation the partial register stalls are not eliminated
1257 very well - they can be introduced via subregs synthesized by combine
1258 and can happen in caller/callee saving sequences. Because this option
1259 pays back little on PPro based chips and is in conflict with partial reg
1260 dependencies used by Athlon/P4 based chips, it is better to leave it off
1261 for generic32 for now. */
1264 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1265 m_CORE2 | m_GENERIC,
1267 /* X86_TUNE_USE_HIMODE_FIOP */
1268 m_386 | m_486 | m_K6_GEODE,
1270 /* X86_TUNE_USE_SIMODE_FIOP */
1271 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1273 /* X86_TUNE_USE_MOV0 */
1276 /* X86_TUNE_USE_CLTD */
1277 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1279 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1282 /* X86_TUNE_SPLIT_LONG_MOVES */
1285 /* X86_TUNE_READ_MODIFY_WRITE */
1288 /* X86_TUNE_READ_MODIFY */
1291 /* X86_TUNE_PROMOTE_QIMODE */
1292 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1293 | m_GENERIC /* | m_PENT4 ? */,
1295 /* X86_TUNE_FAST_PREFIX */
1296 ~(m_PENT | m_486 | m_386),
1298 /* X86_TUNE_SINGLE_STRINGOP */
1299 m_386 | m_PENT4 | m_NOCONA,
1301 /* X86_TUNE_QIMODE_MATH */
1304 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1305 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1306 might be considered for Generic32 if our scheme for avoiding partial
1307 stalls was more effective. */
1310 /* X86_TUNE_PROMOTE_QI_REGS */
1313 /* X86_TUNE_PROMOTE_HI_REGS */
1316 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1317 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1319 /* X86_TUNE_ADD_ESP_8 */
1320 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1321 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_SUB_ESP_4 */
1324 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_SUB_ESP_8 */
1327 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1328 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1330 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1331 for DFmode copies */
1332 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1333 | m_GENERIC | m_GEODE),
1335 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1336 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1338 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1339 conflict here in between PPro/Pentium4 based chips that thread 128bit
1340 SSE registers as single units versus K8 based chips that divide SSE
1341 registers to two 64bit halves. This knob promotes all store destinations
1342 to be 128bit to allow register renaming on 128bit SSE units, but usually
1343 results in one extra microop on 64bit SSE units. Experimental results
1344 shows that disabling this option on P4 brings over 20% SPECfp regression,
1345 while enabling it on K8 brings roughly 2.4% regression that can be partly
1346 masked by careful scheduling of moves. */
1347 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1349 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1352 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1353 are resolved on SSE register parts instead of whole registers, so we may
1354 maintain just lower part of scalar values in proper format leaving the
1355 upper part undefined. */
1358 /* X86_TUNE_SSE_TYPELESS_STORES */
1361 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1362 m_PPRO | m_PENT4 | m_NOCONA,
1364 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1365 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1367 /* X86_TUNE_PROLOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_EPILOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373 /* X86_TUNE_SHIFT1 */
1376 /* X86_TUNE_USE_FFREEP */
1379 /* X86_TUNE_INTER_UNIT_MOVES */
1380 ~(m_AMD_MULTIPLE | m_GENERIC),
1382 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1385 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1386 than 4 branch instructions in the 16 byte window. */
1387 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1389 /* X86_TUNE_SCHEDULE */
1390 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1392 /* X86_TUNE_USE_BT */
1393 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1395 /* X86_TUNE_USE_INCDEC */
1396 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1398 /* X86_TUNE_PAD_RETURNS */
1399 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_EXT_80387_CONSTANTS */
1402 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1404 /* X86_TUNE_SHORTEN_X87_SSE */
1407 /* X86_TUNE_AVOID_VECTOR_DECODE */
1410 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1411 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1414 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1415 vector path on AMD machines. */
1416 m_K8 | m_GENERIC64 | m_AMDFAM10,
1418 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1422 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1426 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1427 but one byte longer. */
1430 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1431 operand that cannot be represented using a modRM byte. The XOR
1432 replacement is long decoded, so this split helps here as well. */
1435 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1436 from integer to FP. */
1439 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1440 with a subsequent conditional jump instruction into a single
1441 compare-and-branch uop. */
1445 /* Feature tests against the various architecture variations. */
1446 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1447 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1448 ~(m_386 | m_486 | m_PENT | m_K6),
1450 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1453 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1456 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1459 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1463 static const unsigned int x86_accumulate_outgoing_args
1464 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1466 static const unsigned int x86_arch_always_fancy_math_387
1467 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1468 | m_NOCONA | m_CORE2 | m_GENERIC;
1470 static enum stringop_alg stringop_alg = no_stringop;
1472 /* In case the average insn count for single function invocation is
1473 lower than this constant, emit fast (but longer) prologue and
1475 #define FAST_PROLOGUE_INSN_COUNT 20
1477 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1478 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1479 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1480 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1482 /* Array of the smallest class containing reg number REGNO, indexed by
1483 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1485 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1487 /* ax, dx, cx, bx */
1488 AREG, DREG, CREG, BREG,
1489 /* si, di, bp, sp */
1490 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1492 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1493 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1496 /* flags, fpsr, fpcr, frame */
1497 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1499 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1502 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1505 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1506 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1507 /* SSE REX registers */
1508 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1512 /* The "default" register map used in 32bit mode. */
1514 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1516 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1517 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1519 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1520 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1521 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1522 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1525 static int const x86_64_int_parameter_registers[6] =
1527 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1528 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1531 static int const x86_64_ms_abi_int_parameter_registers[4] =
1533 2 /*RCX*/, 1 /*RDX*/,
1534 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1537 static int const x86_64_int_return_registers[4] =
1539 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1542 /* The "default" register map used in 64bit mode. */
1543 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1545 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1546 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1547 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1548 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1549 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1550 8,9,10,11,12,13,14,15, /* extended integer registers */
1551 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1554 /* Define the register numbers to be used in Dwarf debugging information.
1555 The SVR4 reference port C compiler uses the following register numbers
1556 in its Dwarf output code:
1557 0 for %eax (gcc regno = 0)
1558 1 for %ecx (gcc regno = 2)
1559 2 for %edx (gcc regno = 1)
1560 3 for %ebx (gcc regno = 3)
1561 4 for %esp (gcc regno = 7)
1562 5 for %ebp (gcc regno = 6)
1563 6 for %esi (gcc regno = 4)
1564 7 for %edi (gcc regno = 5)
1565 The following three DWARF register numbers are never generated by
1566 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1567 believes these numbers have these meanings.
1568 8 for %eip (no gcc equivalent)
1569 9 for %eflags (gcc regno = 17)
1570 10 for %trapno (no gcc equivalent)
1571 It is not at all clear how we should number the FP stack registers
1572 for the x86 architecture. If the version of SDB on x86/svr4 were
1573 a bit less brain dead with respect to floating-point then we would
1574 have a precedent to follow with respect to DWARF register numbers
1575 for x86 FP registers, but the SDB on x86/svr4 is so completely
1576 broken with respect to FP registers that it is hardly worth thinking
1577 of it as something to strive for compatibility with.
1578 The version of x86/svr4 SDB I have at the moment does (partially)
1579 seem to believe that DWARF register number 11 is associated with
1580 the x86 register %st(0), but that's about all. Higher DWARF
1581 register numbers don't seem to be associated with anything in
1582 particular, and even for DWARF regno 11, SDB only seems to under-
1583 stand that it should say that a variable lives in %st(0) (when
1584 asked via an `=' command) if we said it was in DWARF regno 11,
1585 but SDB still prints garbage when asked for the value of the
1586 variable in question (via a `/' command).
1587 (Also note that the labels SDB prints for various FP stack regs
1588 when doing an `x' command are all wrong.)
1589 Note that these problems generally don't affect the native SVR4
1590 C compiler because it doesn't allow the use of -O with -g and
1591 because when it is *not* optimizing, it allocates a memory
1592 location for each floating-point variable, and the memory
1593 location is what gets described in the DWARF AT_location
1594 attribute for the variable in question.
1595 Regardless of the severe mental illness of the x86/svr4 SDB, we
1596 do something sensible here and we use the following DWARF
1597 register numbers. Note that these are all stack-top-relative
1599 11 for %st(0) (gcc regno = 8)
1600 12 for %st(1) (gcc regno = 9)
1601 13 for %st(2) (gcc regno = 10)
1602 14 for %st(3) (gcc regno = 11)
1603 15 for %st(4) (gcc regno = 12)
1604 16 for %st(5) (gcc regno = 13)
1605 17 for %st(6) (gcc regno = 14)
1606 18 for %st(7) (gcc regno = 15)
1608 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1610 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1611 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1612 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1613 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1614 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1615 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1616 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1619 /* Test and compare insns in i386.md store the information needed to
1620 generate branch and scc insns here. */
1622 rtx ix86_compare_op0 = NULL_RTX;
1623 rtx ix86_compare_op1 = NULL_RTX;
1624 rtx ix86_compare_emitted = NULL_RTX;
1626 /* Size of the register save area. */
1627 #define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
1629 /* Define the structure for the machine field in struct function. */
1631 struct stack_local_entry GTY(())
1633 unsigned short mode;
1636 struct stack_local_entry *next;
1639 /* Structure describing stack frame layout.
1640 Stack grows downward:
1646 saved frame pointer if frame_pointer_needed
1647 <- HARD_FRAME_POINTER
1652 [va_arg registers] (
1653 > to_allocate <- FRAME_POINTER
1663 HOST_WIDE_INT frame;
1665 int outgoing_arguments_size;
1668 HOST_WIDE_INT to_allocate;
1669 /* The offsets relative to ARG_POINTER. */
1670 HOST_WIDE_INT frame_pointer_offset;
1671 HOST_WIDE_INT hard_frame_pointer_offset;
1672 HOST_WIDE_INT stack_pointer_offset;
1674 /* When save_regs_using_mov is set, emit prologue using
1675 move instead of push instructions. */
1676 bool save_regs_using_mov;
1679 /* Code model option. */
1680 enum cmodel ix86_cmodel;
1682 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1684 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1686 /* Which unit we are generating floating point math for. */
1687 enum fpmath_unit ix86_fpmath;
1689 /* Which cpu are we scheduling for. */
1690 enum processor_type ix86_tune;
1692 /* Which instruction set architecture to use. */
1693 enum processor_type ix86_arch;
1695 /* true if sse prefetch instruction is not NOOP. */
1696 int x86_prefetch_sse;
1698 /* ix86_regparm_string as a number */
1699 static int ix86_regparm;
1701 /* -mstackrealign option */
1702 extern int ix86_force_align_arg_pointer;
1703 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1705 static rtx (*ix86_gen_leave) (void);
1706 static rtx (*ix86_gen_pop1) (rtx);
1707 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1708 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1709 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1710 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1711 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1713 /* Preferred alignment for stack boundary in bits. */
1714 unsigned int ix86_preferred_stack_boundary;
1716 /* Values 1-5: see jump.c */
1717 int ix86_branch_cost;
1719 /* Variables which are this size or smaller are put in the data/bss
1720 or ldata/lbss sections. */
1722 int ix86_section_threshold = 65536;
1724 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1725 char internal_label_prefix[16];
1726 int internal_label_prefix_len;
1728 /* Fence to use after loop using movnt. */
1731 /* Register class used for passing given 64bit part of the argument.
1732 These represent classes as documented by the PS ABI, with the exception
1733 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1734 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1736 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1737 whenever possible (upper half does contain padding). */
1738 enum x86_64_reg_class
1741 X86_64_INTEGER_CLASS,
1742 X86_64_INTEGERSI_CLASS,
1749 X86_64_COMPLEX_X87_CLASS,
1752 static const char * const x86_64_reg_class_name[] =
1754 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1755 "sseup", "x87", "x87up", "cplx87", "no"
1758 #define MAX_CLASSES 4
1760 /* Table of constants used by fldpi, fldln2, etc.... */
1761 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1762 static bool ext_80387_constants_init = 0;
1765 static struct machine_function * ix86_init_machine_status (void);
1766 static rtx ix86_function_value (const_tree, const_tree, bool);
1767 static int ix86_function_regparm (const_tree, const_tree);
1768 static void ix86_compute_frame_layout (struct ix86_frame *);
1769 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1773 /* The svr4 ABI for the i386 says that records and unions are returned
1775 #ifndef DEFAULT_PCC_STRUCT_RETURN
1776 #define DEFAULT_PCC_STRUCT_RETURN 1
1779 /* Bit flags that specify the ISA we are compiling for. */
1780 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1782 /* A mask of ix86_isa_flags that includes bit X if X
1783 was set or cleared on the command line. */
1784 static int ix86_isa_flags_explicit;
1786 /* Define a set of ISAs which are available when a given ISA is
1787 enabled. MMX and SSE ISAs are handled separately. */
1789 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1790 #define OPTION_MASK_ISA_3DNOW_SET \
1791 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1793 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1794 #define OPTION_MASK_ISA_SSE2_SET \
1795 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1796 #define OPTION_MASK_ISA_SSE3_SET \
1797 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1798 #define OPTION_MASK_ISA_SSSE3_SET \
1799 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1800 #define OPTION_MASK_ISA_SSE4_1_SET \
1801 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1802 #define OPTION_MASK_ISA_SSE4_2_SET \
1803 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1805 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1807 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1809 #define OPTION_MASK_ISA_SSE4A_SET \
1810 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1811 #define OPTION_MASK_ISA_SSE5_SET \
1812 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1814 /* Define a set of ISAs which aren't available when a given ISA is
1815 disabled. MMX and SSE ISAs are handled separately. */
1817 #define OPTION_MASK_ISA_MMX_UNSET \
1818 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1819 #define OPTION_MASK_ISA_3DNOW_UNSET \
1820 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1821 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1823 #define OPTION_MASK_ISA_SSE_UNSET \
1824 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1825 #define OPTION_MASK_ISA_SSE2_UNSET \
1826 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1827 #define OPTION_MASK_ISA_SSE3_UNSET \
1828 (OPTION_MASK_ISA_SSE3 \
1829 | OPTION_MASK_ISA_SSSE3_UNSET \
1830 | OPTION_MASK_ISA_SSE4A_UNSET )
1831 #define OPTION_MASK_ISA_SSSE3_UNSET \
1832 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1833 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1834 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1835 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1837 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
1839 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1841 #define OPTION_MASK_ISA_SSE4A_UNSET \
1842 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1844 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1846 /* Vectorization library interface and handlers. */
1847 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1848 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1849 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1851 /* Implement TARGET_HANDLE_OPTION. */
1854 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1861 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1862 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1866 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1867 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1874 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1875 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1879 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1880 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1890 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1891 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1895 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1896 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1903 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1904 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1908 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1909 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1916 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1917 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1921 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1922 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1929 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1930 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1934 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1935 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1942 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1943 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1947 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1948 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1955 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1956 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1960 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1961 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1966 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1967 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
1971 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1972 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1978 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1979 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1983 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1984 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1991 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1992 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
1996 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1997 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2006 /* Sometimes certain combinations of command options do not make
2007 sense on a particular target machine. You can define a macro
2008 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2009 defined, is executed once just after all the command options have
2012 Don't use this macro to turn on various extra optimizations for
2013 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2016 override_options (void)
2019 int ix86_tune_defaulted = 0;
2020 int ix86_arch_specified = 0;
2021 unsigned int ix86_arch_mask, ix86_tune_mask;
2023 /* Comes from final.c -- no real reason to change it. */
2024 #define MAX_CODE_ALIGN 16
2028 const struct processor_costs *cost; /* Processor costs */
2029 const int align_loop; /* Default alignments. */
2030 const int align_loop_max_skip;
2031 const int align_jump;
2032 const int align_jump_max_skip;
2033 const int align_func;
2035 const processor_target_table[PROCESSOR_max] =
2037 {&i386_cost, 4, 3, 4, 3, 4},
2038 {&i486_cost, 16, 15, 16, 15, 16},
2039 {&pentium_cost, 16, 7, 16, 7, 16},
2040 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2041 {&geode_cost, 0, 0, 0, 0, 0},
2042 {&k6_cost, 32, 7, 32, 7, 32},
2043 {&athlon_cost, 16, 7, 16, 7, 16},
2044 {&pentium4_cost, 0, 0, 0, 0, 0},
2045 {&k8_cost, 16, 7, 16, 7, 16},
2046 {&nocona_cost, 0, 0, 0, 0, 0},
2047 {&core2_cost, 16, 10, 16, 10, 16},
2048 {&generic32_cost, 16, 7, 16, 7, 16},
2049 {&generic64_cost, 16, 10, 16, 10, 16},
2050 {&amdfam10_cost, 32, 24, 32, 7, 32}
2053 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2084 PTA_PREFETCH_SSE = 1 << 4,
2086 PTA_3DNOW_A = 1 << 6,
2090 PTA_POPCNT = 1 << 10,
2092 PTA_SSE4A = 1 << 12,
2093 PTA_NO_SAHF = 1 << 13,
2094 PTA_SSE4_1 = 1 << 14,
2095 PTA_SSE4_2 = 1 << 15,
2098 PTA_PCLMUL = 1 << 18
2103 const char *const name; /* processor name or nickname. */
2104 const enum processor_type processor;
2105 const unsigned /*enum pta_flags*/ flags;
2107 const processor_alias_table[] =
2109 {"i386", PROCESSOR_I386, 0},
2110 {"i486", PROCESSOR_I486, 0},
2111 {"i586", PROCESSOR_PENTIUM, 0},
2112 {"pentium", PROCESSOR_PENTIUM, 0},
2113 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2114 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2115 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2116 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2117 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2118 {"i686", PROCESSOR_PENTIUMPRO, 0},
2119 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2120 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2121 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2122 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2123 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2124 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2125 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2126 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2127 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2128 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2129 | PTA_CX16 | PTA_NO_SAHF)},
2130 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2131 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2134 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2135 |PTA_PREFETCH_SSE)},
2136 {"k6", PROCESSOR_K6, PTA_MMX},
2137 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2138 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2139 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2140 | PTA_PREFETCH_SSE)},
2141 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2142 | PTA_PREFETCH_SSE)},
2143 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2145 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2147 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2149 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2150 | PTA_MMX | PTA_SSE | PTA_SSE2
2152 {"k8", PROCESSOR_K8, (PTA_64BIT
2153 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2154 | PTA_SSE | PTA_SSE2
2156 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2157 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2158 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2160 {"opteron", PROCESSOR_K8, (PTA_64BIT
2161 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2162 | PTA_SSE | PTA_SSE2
2164 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2165 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2166 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2168 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2169 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2170 | PTA_SSE | PTA_SSE2
2172 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2173 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2174 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2176 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2177 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2178 | PTA_SSE | PTA_SSE2
2180 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2181 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2182 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2184 | PTA_CX16 | PTA_ABM)},
2185 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2186 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2187 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2189 | PTA_CX16 | PTA_ABM)},
2190 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2191 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2194 int const pta_size = ARRAY_SIZE (processor_alias_table);
2196 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2197 SUBTARGET_OVERRIDE_OPTIONS;
2200 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2201 SUBSUBTARGET_OVERRIDE_OPTIONS;
2204 /* -fPIC is the default for x86_64. */
2205 if (TARGET_MACHO && TARGET_64BIT)
2208 /* Set the default values for switches whose default depends on TARGET_64BIT
2209 in case they weren't overwritten by command line options. */
2212 /* Mach-O doesn't support omitting the frame pointer for now. */
2213 if (flag_omit_frame_pointer == 2)
2214 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2215 if (flag_asynchronous_unwind_tables == 2)
2216 flag_asynchronous_unwind_tables = 1;
2217 if (flag_pcc_struct_return == 2)
2218 flag_pcc_struct_return = 0;
2222 if (flag_omit_frame_pointer == 2)
2223 flag_omit_frame_pointer = 0;
2224 if (flag_asynchronous_unwind_tables == 2)
2225 flag_asynchronous_unwind_tables = 0;
2226 if (flag_pcc_struct_return == 2)
2227 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2230 /* Need to check -mtune=generic first. */
2231 if (ix86_tune_string)
2233 if (!strcmp (ix86_tune_string, "generic")
2234 || !strcmp (ix86_tune_string, "i686")
2235 /* As special support for cross compilers we read -mtune=native
2236 as -mtune=generic. With native compilers we won't see the
2237 -mtune=native, as it was changed by the driver. */
2238 || !strcmp (ix86_tune_string, "native"))
2241 ix86_tune_string = "generic64";
2243 ix86_tune_string = "generic32";
2245 else if (!strncmp (ix86_tune_string, "generic", 7))
2246 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2250 if (ix86_arch_string)
2251 ix86_tune_string = ix86_arch_string;
2252 if (!ix86_tune_string)
2254 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2255 ix86_tune_defaulted = 1;
2258 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2259 need to use a sensible tune option. */
2260 if (!strcmp (ix86_tune_string, "generic")
2261 || !strcmp (ix86_tune_string, "x86-64")
2262 || !strcmp (ix86_tune_string, "i686"))
2265 ix86_tune_string = "generic64";
2267 ix86_tune_string = "generic32";
2270 if (ix86_stringop_string)
2272 if (!strcmp (ix86_stringop_string, "rep_byte"))
2273 stringop_alg = rep_prefix_1_byte;
2274 else if (!strcmp (ix86_stringop_string, "libcall"))
2275 stringop_alg = libcall;
2276 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2277 stringop_alg = rep_prefix_4_byte;
2278 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2279 stringop_alg = rep_prefix_8_byte;
2280 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2281 stringop_alg = loop_1_byte;
2282 else if (!strcmp (ix86_stringop_string, "loop"))
2283 stringop_alg = loop;
2284 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2285 stringop_alg = unrolled_loop;
2287 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2289 if (!strcmp (ix86_tune_string, "x86-64"))
2290 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2291 "-mtune=generic instead as appropriate.");
2293 if (!ix86_arch_string)
2294 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2296 ix86_arch_specified = 1;
2298 if (!strcmp (ix86_arch_string, "generic"))
2299 error ("generic CPU can be used only for -mtune= switch");
2300 if (!strncmp (ix86_arch_string, "generic", 7))
2301 error ("bad value (%s) for -march= switch", ix86_arch_string);
2303 if (ix86_cmodel_string != 0)
2305 if (!strcmp (ix86_cmodel_string, "small"))
2306 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2307 else if (!strcmp (ix86_cmodel_string, "medium"))
2308 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2309 else if (!strcmp (ix86_cmodel_string, "large"))
2310 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2312 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2313 else if (!strcmp (ix86_cmodel_string, "32"))
2314 ix86_cmodel = CM_32;
2315 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2316 ix86_cmodel = CM_KERNEL;
2318 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2322 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2323 use of rip-relative addressing. This eliminates fixups that
2324 would otherwise be needed if this object is to be placed in a
2325 DLL, and is essentially just as efficient as direct addressing. */
2326 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2327 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2328 else if (TARGET_64BIT)
2329 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2331 ix86_cmodel = CM_32;
2333 if (ix86_asm_string != 0)
2336 && !strcmp (ix86_asm_string, "intel"))
2337 ix86_asm_dialect = ASM_INTEL;
2338 else if (!strcmp (ix86_asm_string, "att"))
2339 ix86_asm_dialect = ASM_ATT;
2341 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2343 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2344 error ("code model %qs not supported in the %s bit mode",
2345 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2346 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2347 sorry ("%i-bit mode not compiled in",
2348 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2350 for (i = 0; i < pta_size; i++)
2351 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2353 ix86_arch = processor_alias_table[i].processor;
2354 /* Default cpu tuning to the architecture. */
2355 ix86_tune = ix86_arch;
2357 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2358 error ("CPU you selected does not support x86-64 "
2361 if (processor_alias_table[i].flags & PTA_MMX
2362 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2363 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2364 if (processor_alias_table[i].flags & PTA_3DNOW
2365 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2366 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2367 if (processor_alias_table[i].flags & PTA_3DNOW_A
2368 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2369 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2370 if (processor_alias_table[i].flags & PTA_SSE
2371 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2372 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2373 if (processor_alias_table[i].flags & PTA_SSE2
2374 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2375 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2376 if (processor_alias_table[i].flags & PTA_SSE3
2377 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2378 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2379 if (processor_alias_table[i].flags & PTA_SSSE3
2380 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2381 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2382 if (processor_alias_table[i].flags & PTA_SSE4_1
2383 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2384 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2385 if (processor_alias_table[i].flags & PTA_SSE4_2
2386 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2387 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2388 if (processor_alias_table[i].flags & PTA_SSE4A
2389 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2390 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2391 if (processor_alias_table[i].flags & PTA_SSE5
2392 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2393 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2395 if (processor_alias_table[i].flags & PTA_ABM)
2397 if (processor_alias_table[i].flags & PTA_CX16)
2398 x86_cmpxchg16b = true;
2399 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2401 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2402 x86_prefetch_sse = true;
2403 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2405 if (processor_alias_table[i].flags & PTA_AES)
2407 if (processor_alias_table[i].flags & PTA_PCLMUL)
2414 error ("bad value (%s) for -march= switch", ix86_arch_string);
2416 ix86_arch_mask = 1u << ix86_arch;
2417 for (i = 0; i < X86_ARCH_LAST; ++i)
2418 ix86_arch_features[i] &= ix86_arch_mask;
2420 for (i = 0; i < pta_size; i++)
2421 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2423 ix86_tune = processor_alias_table[i].processor;
2424 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2426 if (ix86_tune_defaulted)
2428 ix86_tune_string = "x86-64";
2429 for (i = 0; i < pta_size; i++)
2430 if (! strcmp (ix86_tune_string,
2431 processor_alias_table[i].name))
2433 ix86_tune = processor_alias_table[i].processor;
2436 error ("CPU you selected does not support x86-64 "
2439 /* Intel CPUs have always interpreted SSE prefetch instructions as
2440 NOPs; so, we can enable SSE prefetch instructions even when
2441 -mtune (rather than -march) points us to a processor that has them.
2442 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2443 higher processors. */
2445 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2446 x86_prefetch_sse = true;
2450 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2452 /* Enable SSE2 if AES or PCLMUL is enabled. */
2453 if ((x86_aes || x86_pclmul)
2454 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2456 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2457 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2460 ix86_tune_mask = 1u << ix86_tune;
2461 for (i = 0; i < X86_TUNE_LAST; ++i)
2462 ix86_tune_features[i] &= ix86_tune_mask;
2465 ix86_cost = &size_cost;
2467 ix86_cost = processor_target_table[ix86_tune].cost;
2469 /* Arrange to set up i386_stack_locals for all functions. */
2470 init_machine_status = ix86_init_machine_status;
2472 /* Validate -mregparm= value. */
2473 if (ix86_regparm_string)
2476 warning (0, "-mregparm is ignored in 64-bit mode");
2477 i = atoi (ix86_regparm_string);
2478 if (i < 0 || i > REGPARM_MAX)
2479 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2484 ix86_regparm = REGPARM_MAX;
2486 /* If the user has provided any of the -malign-* options,
2487 warn and use that value only if -falign-* is not set.
2488 Remove this code in GCC 3.2 or later. */
2489 if (ix86_align_loops_string)
2491 warning (0, "-malign-loops is obsolete, use -falign-loops");
2492 if (align_loops == 0)
2494 i = atoi (ix86_align_loops_string);
2495 if (i < 0 || i > MAX_CODE_ALIGN)
2496 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2498 align_loops = 1 << i;
2502 if (ix86_align_jumps_string)
2504 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2505 if (align_jumps == 0)
2507 i = atoi (ix86_align_jumps_string);
2508 if (i < 0 || i > MAX_CODE_ALIGN)
2509 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2511 align_jumps = 1 << i;
2515 if (ix86_align_funcs_string)
2517 warning (0, "-malign-functions is obsolete, use -falign-functions");
2518 if (align_functions == 0)
2520 i = atoi (ix86_align_funcs_string);
2521 if (i < 0 || i > MAX_CODE_ALIGN)
2522 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2524 align_functions = 1 << i;
2528 /* Default align_* from the processor table. */
2529 if (align_loops == 0)
2531 align_loops = processor_target_table[ix86_tune].align_loop;
2532 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2534 if (align_jumps == 0)
2536 align_jumps = processor_target_table[ix86_tune].align_jump;
2537 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2539 if (align_functions == 0)
2541 align_functions = processor_target_table[ix86_tune].align_func;
2544 /* Validate -mbranch-cost= value, or provide default. */
2545 ix86_branch_cost = ix86_cost->branch_cost;
2546 if (ix86_branch_cost_string)
2548 i = atoi (ix86_branch_cost_string);
2550 error ("-mbranch-cost=%d is not between 0 and 5", i);
2552 ix86_branch_cost = i;
2554 if (ix86_section_threshold_string)
2556 i = atoi (ix86_section_threshold_string);
2558 error ("-mlarge-data-threshold=%d is negative", i);
2560 ix86_section_threshold = i;
2563 if (ix86_tls_dialect_string)
2565 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2566 ix86_tls_dialect = TLS_DIALECT_GNU;
2567 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2568 ix86_tls_dialect = TLS_DIALECT_GNU2;
2569 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2570 ix86_tls_dialect = TLS_DIALECT_SUN;
2572 error ("bad value (%s) for -mtls-dialect= switch",
2573 ix86_tls_dialect_string);
2576 if (ix87_precision_string)
2578 i = atoi (ix87_precision_string);
2579 if (i != 32 && i != 64 && i != 80)
2580 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2585 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2587 /* Enable by default the SSE and MMX builtins. Do allow the user to
2588 explicitly disable any of these. In particular, disabling SSE and
2589 MMX for kernel code is extremely useful. */
2590 if (!ix86_arch_specified)
2592 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2593 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2596 warning (0, "-mrtd is ignored in 64bit mode");
2600 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2602 if (!ix86_arch_specified)
2604 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2606 /* i386 ABI does not specify red zone. It still makes sense to use it
2607 when programmer takes care to stack from being destroyed. */
2608 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2609 target_flags |= MASK_NO_RED_ZONE;
2612 /* Keep nonleaf frame pointers. */
2613 if (flag_omit_frame_pointer)
2614 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2615 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2616 flag_omit_frame_pointer = 1;
2618 /* If we're doing fast math, we don't care about comparison order
2619 wrt NaNs. This lets us use a shorter comparison sequence. */
2620 if (flag_finite_math_only)
2621 target_flags &= ~MASK_IEEE_FP;
2623 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2624 since the insns won't need emulation. */
2625 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2626 target_flags &= ~MASK_NO_FANCY_MATH_387;
2628 /* Likewise, if the target doesn't have a 387, or we've specified
2629 software floating point, don't use 387 inline intrinsics. */
2631 target_flags |= MASK_NO_FANCY_MATH_387;
2633 /* Turn on MMX builtins for -msse. */
2636 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2637 x86_prefetch_sse = true;
2640 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2641 if (TARGET_SSE4_2 || TARGET_ABM)
2644 /* Validate -mpreferred-stack-boundary= value, or provide default.
2645 The default of 128 bits is for Pentium III's SSE __m128. We can't
2646 change it because of optimize_size. Otherwise, we can't mix object
2647 files compiled with -Os and -On. */
2648 ix86_preferred_stack_boundary = 128;
2649 if (ix86_preferred_stack_boundary_string)
2651 i = atoi (ix86_preferred_stack_boundary_string);
2652 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2653 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2654 TARGET_64BIT ? 4 : 2);
2656 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2659 /* Accept -msseregparm only if at least SSE support is enabled. */
2660 if (TARGET_SSEREGPARM
2662 error ("-msseregparm used without SSE enabled");
2664 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2665 if (ix86_fpmath_string != 0)
2667 if (! strcmp (ix86_fpmath_string, "387"))
2668 ix86_fpmath = FPMATH_387;
2669 else if (! strcmp (ix86_fpmath_string, "sse"))
2673 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2674 ix86_fpmath = FPMATH_387;
2677 ix86_fpmath = FPMATH_SSE;
2679 else if (! strcmp (ix86_fpmath_string, "387,sse")
2680 || ! strcmp (ix86_fpmath_string, "sse,387"))
2684 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2685 ix86_fpmath = FPMATH_387;
2687 else if (!TARGET_80387)
2689 warning (0, "387 instruction set disabled, using SSE arithmetics");
2690 ix86_fpmath = FPMATH_SSE;
2693 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2696 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2699 /* If the i387 is disabled, then do not return values in it. */
2701 target_flags &= ~MASK_FLOAT_RETURNS;
2703 /* Use external vectorized library in vectorizing intrinsics. */
2704 if (ix86_veclibabi_string)
2706 if (strcmp (ix86_veclibabi_string, "svml") == 0)
2707 ix86_veclib_handler = ix86_veclibabi_svml;
2708 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
2709 ix86_veclib_handler = ix86_veclibabi_acml;
2711 error ("unknown vectorization library ABI type (%s) for "
2712 "-mveclibabi= switch", ix86_veclibabi_string);
2715 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2716 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2718 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2720 /* ??? Unwind info is not correct around the CFG unless either a frame
2721 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2722 unwind info generation to be aware of the CFG and propagating states
2724 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2725 || flag_exceptions || flag_non_call_exceptions)
2726 && flag_omit_frame_pointer
2727 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2729 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2730 warning (0, "unwind tables currently require either a frame pointer "
2731 "or -maccumulate-outgoing-args for correctness");
2732 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2735 /* If stack probes are required, the space used for large function
2736 arguments on the stack must also be probed, so enable
2737 -maccumulate-outgoing-args so this happens in the prologue. */
2738 if (TARGET_STACK_PROBE
2739 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2741 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2742 warning (0, "stack probing requires -maccumulate-outgoing-args "
2744 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2747 /* For sane SSE instruction set generation we need fcomi instruction.
2748 It is safe to enable all CMOVE instructions. */
2752 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2755 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2756 p = strchr (internal_label_prefix, 'X');
2757 internal_label_prefix_len = p - internal_label_prefix;
2761 /* When scheduling description is not available, disable scheduler pass
2762 so it won't slow down the compilation and make x87 code slower. */
2763 if (!TARGET_SCHEDULE)
2764 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2766 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2767 set_param_value ("simultaneous-prefetches",
2768 ix86_cost->simultaneous_prefetches);
2769 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2770 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2771 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2772 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2773 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2774 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2776 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2777 can be optimized to ap = __builtin_next_arg (0).
2778 For abi switching it should be corrected. */
2779 if (!TARGET_64BIT || DEFAULT_ABI == MS_ABI)
2780 targetm.expand_builtin_va_start = NULL;
2784 ix86_gen_leave = gen_leave_rex64;
2785 ix86_gen_pop1 = gen_popdi1;
2786 ix86_gen_add3 = gen_adddi3;
2787 ix86_gen_sub3 = gen_subdi3;
2788 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
2789 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
2790 ix86_gen_monitor = gen_sse3_monitor64;
2794 ix86_gen_leave = gen_leave;
2795 ix86_gen_pop1 = gen_popsi1;
2796 ix86_gen_add3 = gen_addsi3;
2797 ix86_gen_sub3 = gen_subsi3;
2798 ix86_gen_sub3_carry = gen_subsi3_carry;
2799 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
2800 ix86_gen_monitor = gen_sse3_monitor;
2804 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
2806 target_flags |= MASK_CLD & ~target_flags_explicit;
2810 /* Return true if this goes in large data/bss. */
2813 ix86_in_large_data_p (tree exp)
2815 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2818 /* Functions are never large data. */
2819 if (TREE_CODE (exp) == FUNCTION_DECL)
2822 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2824 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2825 if (strcmp (section, ".ldata") == 0
2826 || strcmp (section, ".lbss") == 0)
2832 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2834 /* If this is an incomplete type with size 0, then we can't put it
2835 in data because it might be too big when completed. */
2836 if (!size || size > ix86_section_threshold)
2843 /* Switch to the appropriate section for output of DECL.
2844 DECL is either a `VAR_DECL' node or a constant of some sort.
2845 RELOC indicates whether forming the initial value of DECL requires
2846 link-time relocations. */
2848 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2852 x86_64_elf_select_section (tree decl, int reloc,
2853 unsigned HOST_WIDE_INT align)
2855 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2856 && ix86_in_large_data_p (decl))
2858 const char *sname = NULL;
2859 unsigned int flags = SECTION_WRITE;
2860 switch (categorize_decl_for_section (decl, reloc))
2865 case SECCAT_DATA_REL:
2866 sname = ".ldata.rel";
2868 case SECCAT_DATA_REL_LOCAL:
2869 sname = ".ldata.rel.local";
2871 case SECCAT_DATA_REL_RO:
2872 sname = ".ldata.rel.ro";
2874 case SECCAT_DATA_REL_RO_LOCAL:
2875 sname = ".ldata.rel.ro.local";
2879 flags |= SECTION_BSS;
2882 case SECCAT_RODATA_MERGE_STR:
2883 case SECCAT_RODATA_MERGE_STR_INIT:
2884 case SECCAT_RODATA_MERGE_CONST:
2888 case SECCAT_SRODATA:
2895 /* We don't split these for medium model. Place them into
2896 default sections and hope for best. */
2898 case SECCAT_EMUTLS_VAR:
2899 case SECCAT_EMUTLS_TMPL:
2904 /* We might get called with string constants, but get_named_section
2905 doesn't like them as they are not DECLs. Also, we need to set
2906 flags in that case. */
2908 return get_section (sname, flags, NULL);
2909 return get_named_section (decl, sname, reloc);
2912 return default_elf_select_section (decl, reloc, align);
2915 /* Build up a unique section name, expressed as a
2916 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2917 RELOC indicates whether the initial value of EXP requires
2918 link-time relocations. */
2920 static void ATTRIBUTE_UNUSED
2921 x86_64_elf_unique_section (tree decl, int reloc)
2923 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2924 && ix86_in_large_data_p (decl))
2926 const char *prefix = NULL;
2927 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2928 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2930 switch (categorize_decl_for_section (decl, reloc))
2933 case SECCAT_DATA_REL:
2934 case SECCAT_DATA_REL_LOCAL:
2935 case SECCAT_DATA_REL_RO:
2936 case SECCAT_DATA_REL_RO_LOCAL:
2937 prefix = one_only ? ".ld" : ".ldata";
2940 prefix = one_only ? ".lb" : ".lbss";
2943 case SECCAT_RODATA_MERGE_STR:
2944 case SECCAT_RODATA_MERGE_STR_INIT:
2945 case SECCAT_RODATA_MERGE_CONST:
2946 prefix = one_only ? ".lr" : ".lrodata";
2948 case SECCAT_SRODATA:
2955 /* We don't split these for medium model. Place them into
2956 default sections and hope for best. */
2958 case SECCAT_EMUTLS_VAR:
2959 prefix = targetm.emutls.var_section;
2961 case SECCAT_EMUTLS_TMPL:
2962 prefix = targetm.emutls.tmpl_section;
2967 const char *name, *linkonce;
2970 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2971 name = targetm.strip_name_encoding (name);
2973 /* If we're using one_only, then there needs to be a .gnu.linkonce
2974 prefix to the section name. */
2975 linkonce = one_only ? ".gnu.linkonce" : "";
2977 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
2979 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
2983 default_unique_section (decl, reloc);
2986 #ifdef COMMON_ASM_OP
2987 /* This says how to output assembler code to declare an
2988 uninitialized external linkage data object.
2990 For medium model x86-64 we need to use .largecomm opcode for
2993 x86_elf_aligned_common (FILE *file,
2994 const char *name, unsigned HOST_WIDE_INT size,
2997 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2998 && size > (unsigned int)ix86_section_threshold)
2999 fprintf (file, ".largecomm\t");
3001 fprintf (file, "%s", COMMON_ASM_OP);
3002 assemble_name (file, name);
3003 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3004 size, align / BITS_PER_UNIT);
3008 /* Utility function for targets to use in implementing
3009 ASM_OUTPUT_ALIGNED_BSS. */
3012 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3013 const char *name, unsigned HOST_WIDE_INT size,
3016 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3017 && size > (unsigned int)ix86_section_threshold)
3018 switch_to_section (get_named_section (decl, ".lbss", 0));
3020 switch_to_section (bss_section);
3021 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
3022 #ifdef ASM_DECLARE_OBJECT_NAME
3023 last_assemble_variable_decl = decl;
3024 ASM_DECLARE_OBJECT_NAME (file, name, decl);
3026 /* Standard thing is just output label for the object. */
3027 ASM_OUTPUT_LABEL (file, name);
3028 #endif /* ASM_DECLARE_OBJECT_NAME */
3029 ASM_OUTPUT_SKIP (file, size ? size : 1);
3033 optimization_options (int level, int size ATTRIBUTE_UNUSED)
3035 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
3036 make the problem with not enough registers even worse. */
3037 #ifdef INSN_SCHEDULING
3039 flag_schedule_insns = 0;
3043 /* The Darwin libraries never set errno, so we might as well
3044 avoid calling them when that's the only reason we would. */
3045 flag_errno_math = 0;
3047 /* The default values of these switches depend on the TARGET_64BIT
3048 that is not known at this moment. Mark these values with 2 and
3049 let user the to override these. In case there is no command line option
3050 specifying them, we will set the defaults in override_options. */
3052 flag_omit_frame_pointer = 2;
3053 flag_pcc_struct_return = 2;
3054 flag_asynchronous_unwind_tables = 2;
3055 flag_vect_cost_model = 1;
3056 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3057 SUBTARGET_OPTIMIZATION_OPTIONS;
3061 /* Decide whether we can make a sibling call to a function. DECL is the
3062 declaration of the function being targeted by the call and EXP is the
3063 CALL_EXPR representing the call. */
3066 ix86_function_ok_for_sibcall (tree decl, tree exp)
3071 /* If we are generating position-independent code, we cannot sibcall
3072 optimize any indirect call, or a direct call to a global function,
3073 as the PLT requires %ebx be live. */
3074 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
3081 func = TREE_TYPE (CALL_EXPR_FN (exp));
3082 if (POINTER_TYPE_P (func))
3083 func = TREE_TYPE (func);
3086 /* Check that the return value locations are the same. Like
3087 if we are returning floats on the 80387 register stack, we cannot
3088 make a sibcall from a function that doesn't return a float to a
3089 function that does or, conversely, from a function that does return
3090 a float to a function that doesn't; the necessary stack adjustment
3091 would not be executed. This is also the place we notice
3092 differences in the return value ABI. Note that it is ok for one
3093 of the functions to have void return type as long as the return
3094 value of the other is passed in a register. */
3095 a = ix86_function_value (TREE_TYPE (exp), func, false);
3096 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3098 if (STACK_REG_P (a) || STACK_REG_P (b))
3100 if (!rtx_equal_p (a, b))
3103 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3105 else if (!rtx_equal_p (a, b))
3108 /* If this call is indirect, we'll need to be able to use a call-clobbered
3109 register for the address of the target function. Make sure that all
3110 such registers are not used for passing parameters. */
3111 if (!decl && !TARGET_64BIT)
3115 /* We're looking at the CALL_EXPR, we need the type of the function. */
3116 type = CALL_EXPR_FN (exp); /* pointer expression */
3117 type = TREE_TYPE (type); /* pointer type */
3118 type = TREE_TYPE (type); /* function type */
3120 if (ix86_function_regparm (type, NULL) >= 3)
3122 /* ??? Need to count the actual number of registers to be used,
3123 not the possible number of registers. Fix later. */
3128 /* Dllimport'd functions are also called indirectly. */
3129 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3130 && decl && DECL_DLLIMPORT_P (decl)
3131 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3134 /* If we forced aligned the stack, then sibcalling would unalign the
3135 stack, which may break the called function. */
3136 if (cfun->machine->force_align_arg_pointer)
3139 /* Otherwise okay. That also includes certain types of indirect calls. */
3143 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3144 calling convention attributes;
3145 arguments as in struct attribute_spec.handler. */
3148 ix86_handle_cconv_attribute (tree *node, tree name,
3150 int flags ATTRIBUTE_UNUSED,
3153 if (TREE_CODE (*node) != FUNCTION_TYPE
3154 && TREE_CODE (*node) != METHOD_TYPE
3155 && TREE_CODE (*node) != FIELD_DECL
3156 && TREE_CODE (*node) != TYPE_DECL)
3158 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3159 IDENTIFIER_POINTER (name));
3160 *no_add_attrs = true;
3164 /* Can combine regparm with all attributes but fastcall. */
3165 if (is_attribute_p ("regparm", name))
3169 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3171 error ("fastcall and regparm attributes are not compatible");
3174 cst = TREE_VALUE (args);
3175 if (TREE_CODE (cst) != INTEGER_CST)
3177 warning (OPT_Wattributes,
3178 "%qs attribute requires an integer constant argument",
3179 IDENTIFIER_POINTER (name));
3180 *no_add_attrs = true;
3182 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3184 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3185 IDENTIFIER_POINTER (name), REGPARM_MAX);
3186 *no_add_attrs = true;
3190 && lookup_attribute (ix86_force_align_arg_pointer_string,
3191 TYPE_ATTRIBUTES (*node))
3192 && compare_tree_int (cst, REGPARM_MAX-1))
3194 error ("%s functions limited to %d register parameters",
3195 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3203 /* Do not warn when emulating the MS ABI. */
3204 if (TREE_CODE (*node) != FUNCTION_TYPE || !ix86_function_type_abi (*node))
3205 warning (OPT_Wattributes, "%qs attribute ignored",
3206 IDENTIFIER_POINTER (name));
3207 *no_add_attrs = true;
3211 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3212 if (is_attribute_p ("fastcall", name))
3214 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3216 error ("fastcall and cdecl attributes are not compatible");
3218 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3220 error ("fastcall and stdcall attributes are not compatible");
3222 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3224 error ("fastcall and regparm attributes are not compatible");
3228 /* Can combine stdcall with fastcall (redundant), regparm and
3230 else if (is_attribute_p ("stdcall", name))
3232 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3234 error ("stdcall and cdecl attributes are not compatible");
3236 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3238 error ("stdcall and fastcall attributes are not compatible");
3242 /* Can combine cdecl with regparm and sseregparm. */
3243 else if (is_attribute_p ("cdecl", name))
3245 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3247 error ("stdcall and cdecl attributes are not compatible");
3249 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3251 error ("fastcall and cdecl attributes are not compatible");
3255 /* Can combine sseregparm with all attributes. */
3260 /* Return 0 if the attributes for two types are incompatible, 1 if they
3261 are compatible, and 2 if they are nearly compatible (which causes a
3262 warning to be generated). */
3265 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3267 /* Check for mismatch of non-default calling convention. */
3268 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3270 if (TREE_CODE (type1) != FUNCTION_TYPE
3271 && TREE_CODE (type1) != METHOD_TYPE)
3274 /* Check for mismatched fastcall/regparm types. */
3275 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3276 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3277 || (ix86_function_regparm (type1, NULL)
3278 != ix86_function_regparm (type2, NULL)))
3281 /* Check for mismatched sseregparm types. */
3282 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3283 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3286 /* Check for mismatched return types (cdecl vs stdcall). */
3287 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3288 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3294 /* Return the regparm value for a function with the indicated TYPE and DECL.
3295 DECL may be NULL when calling function indirectly
3296 or considering a libcall. */
3299 ix86_function_regparm (const_tree type, const_tree decl)
3302 int regparm = ix86_regparm;
3304 static bool error_issued;
3308 if (ix86_function_type_abi (type) == DEFAULT_ABI)
3310 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
3313 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3317 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3319 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
3321 /* We can't use regparm(3) for nested functions because
3322 these pass static chain pointer in %ecx register. */
3323 if (!error_issued && regparm == 3
3324 && decl_function_context (decl)
3325 && !DECL_NO_STATIC_CHAIN (decl))
3327 error ("nested functions are limited to 2 register parameters");
3328 error_issued = true;
3336 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3339 /* Use register calling convention for local functions when possible. */
3340 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3341 && flag_unit_at_a_time && !profile_flag)
3343 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3344 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3347 int local_regparm, globals = 0, regno;
3350 /* Make sure no regparm register is taken by a
3351 fixed register variable. */
3352 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3353 if (fixed_regs[local_regparm])
3356 /* We can't use regparm(3) for nested functions as these use
3357 static chain pointer in third argument. */
3358 if (local_regparm == 3
3359 && (decl_function_context (decl)
3360 || ix86_force_align_arg_pointer)
3361 && !DECL_NO_STATIC_CHAIN (decl))
3364 /* If the function realigns its stackpointer, the prologue will
3365 clobber %ecx. If we've already generated code for the callee,
3366 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3367 scanning the attributes for the self-realigning property. */
3368 f = DECL_STRUCT_FUNCTION (decl);
3369 if (local_regparm == 3
3370 && (f ? !!f->machine->force_align_arg_pointer
3371 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3372 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3375 /* Each fixed register usage increases register pressure,
3376 so less registers should be used for argument passing.
3377 This functionality can be overriden by an explicit
3379 for (regno = 0; regno <= DI_REG; regno++)
3380 if (fixed_regs[regno])
3384 = globals < local_regparm ? local_regparm - globals : 0;
3386 if (local_regparm > regparm)
3387 regparm = local_regparm;
3394 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3395 DFmode (2) arguments in SSE registers for a function with the
3396 indicated TYPE and DECL. DECL may be NULL when calling function
3397 indirectly or considering a libcall. Otherwise return 0. */
3400 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
3402 gcc_assert (!TARGET_64BIT);
3404 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3405 by the sseregparm attribute. */
3406 if (TARGET_SSEREGPARM
3407 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3414 error ("Calling %qD with attribute sseregparm without "
3415 "SSE/SSE2 enabled", decl);
3417 error ("Calling %qT with attribute sseregparm without "
3418 "SSE/SSE2 enabled", type);
3426 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3427 (and DFmode for SSE2) arguments in SSE registers. */
3428 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3430 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3431 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3433 return TARGET_SSE2 ? 2 : 1;
3439 /* Return true if EAX is live at the start of the function. Used by
3440 ix86_expand_prologue to determine if we need special help before
3441 calling allocate_stack_worker. */
3444 ix86_eax_live_at_start_p (void)
3446 /* Cheat. Don't bother working forward from ix86_function_regparm
3447 to the function type to whether an actual argument is located in
3448 eax. Instead just look at cfg info, which is still close enough
3449 to correct at this point. This gives false positives for broken
3450 functions that might use uninitialized data that happens to be
3451 allocated in eax, but who cares? */
3452 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3455 /* Value is the number of bytes of arguments automatically
3456 popped when returning from a subroutine call.
3457 FUNDECL is the declaration node of the function (as a tree),
3458 FUNTYPE is the data type of the function (as a tree),
3459 or for a library call it is an identifier node for the subroutine name.
3460 SIZE is the number of bytes of arguments passed on the stack.
3462 On the 80386, the RTD insn may be used to pop them if the number
3463 of args is fixed, but if the number is variable then the caller
3464 must pop them all. RTD can't be used for library calls now
3465 because the library is compiled with the Unix compiler.
3466 Use of RTD is a selectable option, since it is incompatible with
3467 standard Unix calling sequences. If the option is not selected,
3468 the caller must always pop the args.
3470 The attribute stdcall is equivalent to RTD on a per module basis. */
3473 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3477 /* None of the 64-bit ABIs pop arguments. */
3481 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3483 /* Cdecl functions override -mrtd, and never pop the stack. */
3484 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3486 /* Stdcall and fastcall functions will pop the stack if not
3488 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3489 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3492 if (rtd && ! stdarg_p (funtype))
3496 /* Lose any fake structure return argument if it is passed on the stack. */
3497 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3498 && !KEEP_AGGREGATE_RETURN_POINTER)
3500 int nregs = ix86_function_regparm (funtype, fundecl);
3502 return GET_MODE_SIZE (Pmode);
3508 /* Argument support functions. */
3510 /* Return true when register may be used to pass function parameters. */
3512 ix86_function_arg_regno_p (int regno)
3515 const int *parm_regs;
3520 return (regno < REGPARM_MAX
3521 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3523 return (regno < REGPARM_MAX
3524 || (TARGET_MMX && MMX_REGNO_P (regno)
3525 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3526 || (TARGET_SSE && SSE_REGNO_P (regno)
3527 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3532 if (SSE_REGNO_P (regno) && TARGET_SSE)
3537 if (TARGET_SSE && SSE_REGNO_P (regno)
3538 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3542 /* TODO: The function should depend on current function ABI but
3543 builtins.c would need updating then. Therefore we use the
3546 /* RAX is used as hidden argument to va_arg functions. */
3547 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
3550 if (DEFAULT_ABI == MS_ABI)
3551 parm_regs = x86_64_ms_abi_int_parameter_registers;
3553 parm_regs = x86_64_int_parameter_registers;
3554 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
3555 : X86_64_REGPARM_MAX); i++)
3556 if (regno == parm_regs[i])
3561 /* Return if we do not know how to pass TYPE solely in registers. */
3564 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3566 if (must_pass_in_stack_var_size_or_pad (mode, type))
3569 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3570 The layout_type routine is crafty and tries to trick us into passing
3571 currently unsupported vector types on the stack by using TImode. */
3572 return (!TARGET_64BIT && mode == TImode
3573 && type && TREE_CODE (type) != VECTOR_TYPE);
3576 /* It returns the size, in bytes, of the area reserved for arguments passed
3577 in registers for the function represented by fndecl dependent to the used
3580 ix86_reg_parm_stack_space (const_tree fndecl)
3583 /* For libcalls it is possible that there is no fndecl at hand.
3584 Therefore assume for this case the default abi of the target. */
3586 call_abi = DEFAULT_ABI;
3588 call_abi = ix86_function_abi (fndecl);
3594 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
3597 ix86_function_type_abi (const_tree fntype)
3599 if (TARGET_64BIT && fntype != NULL)
3602 if (DEFAULT_ABI == SYSV_ABI)
3603 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
3605 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
3607 if (DEFAULT_ABI == MS_ABI && abi == SYSV_ABI)
3608 sorry ("using sysv calling convention on target w64 is not supported");
3616 ix86_function_abi (const_tree fndecl)
3620 return ix86_function_type_abi (TREE_TYPE (fndecl));
3623 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
3626 ix86_cfun_abi (void)
3628 if (! cfun || ! TARGET_64BIT)
3630 return cfun->machine->call_abi;
3634 extern void init_regs (void);
3636 /* Implementation of call abi switching target hook. Specific to FNDECL
3637 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
3639 To prevent redudant calls of costy function init_regs (), it checks not to
3640 reset register usage for default abi. */
3642 ix86_call_abi_override (const_tree fndecl)
3644 if (fndecl == NULL_TREE)
3645 cfun->machine->call_abi = DEFAULT_ABI;
3647 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
3648 if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
3650 if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
3652 call_used_regs[4 /*RSI*/] = 0;
3653 call_used_regs[5 /*RDI*/] = 0;
3657 else if (TARGET_64BIT)
3659 if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
3661 call_used_regs[4 /*RSI*/] = 1;
3662 call_used_regs[5 /*RDI*/] = 1;
3668 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3669 for a call to a function whose data type is FNTYPE.
3670 For a library call, FNTYPE is 0. */
3673 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3674 tree fntype, /* tree ptr for function decl */
3675 rtx libname, /* SYMBOL_REF of library name or 0 */
3678 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3679 memset (cum, 0, sizeof (*cum));
3681 cum->call_abi = ix86_function_type_abi (fntype);
3682 /* Set up the number of registers to use for passing arguments. */
3683 cum->nregs = ix86_regparm;
3686 if (cum->call_abi != DEFAULT_ABI)
3687 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
3692 cum->sse_nregs = SSE_REGPARM_MAX;
3695 if (cum->call_abi != DEFAULT_ABI)
3696 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
3697 : X64_SSE_REGPARM_MAX;
3701 cum->mmx_nregs = MMX_REGPARM_MAX;
3702 cum->warn_sse = true;
3703 cum->warn_mmx = true;
3705 /* Because type might mismatch in between caller and callee, we need to
3706 use actual type of function for local calls.
3707 FIXME: cgraph_analyze can be told to actually record if function uses
3708 va_start so for local functions maybe_vaarg can be made aggressive
3710 FIXME: once typesytem is fixed, we won't need this code anymore. */
3712 fntype = TREE_TYPE (fndecl);
3713 cum->maybe_vaarg = (fntype
3714 ? (!prototype_p (fntype) || stdarg_p (fntype))
3719 /* If there are variable arguments, then we won't pass anything
3720 in registers in 32-bit mode. */
3721 if (stdarg_p (fntype))
3731 /* Use ecx and edx registers if function has fastcall attribute,
3732 else look for regparm information. */
3735 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3741 cum->nregs = ix86_function_regparm (fntype, fndecl);
3744 /* Set up the number of SSE registers used for passing SFmode
3745 and DFmode arguments. Warn for mismatching ABI. */
3746 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
3750 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3751 But in the case of vector types, it is some vector mode.
3753 When we have only some of our vector isa extensions enabled, then there
3754 are some modes for which vector_mode_supported_p is false. For these
3755 modes, the generic vector support in gcc will choose some non-vector mode
3756 in order to implement the type. By computing the natural mode, we'll
3757 select the proper ABI location for the operand and not depend on whatever
3758 the middle-end decides to do with these vector types. */
3760 static enum machine_mode
3761 type_natural_mode (const_tree type)
3763 enum machine_mode mode = TYPE_MODE (type);
3765 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3767 HOST_WIDE_INT size = int_size_in_bytes (type);
3768 if ((size == 8 || size == 16)
3769 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3770 && TYPE_VECTOR_SUBPARTS (type) > 1)
3772 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3774 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3775 mode = MIN_MODE_VECTOR_FLOAT;
3777 mode = MIN_MODE_VECTOR_INT;
3779 /* Get the mode which has this inner mode and number of units. */
3780 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3781 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3782 && GET_MODE_INNER (mode) == innermode)
3792 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3793 this may not agree with the mode that the type system has chosen for the
3794 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3795 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3798 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3803 if (orig_mode != BLKmode)
3804 tmp = gen_rtx_REG (orig_mode, regno);
3807 tmp = gen_rtx_REG (mode, regno);
3808 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3809 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3815 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3816 of this code is to classify each 8bytes of incoming argument by the register
3817 class and assign registers accordingly. */
3819 /* Return the union class of CLASS1 and CLASS2.
3820 See the x86-64 PS ABI for details. */
3822 static enum x86_64_reg_class
3823 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3825 /* Rule #1: If both classes are equal, this is the resulting class. */
3826 if (class1 == class2)
3829 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3831 if (class1 == X86_64_NO_CLASS)
3833 if (class2 == X86_64_NO_CLASS)
3836 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3837 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3838 return X86_64_MEMORY_CLASS;
3840 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3841 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3842 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3843 return X86_64_INTEGERSI_CLASS;
3844 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3845 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3846 return X86_64_INTEGER_CLASS;
3848 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3850 if (class1 == X86_64_X87_CLASS
3851 || class1 == X86_64_X87UP_CLASS
3852 || class1 == X86_64_COMPLEX_X87_CLASS
3853 || class2 == X86_64_X87_CLASS
3854 || class2 == X86_64_X87UP_CLASS
3855 || class2 == X86_64_COMPLEX_X87_CLASS)
3856 return X86_64_MEMORY_CLASS;
3858 /* Rule #6: Otherwise class SSE is used. */
3859 return X86_64_SSE_CLASS;
3862 /* Classify the argument of type TYPE and mode MODE.
3863 CLASSES will be filled by the register class used to pass each word
3864 of the operand. The number of words is returned. In case the parameter
3865 should be passed in memory, 0 is returned. As a special case for zero
3866 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3868 BIT_OFFSET is used internally for handling records and specifies offset
3869 of the offset in bits modulo 256 to avoid overflow cases.
3871 See the x86-64 PS ABI for details.
3875 classify_argument (enum machine_mode mode, const_tree type,
3876 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3878 HOST_WIDE_INT bytes =
3879 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3880 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3882 /* Variable sized entities are always passed/returned in memory. */
3886 if (mode != VOIDmode
3887 && targetm.calls.must_pass_in_stack (mode, type))
3890 if (type && AGGREGATE_TYPE_P (type))
3894 enum x86_64_reg_class subclasses[MAX_CLASSES];
3896 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3900 for (i = 0; i < words; i++)
3901 classes[i] = X86_64_NO_CLASS;
3903 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3904 signalize memory class, so handle it as special case. */
3907 classes[0] = X86_64_NO_CLASS;
3911 /* Classify each field of record and merge classes. */
3912 switch (TREE_CODE (type))
3915 /* And now merge the fields of structure. */
3916 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3918 if (TREE_CODE (field) == FIELD_DECL)
3922 if (TREE_TYPE (field) == error_mark_node)
3925 /* Bitfields are always classified as integer. Handle them
3926 early, since later code would consider them to be
3927 misaligned integers. */
3928 if (DECL_BIT_FIELD (field))
3930 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3931 i < ((int_bit_position (field) + (bit_offset % 64))
3932 + tree_low_cst (DECL_SIZE (field), 0)
3935 merge_classes (X86_64_INTEGER_CLASS,
3940 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3941 TREE_TYPE (field), subclasses,
3942 (int_bit_position (field)
3943 + bit_offset) % 256);
3946 for (i = 0; i < num; i++)
3949 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3951 merge_classes (subclasses[i], classes[i + pos]);
3959 /* Arrays are handled as small records. */
3962 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3963 TREE_TYPE (type), subclasses, bit_offset);
3967 /* The partial classes are now full classes. */
3968 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3969 subclasses[0] = X86_64_SSE_CLASS;
3970 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3971 subclasses[0] = X86_64_INTEGER_CLASS;
3973 for (i = 0; i < words; i++)
3974 classes[i] = subclasses[i % num];
3979 case QUAL_UNION_TYPE:
3980 /* Unions are similar to RECORD_TYPE but offset is always 0.
3982 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3984 if (TREE_CODE (field) == FIELD_DECL)
3988 if (TREE_TYPE (field) == error_mark_node)
3991 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3992 TREE_TYPE (field), subclasses,
3996 for (i = 0; i < num; i++)
3997 classes[i] = merge_classes (subclasses[i], classes[i]);
4006 /* Final merger cleanup. */
4007 for (i = 0; i < words; i++)
4009 /* If one class is MEMORY, everything should be passed in
4011 if (classes[i] == X86_64_MEMORY_CLASS)
4014 /* The X86_64_SSEUP_CLASS should be always preceded by
4015 X86_64_SSE_CLASS. */
4016 if (classes[i] == X86_64_SSEUP_CLASS
4017 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4018 classes[i] = X86_64_SSE_CLASS;
4020 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4021 if (classes[i] == X86_64_X87UP_CLASS
4022 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4023 classes[i] = X86_64_SSE_CLASS;
4028 /* Compute alignment needed. We align all types to natural boundaries with
4029 exception of XFmode that is aligned to 64bits. */
4030 if (mode != VOIDmode && mode != BLKmode)
4032 int mode_alignment = GET_MODE_BITSIZE (mode);
4035 mode_alignment = 128;
4036 else if (mode == XCmode)
4037 mode_alignment = 256;
4038 if (COMPLEX_MODE_P (mode))
4039 mode_alignment /= 2;
4040 /* Misaligned fields are always returned in memory. */
4041 if (bit_offset % mode_alignment)
4045 /* for V1xx modes, just use the base mode */
4046 if (VECTOR_MODE_P (mode) && mode != V1DImode
4047 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
4048 mode = GET_MODE_INNER (mode);
4050 /* Classification of atomic types. */
4055 classes[0] = X86_64_SSE_CLASS;
4058 classes[0] = X86_64_SSE_CLASS;
4059 classes[1] = X86_64_SSEUP_CLASS;
4068 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4069 classes[0] = X86_64_INTEGERSI_CLASS;
4071 classes[0] = X86_64_INTEGER_CLASS;
4075 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
4080 if (!(bit_offset % 64))
4081 classes[0] = X86_64_SSESF_CLASS;
4083 classes[0] = X86_64_SSE_CLASS;
4086 classes[0] = X86_64_SSEDF_CLASS;
4089 classes[0] = X86_64_X87_CLASS;
4090 classes[1] = X86_64_X87UP_CLASS;
4093 classes[0] = X86_64_SSE_CLASS;
4094 classes[1] = X86_64_SSEUP_CLASS;
4097 classes[0] = X86_64_SSE_CLASS;
4100 classes[0] = X86_64_SSEDF_CLASS;
4101 classes[1] = X86_64_SSEDF_CLASS;
4104 classes[0] = X86_64_COMPLEX_X87_CLASS;
4107 /* This modes is larger than 16 bytes. */
4115 classes[0] = X86_64_SSE_CLASS;
4116 classes[1] = X86_64_SSEUP_CLASS;
4123 classes[0] = X86_64_SSE_CLASS;
4129 gcc_assert (VECTOR_MODE_P (mode));
4134 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
4136 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4137 classes[0] = X86_64_INTEGERSI_CLASS;
4139 classes[0] = X86_64_INTEGER_CLASS;
4140 classes[1] = X86_64_INTEGER_CLASS;
4141 return 1 + (bytes > 8);
4145 /* Examine the argument and return set number of register required in each
4146 class. Return 0 iff parameter should be passed in memory. */
4148 examine_argument (enum machine_mode mode, const_tree type, int in_return,
4149 int *int_nregs, int *sse_nregs)
4151 enum x86_64_reg_class regclass[MAX_CLASSES];
4152 int n = classify_argument (mode, type, regclass, 0);
4158 for (n--; n >= 0; n--)
4159 switch (regclass[n])
4161 case X86_64_INTEGER_CLASS:
4162 case X86_64_INTEGERSI_CLASS:
4165 case X86_64_SSE_CLASS:
4166 case X86_64_SSESF_CLASS:
4167 case X86_64_SSEDF_CLASS:
4170 case X86_64_NO_CLASS:
4171 case X86_64_SSEUP_CLASS:
4173 case X86_64_X87_CLASS:
4174 case X86_64_X87UP_CLASS:
4178 case X86_64_COMPLEX_X87_CLASS:
4179 return in_return ? 2 : 0;
4180 case X86_64_MEMORY_CLASS:
4186 /* Construct container for the argument used by GCC interface. See
4187 FUNCTION_ARG for the detailed description. */
4190 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
4191 const_tree type, int in_return, int nintregs, int nsseregs,
4192 const int *intreg, int sse_regno)
4194 /* The following variables hold the static issued_error state. */
4195 static bool issued_sse_arg_error;
4196 static bool issued_sse_ret_error;
4197 static bool issued_x87_ret_error;
4199 enum machine_mode tmpmode;
4201 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4202 enum x86_64_reg_class regclass[MAX_CLASSES];
4206 int needed_sseregs, needed_intregs;
4207 rtx exp[MAX_CLASSES];
4210 n = classify_argument (mode, type, regclass, 0);
4213 if (!examine_argument (mode, type, in_return, &needed_intregs,
4216 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
4219 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
4220 some less clueful developer tries to use floating-point anyway. */
4221 if (needed_sseregs && !TARGET_SSE)
4225 if (!issued_sse_ret_error)
4227 error ("SSE register return with SSE disabled");
4228 issued_sse_ret_error = true;
4231 else if (!issued_sse_arg_error)
4233 error ("SSE register argument with SSE disabled");
4234 issued_sse_arg_error = true;
4239 /* Likewise, error if the ABI requires us to return values in the
4240 x87 registers and the user specified -mno-80387. */
4241 if (!TARGET_80387 && in_return)
4242 for (i = 0; i < n; i++)
4243 if (regclass[i] == X86_64_X87_CLASS
4244 || regclass[i] == X86_64_X87UP_CLASS
4245 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
4247 if (!issued_x87_ret_error)
4249 error ("x87 register return with x87 disabled");
4250 issued_x87_ret_error = true;
4255 /* First construct simple cases. Avoid SCmode, since we want to use
4256 single register to pass this type. */
4257 if (n == 1 && mode != SCmode)
4258 switch (regclass[0])
4260 case X86_64_INTEGER_CLASS:
4261 case X86_64_INTEGERSI_CLASS:
4262 return gen_rtx_REG (mode, intreg[0]);
4263 case X86_64_SSE_CLASS:
4264 case X86_64_SSESF_CLASS:
4265 case X86_64_SSEDF_CLASS:
4266 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4267 case X86_64_X87_CLASS:
4268 case X86_64_COMPLEX_X87_CLASS:
4269 return gen_rtx_REG (mode, FIRST_STACK_REG);
4270 case X86_64_NO_CLASS:
4271 /* Zero sized array, struct or class. */
4276 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4277 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4278 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4281 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4282 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4283 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4284 && regclass[1] == X86_64_INTEGER_CLASS
4285 && (mode == CDImode || mode == TImode || mode == TFmode)
4286 && intreg[0] + 1 == intreg[1])
4287 return gen_rtx_REG (mode, intreg[0]);
4289 /* Otherwise figure out the entries of the PARALLEL. */
4290 for (i = 0; i < n; i++)
4292 switch (regclass[i])
4294 case X86_64_NO_CLASS:
4296 case X86_64_INTEGER_CLASS:
4297 case X86_64_INTEGERSI_CLASS:
4298 /* Merge TImodes on aligned occasions here too. */
4299 if (i * 8 + 8 > bytes)
4300 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4301 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4305 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4306 if (tmpmode == BLKmode)
4308 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4309 gen_rtx_REG (tmpmode, *intreg),
4313 case X86_64_SSESF_CLASS:
4314 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4315 gen_rtx_REG (SFmode,
4316 SSE_REGNO (sse_regno)),
4320 case X86_64_SSEDF_CLASS:
4321 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4322 gen_rtx_REG (DFmode,
4323 SSE_REGNO (sse_regno)),
4327 case X86_64_SSE_CLASS:
4328 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4332 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4333 gen_rtx_REG (tmpmode,
4334 SSE_REGNO (sse_regno)),
4336 if (tmpmode == TImode)
4345 /* Empty aligned struct, union or class. */
4349 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4350 for (i = 0; i < nexps; i++)
4351 XVECEXP (ret, 0, i) = exp [i];
4355 /* Update the data in CUM to advance over an argument of mode MODE
4356 and data type TYPE. (TYPE is null for libcalls where that information
4357 may not be available.) */
4360 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4361 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4377 cum->words += words;
4378 cum->nregs -= words;
4379 cum->regno += words;
4381 if (cum->nregs <= 0)
4389 if (cum->float_in_sse < 2)
4392 if (cum->float_in_sse < 1)
4403 if (!type || !AGGREGATE_TYPE_P (type))
4405 cum->sse_words += words;
4406 cum->sse_nregs -= 1;
4407 cum->sse_regno += 1;
4408 if (cum->sse_nregs <= 0)
4421 if (!type || !AGGREGATE_TYPE_P (type))
4423 cum->mmx_words += words;
4424 cum->mmx_nregs -= 1;
4425 cum->mmx_regno += 1;
4426 if (cum->mmx_nregs <= 0)
4437 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4438 tree type, HOST_WIDE_INT words)
4440 int int_nregs, sse_nregs;
4442 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4443 cum->words += words;
4444 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4446 cum->nregs -= int_nregs;
4447 cum->sse_nregs -= sse_nregs;
4448 cum->regno += int_nregs;
4449 cum->sse_regno += sse_nregs;
4452 cum->words += words;
4456 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4457 HOST_WIDE_INT words)
4459 /* Otherwise, this should be passed indirect. */
4460 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4462 cum->words += words;
4471 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4472 tree type, int named ATTRIBUTE_UNUSED)
4474 HOST_WIDE_INT bytes, words;
4476 if (mode == BLKmode)
4477 bytes = int_size_in_bytes (type);
4479 bytes = GET_MODE_SIZE (mode);
4480 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4483 mode = type_natural_mode (type);
4485 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4486 function_arg_advance_ms_64 (cum, bytes, words);
4487 else if (TARGET_64BIT)
4488 function_arg_advance_64 (cum, mode, type, words);
4490 function_arg_advance_32 (cum, mode, type, bytes, words);
4493 /* Define where to put the arguments to a function.
4494 Value is zero to push the argument on the stack,
4495 or a hard register in which to store the argument.
4497 MODE is the argument's machine mode.
4498 TYPE is the data type of the argument (as a tree).
4499 This is null for libcalls where that information may
4501 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4502 the preceding args and about the function being called.
4503 NAMED is nonzero if this argument is a named parameter
4504 (otherwise it is an extra parameter matching an ellipsis). */
4507 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4508 enum machine_mode orig_mode, tree type,
4509 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4511 static bool warnedsse, warnedmmx;
4513 /* Avoid the AL settings for the Unix64 ABI. */
4514 if (mode == VOIDmode)
4530 if (words <= cum->nregs)
4532 int regno = cum->regno;
4534 /* Fastcall allocates the first two DWORD (SImode) or
4535 smaller arguments to ECX and EDX if it isn't an
4541 || (type && AGGREGATE_TYPE_P (type)))
4544 /* ECX not EAX is the first allocated register. */
4545 if (regno == AX_REG)
4548 return gen_rtx_REG (mode, regno);
4553 if (cum->float_in_sse < 2)
4556 if (cum->float_in_sse < 1)
4566 if (!type || !AGGREGATE_TYPE_P (type))
4568 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4571 warning (0, "SSE vector argument without SSE enabled "
4575 return gen_reg_or_parallel (mode, orig_mode,
4576 cum->sse_regno + FIRST_SSE_REG);
4585 if (!type || !AGGREGATE_TYPE_P (type))
4587 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4590 warning (0, "MMX vector argument without MMX enabled "
4594 return gen_reg_or_parallel (mode, orig_mode,
4595 cum->mmx_regno + FIRST_MMX_REG);
4604 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4605 enum machine_mode orig_mode, tree type)
4607 /* Handle a hidden AL argument containing number of registers
4608 for varargs x86-64 functions. */
4609 if (mode == VOIDmode)
4610 return GEN_INT (cum->maybe_vaarg
4611 ? (cum->sse_nregs < 0
4612 ? (cum->call_abi == DEFAULT_ABI
4614 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4615 : X64_SSE_REGPARM_MAX))
4619 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4621 &x86_64_int_parameter_registers [cum->regno],
4626 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4627 enum machine_mode orig_mode, int named,
4628 HOST_WIDE_INT bytes)
4632 /* Avoid the AL settings for the Unix64 ABI. */
4633 if (mode == VOIDmode)
4636 /* If we've run out of registers, it goes on the stack. */
4637 if (cum->nregs == 0)
4640 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4642 /* Only floating point modes are passed in anything but integer regs. */
4643 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4646 regno = cum->regno + FIRST_SSE_REG;
4651 /* Unnamed floating parameters are passed in both the
4652 SSE and integer registers. */
4653 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4654 t2 = gen_rtx_REG (mode, regno);
4655 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4656 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4657 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4660 /* Handle aggregated types passed in register. */
4661 if (orig_mode == BLKmode)
4663 if (bytes > 0 && bytes <= 8)
4664 mode = (bytes > 4 ? DImode : SImode);
4665 if (mode == BLKmode)
4669 return gen_reg_or_parallel (mode, orig_mode, regno);
4673 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4674 tree type, int named)
4676 enum machine_mode mode = omode;
4677 HOST_WIDE_INT bytes, words;
4679 if (mode == BLKmode)
4680 bytes = int_size_in_bytes (type);
4682 bytes = GET_MODE_SIZE (mode);
4683 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4685 /* To simplify the code below, represent vector types with a vector mode
4686 even if MMX/SSE are not active. */
4687 if (type && TREE_CODE (type) == VECTOR_TYPE)
4688 mode = type_natural_mode (type);
4690 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4691 return function_arg_ms_64 (cum, mode, omode, named, bytes);
4692 else if (TARGET_64BIT)
4693 return function_arg_64 (cum, mode, omode, type);
4695 return function_arg_32 (cum, mode, omode, type, bytes, words);
4698 /* A C expression that indicates when an argument must be passed by
4699 reference. If nonzero for an argument, a copy of that argument is
4700 made in memory and a pointer to the argument is passed instead of
4701 the argument itself. The pointer is passed in whatever way is
4702 appropriate for passing a pointer to that type. */
4705 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4706 enum machine_mode mode ATTRIBUTE_UNUSED,
4707 const_tree type, bool named ATTRIBUTE_UNUSED)
4709 /* See Windows x64 Software Convention. */
4710 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4712 int msize = (int) GET_MODE_SIZE (mode);
4715 /* Arrays are passed by reference. */
4716 if (TREE_CODE (type) == ARRAY_TYPE)
4719 if (AGGREGATE_TYPE_P (type))
4721 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4722 are passed by reference. */
4723 msize = int_size_in_bytes (type);
4727 /* __m128 is passed by reference. */
4729 case 1: case 2: case 4: case 8:
4735 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4741 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4744 contains_aligned_value_p (tree type)
4746 enum machine_mode mode = TYPE_MODE (type);
4747 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
4750 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4752 if (TYPE_ALIGN (type) < 128)
4755 if (AGGREGATE_TYPE_P (type))
4757 /* Walk the aggregates recursively. */
4758 switch (TREE_CODE (type))
4762 case QUAL_UNION_TYPE:
4766 /* Walk all the structure fields. */
4767 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4769 if (TREE_CODE (field) == FIELD_DECL
4770 && contains_aligned_value_p (TREE_TYPE (field)))
4777 /* Just for use if some languages passes arrays by value. */
4778 if (contains_aligned_value_p (TREE_TYPE (type)))
4789 /* Gives the alignment boundary, in bits, of an argument with the
4790 specified mode and type. */
4793 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4798 /* Since canonical type is used for call, we convert it to
4799 canonical type if needed. */
4800 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
4801 type = TYPE_CANONICAL (type);
4802 align = TYPE_ALIGN (type);
4805 align = GET_MODE_ALIGNMENT (mode);
4806 if (align < PARM_BOUNDARY)
4807 align = PARM_BOUNDARY;
4808 /* In 32bit, only _Decimal128 and __float128 are aligned to their
4809 natural boundaries. */
4810 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
4812 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4813 make an exception for SSE modes since these require 128bit
4816 The handling here differs from field_alignment. ICC aligns MMX
4817 arguments to 4 byte boundaries, while structure fields are aligned
4818 to 8 byte boundaries. */
4821 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
4822 align = PARM_BOUNDARY;
4826 if (!contains_aligned_value_p (type))
4827 align = PARM_BOUNDARY;
4830 if (align > BIGGEST_ALIGNMENT)
4831 align = BIGGEST_ALIGNMENT;
4835 /* Return true if N is a possible register number of function value. */
4838 ix86_function_value_regno_p (int regno)
4845 case FIRST_FLOAT_REG:
4846 /* TODO: The function should depend on current function ABI but
4847 builtins.c would need updating then. Therefore we use the
4849 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
4851 return TARGET_FLOAT_RETURNS_IN_80387;
4857 if (TARGET_MACHO || TARGET_64BIT)
4865 /* Define how to find the value returned by a function.
4866 VALTYPE is the data type of the value (as a tree).
4867 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4868 otherwise, FUNC is 0. */
4871 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4872 const_tree fntype, const_tree fn)
4876 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4877 we normally prevent this case when mmx is not available. However
4878 some ABIs may require the result to be returned like DImode. */
4879 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4880 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4882 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4883 we prevent this case when sse is not available. However some ABIs
4884 may require the result to be returned like integer TImode. */
4885 else if (mode == TImode
4886 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4887 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4889 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4890 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4891 regno = FIRST_FLOAT_REG;
4893 /* Most things go in %eax. */
4896 /* Override FP return register with %xmm0 for local functions when
4897 SSE math is enabled or for functions with sseregparm attribute. */
4898 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4900 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4901 if ((sse_level >= 1 && mode == SFmode)
4902 || (sse_level == 2 && mode == DFmode))
4903 regno = FIRST_SSE_REG;
4906 return gen_rtx_REG (orig_mode, regno);
4910 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4915 /* Handle libcalls, which don't provide a type node. */
4916 if (valtype == NULL)
4928 return gen_rtx_REG (mode, FIRST_SSE_REG);
4931 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4935 return gen_rtx_REG (mode, AX_REG);
4939 ret = construct_container (mode, orig_mode, valtype, 1,
4940 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4941 x86_64_int_return_registers, 0);
4943 /* For zero sized structures, construct_container returns NULL, but we
4944 need to keep rest of compiler happy by returning meaningful value. */
4946 ret = gen_rtx_REG (orig_mode, AX_REG);
4952 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4954 unsigned int regno = AX_REG;
4958 switch (GET_MODE_SIZE (mode))
4961 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4962 && !COMPLEX_MODE_P (mode))
4963 regno = FIRST_SSE_REG;
4967 if (mode == SFmode || mode == DFmode)
4968 regno = FIRST_SSE_REG;
4974 return gen_rtx_REG (orig_mode, regno);
4978 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4979 enum machine_mode orig_mode, enum machine_mode mode)
4981 const_tree fn, fntype;
4984 if (fntype_or_decl && DECL_P (fntype_or_decl))
4985 fn = fntype_or_decl;
4986 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4988 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
4989 return function_value_ms_64 (orig_mode, mode);
4990 else if (TARGET_64BIT)
4991 return function_value_64 (orig_mode, mode, valtype);
4993 return function_value_32 (orig_mode, mode, fntype, fn);
4997 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4998 bool outgoing ATTRIBUTE_UNUSED)
5000 enum machine_mode mode, orig_mode;
5002 orig_mode = TYPE_MODE (valtype);
5003 mode = type_natural_mode (valtype);
5004 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
5008 ix86_libcall_value (enum machine_mode mode)
5010 return ix86_function_value_1 (NULL, NULL, mode, mode);
5013 /* Return true iff type is returned in memory. */
5015 static int ATTRIBUTE_UNUSED
5016 return_in_memory_32 (const_tree type, enum machine_mode mode)
5020 if (mode == BLKmode)
5023 size = int_size_in_bytes (type);
5025 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
5028 if (VECTOR_MODE_P (mode) || mode == TImode)
5030 /* User-created vectors small enough to fit in EAX. */
5034 /* MMX/3dNow values are returned in MM0,
5035 except when it doesn't exits. */
5037 return (TARGET_MMX ? 0 : 1);
5039 /* SSE values are returned in XMM0, except when it doesn't exist. */
5041 return (TARGET_SSE ? 0 : 1);
5052 static int ATTRIBUTE_UNUSED
5053 return_in_memory_64 (const_tree type, enum machine_mode mode)
5055 int needed_intregs, needed_sseregs;
5056 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
5059 static int ATTRIBUTE_UNUSED
5060 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
5062 HOST_WIDE_INT size = int_size_in_bytes (type);
5064 /* __m128 is returned in xmm0. */
5065 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5066 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
5069 /* Otherwise, the size must be exactly in [1248]. */
5070 return (size != 1 && size != 2 && size != 4 && size != 8);
5074 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5076 #ifdef SUBTARGET_RETURN_IN_MEMORY
5077 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
5079 const enum machine_mode mode = type_natural_mode (type);
5081 if (TARGET_64BIT_MS_ABI)
5082 return return_in_memory_ms_64 (type, mode);
5083 else if (TARGET_64BIT)
5084 return return_in_memory_64 (type, mode);
5086 return return_in_memory_32 (type, mode);
5090 /* Return false iff TYPE is returned in memory. This version is used
5091 on Solaris 10. It is similar to the generic ix86_return_in_memory,
5092 but differs notably in that when MMX is available, 8-byte vectors
5093 are returned in memory, rather than in MMX registers. */
5096 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5099 enum machine_mode mode = type_natural_mode (type);
5102 return return_in_memory_64 (type, mode);
5104 if (mode == BLKmode)
5107 size = int_size_in_bytes (type);
5109 if (VECTOR_MODE_P (mode))
5111 /* Return in memory only if MMX registers *are* available. This
5112 seems backwards, but it is consistent with the existing
5119 else if (mode == TImode)
5121 else if (mode == XFmode)
5127 /* When returning SSE vector types, we have a choice of either
5128 (1) being abi incompatible with a -march switch, or
5129 (2) generating an error.
5130 Given no good solution, I think the safest thing is one warning.
5131 The user won't be able to use -Werror, but....
5133 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
5134 called in response to actually generating a caller or callee that
5135 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
5136 via aggregate_value_p for general type probing from tree-ssa. */
5139 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
5141 static bool warnedsse, warnedmmx;
5143 if (!TARGET_64BIT && type)
5145 /* Look at the return type of the function, not the function type. */
5146 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
5148 if (!TARGET_SSE && !warnedsse)
5151 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5154 warning (0, "SSE vector return without SSE enabled "
5159 if (!TARGET_MMX && !warnedmmx)
5161 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5164 warning (0, "MMX vector return without MMX enabled "
5174 /* Create the va_list data type. */
5177 ix86_build_builtin_va_list (void)
5179 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
5181 /* For i386 we use plain pointer to argument area. */
5182 if (!TARGET_64BIT || ix86_cfun_abi () == MS_ABI)
5183 return build_pointer_type (char_type_node);
5185 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5186 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
5188 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
5189 unsigned_type_node);
5190 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
5191 unsigned_type_node);
5192 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
5194 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
5197 va_list_gpr_counter_field = f_gpr;
5198 va_list_fpr_counter_field = f_fpr;
5200 DECL_FIELD_CONTEXT (f_gpr) = record;
5201 DECL_FIELD_CONTEXT (f_fpr) = record;
5202 DECL_FIELD_CONTEXT (f_ovf) = record;
5203 DECL_FIELD_CONTEXT (f_sav) = record;
5205 TREE_CHAIN (record) = type_decl;
5206 TYPE_NAME (record) = type_decl;
5207 TYPE_FIELDS (record) = f_gpr;
5208 TREE_CHAIN (f_gpr) = f_fpr;
5209 TREE_CHAIN (f_fpr) = f_ovf;
5210 TREE_CHAIN (f_ovf) = f_sav;
5212 layout_type (record);
5214 /* The correct type is an array type of one element. */
5215 return build_array_type (record, build_index_type (size_zero_node));
5218 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
5221 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
5230 int regparm = ix86_regparm;
5232 if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
5233 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
5235 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
5238 /* Indicate to allocate space on the stack for varargs save area. */
5239 ix86_save_varrargs_registers = 1;
5240 /* We need 16-byte stack alignment to save SSE registers. If user
5241 asked for lower preferred_stack_boundary, lets just hope that he knows
5242 what he is doing and won't varargs SSE values.
5244 We also may end up assuming that only 64bit values are stored in SSE
5245 register let some floating point program work. */
5246 if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
5247 crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;
5249 save_area = frame_pointer_rtx;
5250 set = get_varargs_alias_set ();
5252 for (i = cum->regno;
5254 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
5257 mem = gen_rtx_MEM (Pmode,
5258 plus_constant (save_area, i * UNITS_PER_WORD));
5259 MEM_NOTRAP_P (mem) = 1;
5260 set_mem_alias_set (mem, set);
5261 emit_move_insn (mem, gen_rtx_REG (Pmode,
5262 x86_64_int_parameter_registers[i]));
5265 if (cum->sse_nregs && cfun->va_list_fpr_size)
5267 /* Now emit code to save SSE registers. The AX parameter contains number
5268 of SSE parameter registers used to call this function. We use
5269 sse_prologue_save insn template that produces computed jump across
5270 SSE saves. We need some preparation work to get this working. */
5272 label = gen_label_rtx ();
5273 label_ref = gen_rtx_LABEL_REF (Pmode, label);
5275 /* Compute address to jump to :
5276 label - eax*4 + nnamed_sse_arguments*4 */
5277 tmp_reg = gen_reg_rtx (Pmode);
5278 nsse_reg = gen_reg_rtx (Pmode);
5279 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
5280 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5281 gen_rtx_MULT (Pmode, nsse_reg,
5286 gen_rtx_CONST (DImode,
5287 gen_rtx_PLUS (DImode,
5289 GEN_INT (cum->sse_regno * 4))));
5291 emit_move_insn (nsse_reg, label_ref);
5292 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
5294 /* Compute address of memory block we save into. We always use pointer
5295 pointing 127 bytes after first byte to store - this is needed to keep
5296 instruction size limited by 4 bytes. */
5297 tmp_reg = gen_reg_rtx (Pmode);
5298 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5299 plus_constant (save_area,
5300 8 * X86_64_REGPARM_MAX + 127)));
5301 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
5302 MEM_NOTRAP_P (mem) = 1;
5303 set_mem_alias_set (mem, set);
5304 set_mem_align (mem, BITS_PER_WORD);
5306 /* And finally do the dirty job! */
5307 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
5308 GEN_INT (cum->sse_regno), label));
5313 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5315 alias_set_type set = get_varargs_alias_set ();
5318 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
5322 mem = gen_rtx_MEM (Pmode,
5323 plus_constant (virtual_incoming_args_rtx,
5324 i * UNITS_PER_WORD));
5325 MEM_NOTRAP_P (mem) = 1;
5326 set_mem_alias_set (mem, set);
5328 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5329 emit_move_insn (mem, reg);
5334 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5335 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5338 CUMULATIVE_ARGS next_cum;
5341 /* This argument doesn't appear to be used anymore. Which is good,
5342 because the old code here didn't suppress rtl generation. */
5343 gcc_assert (!no_rtl);
5348 fntype = TREE_TYPE (current_function_decl);
5350 /* For varargs, we do not want to skip the dummy va_dcl argument.
5351 For stdargs, we do want to skip the last named argument. */
5353 if (stdarg_p (fntype))
5354 function_arg_advance (&next_cum, mode, type, 1);
5356 if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5357 setup_incoming_varargs_ms_64 (&next_cum);
5359 setup_incoming_varargs_64 (&next_cum);
5362 /* Implement va_start. */
5365 ix86_va_start (tree valist, rtx nextarg)
5367 HOST_WIDE_INT words, n_gpr, n_fpr;
5368 tree f_gpr, f_fpr, f_ovf, f_sav;
5369 tree gpr, fpr, ovf, sav, t;
5372 /* Only 64bit target needs something special. */
5373 if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
5375 std_expand_builtin_va_start (valist, nextarg);
5379 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5380 f_fpr = TREE_CHAIN (f_gpr);
5381 f_ovf = TREE_CHAIN (f_fpr);
5382 f_sav = TREE_CHAIN (f_ovf);
5384 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5385 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5386 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5387 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5388 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5390 /* Count number of gp and fp argument registers used. */
5391 words = crtl->args.info.words;
5392 n_gpr = crtl->args.info.regno;
5393 n_fpr = crtl->args.info.sse_regno;
5395 if (cfun->va_list_gpr_size)
5397 type = TREE_TYPE (gpr);
5398 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5399 build_int_cst (type, n_gpr * 8));
5400 TREE_SIDE_EFFECTS (t) = 1;
5401 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5404 if (cfun->va_list_fpr_size)
5406 type = TREE_TYPE (fpr);
5407 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5408 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
5409 TREE_SIDE_EFFECTS (t) = 1;
5410 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5413 /* Find the overflow area. */
5414 type = TREE_TYPE (ovf);
5415 t = make_tree (type, virtual_incoming_args_rtx);
5417 t = build2 (POINTER_PLUS_EXPR, type, t,
5418 size_int (words * UNITS_PER_WORD));
5419 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5420 TREE_SIDE_EFFECTS (t) = 1;
5421 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5423 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5425 /* Find the register save area.
5426 Prologue of the function save it right above stack frame. */
5427 type = TREE_TYPE (sav);
5428 t = make_tree (type, frame_pointer_rtx);
5429 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5430 TREE_SIDE_EFFECTS (t) = 1;
5431 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5435 /* Implement va_arg. */
5438 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5440 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5441 tree f_gpr, f_fpr, f_ovf, f_sav;
5442 tree gpr, fpr, ovf, sav, t;
5444 tree lab_false, lab_over = NULL_TREE;
5449 enum machine_mode nat_mode;
5451 /* Only 64bit target needs something special. */
5452 if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
5453 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5455 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5456 f_fpr = TREE_CHAIN (f_gpr);
5457 f_ovf = TREE_CHAIN (f_fpr);
5458 f_sav = TREE_CHAIN (f_ovf);
5460 valist = build_va_arg_indirect_ref (valist);
5461 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5462 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5463 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5464 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5466 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5468 type = build_pointer_type (type);
5469 size = int_size_in_bytes (type);
5470 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5472 nat_mode = type_natural_mode (type);
5473 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5474 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
5477 /* Pull the value out of the saved registers. */
5479 addr = create_tmp_var (ptr_type_node, "addr");
5480 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5484 int needed_intregs, needed_sseregs;
5486 tree int_addr, sse_addr;
5488 lab_false = create_artificial_label ();
5489 lab_over = create_artificial_label ();
5491 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5493 need_temp = (!REG_P (container)
5494 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5495 || TYPE_ALIGN (type) > 128));
5497 /* In case we are passing structure, verify that it is consecutive block
5498 on the register save area. If not we need to do moves. */
5499 if (!need_temp && !REG_P (container))
5501 /* Verify that all registers are strictly consecutive */
5502 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5506 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5508 rtx slot = XVECEXP (container, 0, i);
5509 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5510 || INTVAL (XEXP (slot, 1)) != i * 16)
5518 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5520 rtx slot = XVECEXP (container, 0, i);
5521 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5522 || INTVAL (XEXP (slot, 1)) != i * 8)
5534 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5535 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5536 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5537 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5540 /* First ensure that we fit completely in registers. */
5543 t = build_int_cst (TREE_TYPE (gpr),
5544 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
5545 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5546 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5547 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5548 gimplify_and_add (t, pre_p);
5552 t = build_int_cst (TREE_TYPE (fpr),
5553 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5554 + X86_64_REGPARM_MAX * 8);
5555 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5556 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5557 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5558 gimplify_and_add (t, pre_p);
5561 /* Compute index to start of area used for integer regs. */
5564 /* int_addr = gpr + sav; */
5565 t = fold_convert (sizetype, gpr);
5566 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5567 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5568 gimplify_and_add (t, pre_p);
5572 /* sse_addr = fpr + sav; */
5573 t = fold_convert (sizetype, fpr);
5574 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5575 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5576 gimplify_and_add (t, pre_p);
5581 tree temp = create_tmp_var (type, "va_arg_tmp");
5584 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5585 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5586 gimplify_and_add (t, pre_p);
5588 for (i = 0; i < XVECLEN (container, 0); i++)
5590 rtx slot = XVECEXP (container, 0, i);
5591 rtx reg = XEXP (slot, 0);
5592 enum machine_mode mode = GET_MODE (reg);
5593 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5594 tree addr_type = build_pointer_type (piece_type);
5597 tree dest_addr, dest;
5599 if (SSE_REGNO_P (REGNO (reg)))
5601 src_addr = sse_addr;
5602 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5606 src_addr = int_addr;
5607 src_offset = REGNO (reg) * 8;
5609 src_addr = fold_convert (addr_type, src_addr);
5610 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5611 size_int (src_offset));
5612 src = build_va_arg_indirect_ref (src_addr);
5614 dest_addr = fold_convert (addr_type, addr);
5615 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5616 size_int (INTVAL (XEXP (slot, 1))));
5617 dest = build_va_arg_indirect_ref (dest_addr);
5619 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5620 gimplify_and_add (t, pre_p);
5626 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5627 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5628 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5629 gimplify_and_add (t, pre_p);
5633 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5634 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5635 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5636 gimplify_and_add (t, pre_p);
5639 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5640 gimplify_and_add (t, pre_p);
5642 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5643 append_to_statement_list (t, pre_p);
5646 /* ... otherwise out of the overflow area. */
5648 /* Care for on-stack alignment if needed. */
5649 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5650 || integer_zerop (TYPE_SIZE (type)))
5654 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5655 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5656 size_int (align - 1));
5657 t = fold_convert (sizetype, t);
5658 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5660 t = fold_convert (TREE_TYPE (ovf), t);
5662 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5664 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5665 gimplify_and_add (t2, pre_p);
5667 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5668 size_int (rsize * UNITS_PER_WORD));
5669 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5670 gimplify_and_add (t, pre_p);
5674 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5675 append_to_statement_list (t, pre_p);
5678 ptrtype = build_pointer_type (type);
5679 addr = fold_convert (ptrtype, addr);
5682 addr = build_va_arg_indirect_ref (addr);
5683 return build_va_arg_indirect_ref (addr);
5686 /* Return nonzero if OPNUM's MEM should be matched
5687 in movabs* patterns. */
5690 ix86_check_movabs (rtx insn, int opnum)
5694 set = PATTERN (insn);
5695 if (GET_CODE (set) == PARALLEL)
5696 set = XVECEXP (set, 0, 0);
5697 gcc_assert (GET_CODE (set) == SET);
5698 mem = XEXP (set, opnum);
5699 while (GET_CODE (mem) == SUBREG)
5700 mem = SUBREG_REG (mem);
5701 gcc_assert (MEM_P (mem));
5702 return (volatile_ok || !MEM_VOLATILE_P (mem));
5705 /* Initialize the table of extra 80387 mathematical constants. */
5708 init_ext_80387_constants (void)
5710 static const char * cst[5] =
5712 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5713 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5714 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5715 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5716 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5720 for (i = 0; i < 5; i++)
5722 real_from_string (&ext_80387_constants_table[i], cst[i]);
5723 /* Ensure each constant is rounded to XFmode precision. */
5724 real_convert (&ext_80387_constants_table[i],
5725 XFmode, &ext_80387_constants_table[i]);
5728 ext_80387_constants_init = 1;
5731 /* Return true if the constant is something that can be loaded with
5732 a special instruction. */
5735 standard_80387_constant_p (rtx x)
5737 enum machine_mode mode = GET_MODE (x);
5741 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5744 if (x == CONST0_RTX (mode))
5746 if (x == CONST1_RTX (mode))
5749 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5751 /* For XFmode constants, try to find a special 80387 instruction when
5752 optimizing for size or on those CPUs that benefit from them. */
5754 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5758 if (! ext_80387_constants_init)
5759 init_ext_80387_constants ();
5761 for (i = 0; i < 5; i++)
5762 if (real_identical (&r, &ext_80387_constants_table[i]))
5766 /* Load of the constant -0.0 or -1.0 will be split as
5767 fldz;fchs or fld1;fchs sequence. */
5768 if (real_isnegzero (&r))
5770 if (real_identical (&r, &dconstm1))
5776 /* Return the opcode of the special instruction to be used to load
5780 standard_80387_constant_opcode (rtx x)
5782 switch (standard_80387_constant_p (x))
5806 /* Return the CONST_DOUBLE representing the 80387 constant that is
5807 loaded by the specified special instruction. The argument IDX
5808 matches the return value from standard_80387_constant_p. */
5811 standard_80387_constant_rtx (int idx)
5815 if (! ext_80387_constants_init)
5816 init_ext_80387_constants ();
5832 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5836 /* Return 1 if mode is a valid mode for sse. */
5838 standard_sse_mode_p (enum machine_mode mode)
5855 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5858 standard_sse_constant_p (rtx x)
5860 enum machine_mode mode = GET_MODE (x);
5862 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5864 if (vector_all_ones_operand (x, mode)
5865 && standard_sse_mode_p (mode))
5866 return TARGET_SSE2 ? 2 : -1;
5871 /* Return the opcode of the special instruction to be used to load
5875 standard_sse_constant_opcode (rtx insn, rtx x)
5877 switch (standard_sse_constant_p (x))
5880 if (get_attr_mode (insn) == MODE_V4SF)
5881 return "xorps\t%0, %0";
5882 else if (get_attr_mode (insn) == MODE_V2DF)
5883 return "xorpd\t%0, %0";
5885 return "pxor\t%0, %0";
5887 return "pcmpeqd\t%0, %0";
5892 /* Returns 1 if OP contains a symbol reference */
5895 symbolic_reference_mentioned_p (rtx op)
5900 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5903 fmt = GET_RTX_FORMAT (GET_CODE (op));
5904 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5910 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5911 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5915 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5922 /* Return 1 if it is appropriate to emit `ret' instructions in the
5923 body of a function. Do this only if the epilogue is simple, needing a
5924 couple of insns. Prior to reloading, we can't tell how many registers
5925 must be saved, so return 0 then. Return 0 if there is no frame
5926 marker to de-allocate. */
5929 ix86_can_use_return_insn_p (void)
5931 struct ix86_frame frame;
5933 if (! reload_completed || frame_pointer_needed)
5936 /* Don't allow more than 32 pop, since that's all we can do
5937 with one instruction. */
5938 if (crtl->args.pops_args
5939 && crtl->args.size >= 32768)
5942 ix86_compute_frame_layout (&frame);
5943 return frame.to_allocate == 0 && frame.nregs == 0;
5946 /* Value should be nonzero if functions must have frame pointers.
5947 Zero means the frame pointer need not be set up (and parms may
5948 be accessed via the stack pointer) in functions that seem suitable. */
5951 ix86_frame_pointer_required (void)
5953 /* If we accessed previous frames, then the generated code expects
5954 to be able to access the saved ebp value in our frame. */
5955 if (cfun->machine->accesses_prev_frame)
5958 /* Several x86 os'es need a frame pointer for other reasons,
5959 usually pertaining to setjmp. */
5960 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5963 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5964 the frame pointer by default. Turn it back on now if we've not
5965 got a leaf function. */
5966 if (TARGET_OMIT_LEAF_FRAME_POINTER
5967 && (!current_function_is_leaf
5968 || ix86_current_function_calls_tls_descriptor))
5977 /* Record that the current function accesses previous call frames. */
5980 ix86_setup_frame_addresses (void)
5982 cfun->machine->accesses_prev_frame = 1;
5985 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5986 # define USE_HIDDEN_LINKONCE 1
5988 # define USE_HIDDEN_LINKONCE 0
5991 static int pic_labels_used;
5993 /* Fills in the label name that should be used for a pc thunk for
5994 the given register. */
5997 get_pc_thunk_name (char name[32], unsigned int regno)
5999 gcc_assert (!TARGET_64BIT);
6001 if (USE_HIDDEN_LINKONCE)
6002 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
6004 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6008 /* This function generates code for -fpic that loads %ebx with
6009 the return address of the caller and then returns. */
6012 ix86_file_end (void)
6017 for (regno = 0; regno < 8; ++regno)
6021 if (! ((pic_labels_used >> regno) & 1))
6024 get_pc_thunk_name (name, regno);
6029 switch_to_section (darwin_sections[text_coal_section]);
6030 fputs ("\t.weak_definition\t", asm_out_file);
6031 assemble_name (asm_out_file, name);
6032 fputs ("\n\t.private_extern\t", asm_out_file);
6033 assemble_name (asm_out_file, name);
6034 fputs ("\n", asm_out_file);
6035 ASM_OUTPUT_LABEL (asm_out_file, name);
6039 if (USE_HIDDEN_LINKONCE)
6043 decl = build_decl (FUNCTION_DECL, get_identifier (name),
6045 TREE_PUBLIC (decl) = 1;
6046 TREE_STATIC (decl) = 1;
6047 DECL_ONE_ONLY (decl) = 1;
6049 (*targetm.asm_out.unique_section) (decl, 0);
6050 switch_to_section (get_named_section (decl, NULL, 0));
6052 (*targetm.asm_out.globalize_label) (asm_out_file, name);
6053 fputs ("\t.hidden\t", asm_out_file);
6054 assemble_name (asm_out_file, name);
6055 fputc ('\n', asm_out_file);
6056 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6060 switch_to_section (text_section);
6061 ASM_OUTPUT_LABEL (asm_out_file, name);
6064 xops[0] = gen_rtx_REG (Pmode, regno);
6065 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6066 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
6067 output_asm_insn ("ret", xops);
6070 if (NEED_INDICATE_EXEC_STACK)
6071 file_end_indicate_exec_stack ();
6074 /* Emit code for the SET_GOT patterns. */
6077 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
6083 if (TARGET_VXWORKS_RTP && flag_pic)
6085 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6086 xops[2] = gen_rtx_MEM (Pmode,
6087 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6088 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6090 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6091 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6092 an unadorned address. */
6093 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6094 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6095 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6099 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6101 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
6103 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6106 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6108 output_asm_insn ("call\t%a2", xops);
6111 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
6112 is what will be referenced by the Mach-O PIC subsystem. */
6114 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
6117 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6118 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6121 output_asm_insn ("pop%z0\t%0", xops);
6126 get_pc_thunk_name (name, REGNO (dest));
6127 pic_labels_used |= 1 << REGNO (dest);
6129 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6130 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6131 output_asm_insn ("call\t%X2", xops);
6132 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
6133 is what will be referenced by the Mach-O PIC subsystem. */
6136 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
6138 targetm.asm_out.internal_label (asm_out_file, "L",
6139 CODE_LABEL_NUMBER (label));
6146 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
6147 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6149 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
6154 /* Generate an "push" pattern for input ARG. */
6159 return gen_rtx_SET (VOIDmode,
6161 gen_rtx_PRE_DEC (Pmode,
6162 stack_pointer_rtx)),
6166 /* Return >= 0 if there is an unused call-clobbered register available
6167 for the entire function. */
6170 ix86_select_alt_pic_regnum (void)
6172 if (current_function_is_leaf && !crtl->profile
6173 && !ix86_current_function_calls_tls_descriptor)
6176 for (i = 2; i >= 0; --i)
6177 if (!df_regs_ever_live_p (i))
6181 return INVALID_REGNUM;
6184 /* Return 1 if we need to save REGNO. */
6186 ix86_save_reg (unsigned int regno, int maybe_eh_return)
6188 if (pic_offset_table_rtx
6189 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
6190 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6192 || crtl->calls_eh_return
6193 || crtl->uses_const_pool))
6195 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
6200 if (crtl->calls_eh_return && maybe_eh_return)
6205 unsigned test = EH_RETURN_DATA_REGNO (i);
6206 if (test == INVALID_REGNUM)
6213 if (cfun->machine->force_align_arg_pointer
6214 && regno == REGNO (cfun->machine->force_align_arg_pointer))
6217 return (df_regs_ever_live_p (regno)
6218 && !call_used_regs[regno]
6219 && !fixed_regs[regno]
6220 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6223 /* Return number of registers to be saved on the stack. */
6226 ix86_nsaved_regs (void)
6231 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
6232 if (ix86_save_reg (regno, true))
6237 /* Return the offset between two registers, one to be eliminated, and the other
6238 its replacement, at the start of a routine. */
6241 ix86_initial_elimination_offset (int from, int to)
6243 struct ix86_frame frame;
6244 ix86_compute_frame_layout (&frame);
6246 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6247 return frame.hard_frame_pointer_offset;
6248 else if (from == FRAME_POINTER_REGNUM
6249 && to == HARD_FRAME_POINTER_REGNUM)
6250 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6253 gcc_assert (to == STACK_POINTER_REGNUM);
6255 if (from == ARG_POINTER_REGNUM)
6256 return frame.stack_pointer_offset;
6258 gcc_assert (from == FRAME_POINTER_REGNUM);
6259 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6263 /* Fill structure ix86_frame about frame of currently computed function. */
6266 ix86_compute_frame_layout (struct ix86_frame *frame)
6268 HOST_WIDE_INT total_size;
6269 unsigned int stack_alignment_needed;
6270 HOST_WIDE_INT offset;
6271 unsigned int preferred_alignment;
6272 HOST_WIDE_INT size = get_frame_size ();
6274 frame->nregs = ix86_nsaved_regs ();
6277 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6278 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6280 /* During reload iteration the amount of registers saved can change.
6281 Recompute the value as needed. Do not recompute when amount of registers
6282 didn't change as reload does multiple calls to the function and does not
6283 expect the decision to change within single iteration. */
6285 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
6287 int count = frame->nregs;
6289 cfun->machine->use_fast_prologue_epilogue_nregs = count;
6290 /* The fast prologue uses move instead of push to save registers. This
6291 is significantly longer, but also executes faster as modern hardware
6292 can execute the moves in parallel, but can't do that for push/pop.
6294 Be careful about choosing what prologue to emit: When function takes
6295 many instructions to execute we may use slow version as well as in
6296 case function is known to be outside hot spot (this is known with
6297 feedback only). Weight the size of function by number of registers
6298 to save as it is cheap to use one or two push instructions but very
6299 slow to use many of them. */
6301 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6302 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6303 || (flag_branch_probabilities
6304 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6305 cfun->machine->use_fast_prologue_epilogue = false;
6307 cfun->machine->use_fast_prologue_epilogue
6308 = !expensive_function_p (count);
6310 if (TARGET_PROLOGUE_USING_MOVE
6311 && cfun->machine->use_fast_prologue_epilogue)
6312 frame->save_regs_using_mov = true;
6314 frame->save_regs_using_mov = false;
6317 /* Skip return address and saved base pointer. */
6318 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6320 frame->hard_frame_pointer_offset = offset;
6322 /* Do some sanity checking of stack_alignment_needed and
6323 preferred_alignment, since i386 port is the only using those features
6324 that may break easily. */
6326 gcc_assert (!size || stack_alignment_needed);
6327 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6328 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6329 gcc_assert (stack_alignment_needed
6330 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6332 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6333 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6335 /* Register save area */
6336 offset += frame->nregs * UNITS_PER_WORD;
6339 if (ix86_save_varrargs_registers)
6341 offset += X86_64_VARARGS_SIZE;
6342 frame->va_arg_size = X86_64_VARARGS_SIZE;
6345 frame->va_arg_size = 0;
6347 /* Align start of frame for local function. */
6348 frame->padding1 = ((offset + stack_alignment_needed - 1)
6349 & -stack_alignment_needed) - offset;
6351 offset += frame->padding1;
6353 /* Frame pointer points here. */
6354 frame->frame_pointer_offset = offset;
6358 /* Add outgoing arguments area. Can be skipped if we eliminated
6359 all the function calls as dead code.
6360 Skipping is however impossible when function calls alloca. Alloca
6361 expander assumes that last crtl->outgoing_args_size
6362 of stack frame are unused. */
6363 if (ACCUMULATE_OUTGOING_ARGS
6364 && (!current_function_is_leaf || cfun->calls_alloca
6365 || ix86_current_function_calls_tls_descriptor))
6367 offset += crtl->outgoing_args_size;
6368 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6371 frame->outgoing_arguments_size = 0;
6373 /* Align stack boundary. Only needed if we're calling another function
6375 if (!current_function_is_leaf || cfun->calls_alloca
6376 || ix86_current_function_calls_tls_descriptor)
6377 frame->padding2 = ((offset + preferred_alignment - 1)
6378 & -preferred_alignment) - offset;
6380 frame->padding2 = 0;
6382 offset += frame->padding2;
6384 /* We've reached end of stack frame. */
6385 frame->stack_pointer_offset = offset;
6387 /* Size prologue needs to allocate. */
6388 frame->to_allocate =
6389 (size + frame->padding1 + frame->padding2
6390 + frame->outgoing_arguments_size + frame->va_arg_size);
6392 if ((!frame->to_allocate && frame->nregs <= 1)
6393 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6394 frame->save_regs_using_mov = false;
6396 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
6397 && current_function_is_leaf
6398 && !ix86_current_function_calls_tls_descriptor)
6400 frame->red_zone_size = frame->to_allocate;
6401 if (frame->save_regs_using_mov)
6402 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6403 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6404 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6407 frame->red_zone_size = 0;
6408 frame->to_allocate -= frame->red_zone_size;
6409 frame->stack_pointer_offset -= frame->red_zone_size;
6411 fprintf (stderr, "\n");
6412 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6413 fprintf (stderr, "size: %ld\n", (long)size);
6414 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6415 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6416 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6417 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6418 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6419 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6420 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6421 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6422 (long)frame->hard_frame_pointer_offset);
6423 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6424 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6425 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
6426 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6430 /* Emit code to save registers in the prologue. */
6433 ix86_emit_save_regs (void)
6438 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6439 if (ix86_save_reg (regno, true))
6441 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6442 RTX_FRAME_RELATED_P (insn) = 1;
6446 /* Emit code to save registers using MOV insns. First register
6447 is restored from POINTER + OFFSET. */
6449 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6454 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6455 if (ix86_save_reg (regno, true))
6457 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6459 gen_rtx_REG (Pmode, regno));
6460 RTX_FRAME_RELATED_P (insn) = 1;
6461 offset += UNITS_PER_WORD;
6465 /* Expand prologue or epilogue stack adjustment.
6466 The pattern exist to put a dependency on all ebp-based memory accesses.
6467 STYLE should be negative if instructions should be marked as frame related,
6468 zero if %r11 register is live and cannot be freely used and positive
6472 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6477 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6478 else if (x86_64_immediate_operand (offset, DImode))
6479 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6483 /* r11 is used by indirect sibcall return as well, set before the
6484 epilogue and used after the epilogue. ATM indirect sibcall
6485 shouldn't be used together with huge frame sizes in one
6486 function because of the frame_size check in sibcall.c. */
6488 r11 = gen_rtx_REG (DImode, R11_REG);
6489 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6491 RTX_FRAME_RELATED_P (insn) = 1;
6492 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6496 RTX_FRAME_RELATED_P (insn) = 1;
6499 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6502 ix86_internal_arg_pointer (void)
6504 bool has_force_align_arg_pointer =
6505 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6506 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6507 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6508 && DECL_NAME (current_function_decl)
6509 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6510 && DECL_FILE_SCOPE_P (current_function_decl))
6511 || ix86_force_align_arg_pointer
6512 || has_force_align_arg_pointer)
6514 /* Nested functions can't realign the stack due to a register
6516 if (DECL_CONTEXT (current_function_decl)
6517 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6519 if (ix86_force_align_arg_pointer)
6520 warning (0, "-mstackrealign ignored for nested functions");
6521 if (has_force_align_arg_pointer)
6522 error ("%s not supported for nested functions",
6523 ix86_force_align_arg_pointer_string);
6524 return virtual_incoming_args_rtx;
6526 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6527 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6530 return virtual_incoming_args_rtx;
6533 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6534 This is called from dwarf2out.c to emit call frame instructions
6535 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6537 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6539 rtx unspec = SET_SRC (pattern);
6540 gcc_assert (GET_CODE (unspec) == UNSPEC);
6544 case UNSPEC_REG_SAVE:
6545 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6546 SET_DEST (pattern));
6548 case UNSPEC_DEF_CFA:
6549 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6550 INTVAL (XVECEXP (unspec, 0, 0)));
6557 /* Expand the prologue into a bunch of separate insns. */
6560 ix86_expand_prologue (void)
6564 struct ix86_frame frame;
6565 HOST_WIDE_INT allocate;
6567 ix86_compute_frame_layout (&frame);
6569 if (cfun->machine->force_align_arg_pointer)
6573 /* Grab the argument pointer. */
6574 x = plus_constant (stack_pointer_rtx, 4);
6575 y = cfun->machine->force_align_arg_pointer;
6576 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6577 RTX_FRAME_RELATED_P (insn) = 1;
6579 /* The unwind info consists of two parts: install the fafp as the cfa,
6580 and record the fafp as the "save register" of the stack pointer.
6581 The later is there in order that the unwinder can see where it
6582 should restore the stack pointer across the and insn. */
6583 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6584 x = gen_rtx_SET (VOIDmode, y, x);
6585 RTX_FRAME_RELATED_P (x) = 1;
6586 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6588 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6589 RTX_FRAME_RELATED_P (y) = 1;
6590 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6591 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6592 REG_NOTES (insn) = x;
6594 /* Align the stack. */
6595 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6598 /* And here we cheat like madmen with the unwind info. We force the
6599 cfa register back to sp+4, which is exactly what it was at the
6600 start of the function. Re-pushing the return address results in
6601 the return at the same spot relative to the cfa, and thus is
6602 correct wrt the unwind info. */
6603 x = cfun->machine->force_align_arg_pointer;
6604 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6605 insn = emit_insn (gen_push (x));
6606 RTX_FRAME_RELATED_P (insn) = 1;
6609 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6610 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6611 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6612 REG_NOTES (insn) = x;
6615 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6616 slower on all targets. Also sdb doesn't like it. */
6618 if (frame_pointer_needed)
6620 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6621 RTX_FRAME_RELATED_P (insn) = 1;
6623 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6624 RTX_FRAME_RELATED_P (insn) = 1;
6627 allocate = frame.to_allocate;
6629 if (!frame.save_regs_using_mov)
6630 ix86_emit_save_regs ();
6632 allocate += frame.nregs * UNITS_PER_WORD;
6634 /* When using red zone we may start register saving before allocating
6635 the stack frame saving one cycle of the prologue. However I will
6636 avoid doing this if I am going to have to probe the stack since
6637 at least on x86_64 the stack probe can turn into a call that clobbers
6638 a red zone location */
6639 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
6640 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6641 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6642 : stack_pointer_rtx,
6643 -frame.nregs * UNITS_PER_WORD);
6647 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6648 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6649 GEN_INT (-allocate), -1);
6652 /* Only valid for Win32. */
6653 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6657 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
6659 if (cfun->machine->call_abi == MS_ABI)
6662 eax_live = ix86_eax_live_at_start_p ();
6666 emit_insn (gen_push (eax));
6667 allocate -= UNITS_PER_WORD;
6670 emit_move_insn (eax, GEN_INT (allocate));
6673 insn = gen_allocate_stack_worker_64 (eax);
6675 insn = gen_allocate_stack_worker_32 (eax);
6676 insn = emit_insn (insn);
6677 RTX_FRAME_RELATED_P (insn) = 1;
6678 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6679 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6680 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6681 t, REG_NOTES (insn));
6685 if (frame_pointer_needed)
6686 t = plus_constant (hard_frame_pointer_rtx,
6689 - frame.nregs * UNITS_PER_WORD);
6691 t = plus_constant (stack_pointer_rtx, allocate);
6692 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6696 if (frame.save_regs_using_mov
6697 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
6698 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6700 if (!frame_pointer_needed || !frame.to_allocate)
6701 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6703 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6704 -frame.nregs * UNITS_PER_WORD);
6707 pic_reg_used = false;
6708 if (pic_offset_table_rtx
6709 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6712 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6714 if (alt_pic_reg_used != INVALID_REGNUM)
6715 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6717 pic_reg_used = true;
6724 if (ix86_cmodel == CM_LARGE_PIC)
6726 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6727 rtx label = gen_label_rtx ();
6729 LABEL_PRESERVE_P (label) = 1;
6730 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6731 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6732 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6733 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6734 pic_offset_table_rtx, tmp_reg));
6737 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6740 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6743 /* Prevent function calls from being scheduled before the call to mcount.
6744 In the pic_reg_used case, make sure that the got load isn't deleted. */
6748 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6749 emit_insn (gen_blockage ());
6752 /* Emit cld instruction if stringops are used in the function. */
6753 if (TARGET_CLD && ix86_current_function_needs_cld)
6754 emit_insn (gen_cld ());
6757 /* Emit code to restore saved registers using MOV insns. First register
6758 is restored from POINTER + OFFSET. */
6760 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6761 int maybe_eh_return)
6764 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6766 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6767 if (ix86_save_reg (regno, maybe_eh_return))
6769 /* Ensure that adjust_address won't be forced to produce pointer
6770 out of range allowed by x86-64 instruction set. */
6771 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6775 r11 = gen_rtx_REG (DImode, R11_REG);
6776 emit_move_insn (r11, GEN_INT (offset));
6777 emit_insn (gen_adddi3 (r11, r11, pointer));
6778 base_address = gen_rtx_MEM (Pmode, r11);
6781 emit_move_insn (gen_rtx_REG (Pmode, regno),
6782 adjust_address (base_address, Pmode, offset));
6783 offset += UNITS_PER_WORD;
6787 /* Restore function stack, frame, and registers. */
6790 ix86_expand_epilogue (int style)
6793 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6794 struct ix86_frame frame;
6795 HOST_WIDE_INT offset;
6797 ix86_compute_frame_layout (&frame);
6799 /* Calculate start of saved registers relative to ebp. Special care
6800 must be taken for the normal return case of a function using
6801 eh_return: the eax and edx registers are marked as saved, but not
6802 restored along this path. */
6803 offset = frame.nregs;
6804 if (crtl->calls_eh_return && style != 2)
6806 offset *= -UNITS_PER_WORD;
6808 /* If we're only restoring one register and sp is not valid then
6809 using a move instruction to restore the register since it's
6810 less work than reloading sp and popping the register.
6812 The default code result in stack adjustment using add/lea instruction,
6813 while this code results in LEAVE instruction (or discrete equivalent),
6814 so it is profitable in some other cases as well. Especially when there
6815 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6816 and there is exactly one register to pop. This heuristic may need some
6817 tuning in future. */
6818 if ((!sp_valid && frame.nregs <= 1)
6819 || (TARGET_EPILOGUE_USING_MOVE
6820 && cfun->machine->use_fast_prologue_epilogue
6821 && (frame.nregs > 1 || frame.to_allocate))
6822 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6823 || (frame_pointer_needed && TARGET_USE_LEAVE
6824 && cfun->machine->use_fast_prologue_epilogue
6825 && frame.nregs == 1)
6826 || crtl->calls_eh_return)
6828 /* Restore registers. We can use ebp or esp to address the memory
6829 locations. If both are available, default to ebp, since offsets
6830 are known to be small. Only exception is esp pointing directly to the
6831 end of block of saved registers, where we may simplify addressing
6834 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6835 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6836 frame.to_allocate, style == 2);
6838 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6839 offset, style == 2);
6841 /* eh_return epilogues need %ecx added to the stack pointer. */
6844 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6846 if (frame_pointer_needed)
6848 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6849 tmp = plus_constant (tmp, UNITS_PER_WORD);
6850 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6852 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6853 emit_move_insn (hard_frame_pointer_rtx, tmp);
6855 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6860 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6861 tmp = plus_constant (tmp, (frame.to_allocate
6862 + frame.nregs * UNITS_PER_WORD));
6863 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6866 else if (!frame_pointer_needed)
6867 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6868 GEN_INT (frame.to_allocate
6869 + frame.nregs * UNITS_PER_WORD),
6871 /* If not an i386, mov & pop is faster than "leave". */
6872 else if (TARGET_USE_LEAVE || optimize_size
6873 || !cfun->machine->use_fast_prologue_epilogue)
6874 emit_insn ((*ix86_gen_leave) ());
6877 pro_epilogue_adjust_stack (stack_pointer_rtx,
6878 hard_frame_pointer_rtx,
6881 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
6886 /* First step is to deallocate the stack frame so that we can
6887 pop the registers. */
6890 gcc_assert (frame_pointer_needed);
6891 pro_epilogue_adjust_stack (stack_pointer_rtx,
6892 hard_frame_pointer_rtx,
6893 GEN_INT (offset), style);
6895 else if (frame.to_allocate)
6896 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6897 GEN_INT (frame.to_allocate), style);
6899 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6900 if (ix86_save_reg (regno, false))
6901 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
6902 if (frame_pointer_needed)
6904 /* Leave results in shorter dependency chains on CPUs that are
6905 able to grok it fast. */
6906 if (TARGET_USE_LEAVE)
6907 emit_insn ((*ix86_gen_leave) ());
6909 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
6913 if (cfun->machine->force_align_arg_pointer)
6915 emit_insn (gen_addsi3 (stack_pointer_rtx,
6916 cfun->machine->force_align_arg_pointer,
6920 /* Sibcall epilogues don't want a return instruction. */
6924 if (crtl->args.pops_args && crtl->args.size)
6926 rtx popc = GEN_INT (crtl->args.pops_args);
6928 /* i386 can only pop 64K bytes. If asked to pop more, pop
6929 return address, do explicit add, and jump indirectly to the
6932 if (crtl->args.pops_args >= 65536)
6934 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6936 /* There is no "pascal" calling convention in any 64bit ABI. */
6937 gcc_assert (!TARGET_64BIT);
6939 emit_insn (gen_popsi1 (ecx));
6940 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6941 emit_jump_insn (gen_return_indirect_internal (ecx));
6944 emit_jump_insn (gen_return_pop_internal (popc));
6947 emit_jump_insn (gen_return_internal ());
6950 /* Reset from the function's potential modifications. */
6953 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6954 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6956 if (pic_offset_table_rtx)
6957 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6959 /* Mach-O doesn't support labels at the end of objects, so if
6960 it looks like we might want one, insert a NOP. */
6962 rtx insn = get_last_insn ();
6965 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6966 insn = PREV_INSN (insn);
6970 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6971 fputs ("\tnop\n", file);
6977 /* Extract the parts of an RTL expression that is a valid memory address
6978 for an instruction. Return 0 if the structure of the address is
6979 grossly off. Return -1 if the address contains ASHIFT, so it is not
6980 strictly valid, but still used for computing length of lea instruction. */
6983 ix86_decompose_address (rtx addr, struct ix86_address *out)
6985 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6986 rtx base_reg, index_reg;
6987 HOST_WIDE_INT scale = 1;
6988 rtx scale_rtx = NULL_RTX;
6990 enum ix86_address_seg seg = SEG_DEFAULT;
6992 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6994 else if (GET_CODE (addr) == PLUS)
7004 addends[n++] = XEXP (op, 1);
7007 while (GET_CODE (op) == PLUS);
7012 for (i = n; i >= 0; --i)
7015 switch (GET_CODE (op))
7020 index = XEXP (op, 0);
7021 scale_rtx = XEXP (op, 1);
7025 if (XINT (op, 1) == UNSPEC_TP
7026 && TARGET_TLS_DIRECT_SEG_REFS
7027 && seg == SEG_DEFAULT)
7028 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
7057 else if (GET_CODE (addr) == MULT)
7059 index = XEXP (addr, 0); /* index*scale */
7060 scale_rtx = XEXP (addr, 1);
7062 else if (GET_CODE (addr) == ASHIFT)
7066 /* We're called for lea too, which implements ashift on occasion. */
7067 index = XEXP (addr, 0);
7068 tmp = XEXP (addr, 1);
7069 if (!CONST_INT_P (tmp))
7071 scale = INTVAL (tmp);
7072 if ((unsigned HOST_WIDE_INT) scale > 3)
7078 disp = addr; /* displacement */
7080 /* Extract the integral value of scale. */
7083 if (!CONST_INT_P (scale_rtx))
7085 scale = INTVAL (scale_rtx);
7088 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
7089 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
7091 /* Allow arg pointer and stack pointer as index if there is not scaling. */
7092 if (base_reg && index_reg && scale == 1
7093 && (index_reg == arg_pointer_rtx
7094 || index_reg == frame_pointer_rtx
7095 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
7098 tmp = base, base = index, index = tmp;
7099 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
7102 /* Special case: %ebp cannot be encoded as a base without a displacement. */
7103 if ((base_reg == hard_frame_pointer_rtx
7104 || base_reg == frame_pointer_rtx
7105 || base_reg == arg_pointer_rtx) && !disp)
7108 /* Special case: on K6, [%esi] makes the instruction vector decoded.
7109 Avoid this by transforming to [%esi+0]. */
7110 if (TARGET_K6 && !optimize_size
7111 && base_reg && !index_reg && !disp
7113 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
7116 /* Special case: encode reg+reg instead of reg*2. */
7117 if (!base && index && scale && scale == 2)
7118 base = index, base_reg = index_reg, scale = 1;
7120 /* Special case: scaling cannot be encoded without base or displacement. */
7121 if (!base && !disp && index && scale != 1)
7133 /* Return cost of the memory address x.
7134 For i386, it is better to use a complex address than let gcc copy
7135 the address into a reg and make a new pseudo. But not if the address
7136 requires to two regs - that would mean more pseudos with longer
7139 ix86_address_cost (rtx x)
7141 struct ix86_address parts;
7143 int ok = ix86_decompose_address (x, &parts);
7147 if (parts.base && GET_CODE (parts.base) == SUBREG)
7148 parts.base = SUBREG_REG (parts.base);
7149 if (parts.index && GET_CODE (parts.index) == SUBREG)
7150 parts.index = SUBREG_REG (parts.index);
7152 /* Attempt to minimize number of registers in the address. */
7154 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
7156 && (!REG_P (parts.index)
7157 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
7161 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
7163 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
7164 && parts.base != parts.index)
7167 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
7168 since it's predecode logic can't detect the length of instructions
7169 and it degenerates to vector decoded. Increase cost of such
7170 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
7171 to split such addresses or even refuse such addresses at all.
7173 Following addressing modes are affected:
7178 The first and last case may be avoidable by explicitly coding the zero in
7179 memory address, but I don't have AMD-K6 machine handy to check this
7183 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
7184 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
7185 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
7191 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
7192 this is used for to form addresses to local data when -fPIC is in
7196 darwin_local_data_pic (rtx disp)
7198 if (GET_CODE (disp) == MINUS)
7200 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
7201 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
7202 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
7204 const char *sym_name = XSTR (XEXP (disp, 1), 0);
7205 if (! strcmp (sym_name, "<pic base>"))
7213 /* Determine if a given RTX is a valid constant. We already know this
7214 satisfies CONSTANT_P. */
7217 legitimate_constant_p (rtx x)
7219 switch (GET_CODE (x))
7224 if (GET_CODE (x) == PLUS)
7226 if (!CONST_INT_P (XEXP (x, 1)))
7231 if (TARGET_MACHO && darwin_local_data_pic (x))
7234 /* Only some unspecs are valid as "constants". */
7235 if (GET_CODE (x) == UNSPEC)
7236 switch (XINT (x, 1))
7241 return TARGET_64BIT;
7244 x = XVECEXP (x, 0, 0);
7245 return (GET_CODE (x) == SYMBOL_REF
7246 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7248 x = XVECEXP (x, 0, 0);
7249 return (GET_CODE (x) == SYMBOL_REF
7250 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
7255 /* We must have drilled down to a symbol. */
7256 if (GET_CODE (x) == LABEL_REF)
7258 if (GET_CODE (x) != SYMBOL_REF)
7263 /* TLS symbols are never valid. */
7264 if (SYMBOL_REF_TLS_MODEL (x))
7267 /* DLLIMPORT symbols are never valid. */
7268 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7269 && SYMBOL_REF_DLLIMPORT_P (x))
7274 if (GET_MODE (x) == TImode
7275 && x != CONST0_RTX (TImode)
7281 if (x == CONST0_RTX (GET_MODE (x)))
7289 /* Otherwise we handle everything else in the move patterns. */
7293 /* Determine if it's legal to put X into the constant pool. This
7294 is not possible for the address of thread-local symbols, which
7295 is checked above. */
7298 ix86_cannot_force_const_mem (rtx x)
7300 /* We can always put integral constants and vectors in memory. */
7301 switch (GET_CODE (x))
7311 return !legitimate_constant_p (x);
7314 /* Determine if a given RTX is a valid constant address. */
7317 constant_address_p (rtx x)
7319 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7322 /* Nonzero if the constant value X is a legitimate general operand
7323 when generating PIC code. It is given that flag_pic is on and
7324 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7327 legitimate_pic_operand_p (rtx x)
7331 switch (GET_CODE (x))
7334 inner = XEXP (x, 0);
7335 if (GET_CODE (inner) == PLUS
7336 && CONST_INT_P (XEXP (inner, 1)))
7337 inner = XEXP (inner, 0);
7339 /* Only some unspecs are valid as "constants". */
7340 if (GET_CODE (inner) == UNSPEC)
7341 switch (XINT (inner, 1))
7346 return TARGET_64BIT;
7348 x = XVECEXP (inner, 0, 0);
7349 return (GET_CODE (x) == SYMBOL_REF
7350 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7358 return legitimate_pic_address_disp_p (x);
7365 /* Determine if a given CONST RTX is a valid memory displacement
7369 legitimate_pic_address_disp_p (rtx disp)
7373 /* In 64bit mode we can allow direct addresses of symbols and labels
7374 when they are not dynamic symbols. */
7377 rtx op0 = disp, op1;
7379 switch (GET_CODE (disp))
7385 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7387 op0 = XEXP (XEXP (disp, 0), 0);
7388 op1 = XEXP (XEXP (disp, 0), 1);
7389 if (!CONST_INT_P (op1)
7390 || INTVAL (op1) >= 16*1024*1024
7391 || INTVAL (op1) < -16*1024*1024)
7393 if (GET_CODE (op0) == LABEL_REF)
7395 if (GET_CODE (op0) != SYMBOL_REF)
7400 /* TLS references should always be enclosed in UNSPEC. */
7401 if (SYMBOL_REF_TLS_MODEL (op0))
7403 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7404 && ix86_cmodel != CM_LARGE_PIC)
7412 if (GET_CODE (disp) != CONST)
7414 disp = XEXP (disp, 0);
7418 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7419 of GOT tables. We should not need these anyway. */
7420 if (GET_CODE (disp) != UNSPEC
7421 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7422 && XINT (disp, 1) != UNSPEC_GOTOFF
7423 && XINT (disp, 1) != UNSPEC_PLTOFF))
7426 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7427 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7433 if (GET_CODE (disp) == PLUS)
7435 if (!CONST_INT_P (XEXP (disp, 1)))
7437 disp = XEXP (disp, 0);
7441 if (TARGET_MACHO && darwin_local_data_pic (disp))
7444 if (GET_CODE (disp) != UNSPEC)
7447 switch (XINT (disp, 1))
7452 /* We need to check for both symbols and labels because VxWorks loads
7453 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7455 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7456 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7458 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7459 While ABI specify also 32bit relocation but we don't produce it in
7460 small PIC model at all. */
7461 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7462 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7464 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7466 case UNSPEC_GOTTPOFF:
7467 case UNSPEC_GOTNTPOFF:
7468 case UNSPEC_INDNTPOFF:
7471 disp = XVECEXP (disp, 0, 0);
7472 return (GET_CODE (disp) == SYMBOL_REF
7473 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7475 disp = XVECEXP (disp, 0, 0);
7476 return (GET_CODE (disp) == SYMBOL_REF
7477 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7479 disp = XVECEXP (disp, 0, 0);
7480 return (GET_CODE (disp) == SYMBOL_REF
7481 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7487 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7488 memory address for an instruction. The MODE argument is the machine mode
7489 for the MEM expression that wants to use this address.
7491 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7492 convert common non-canonical forms to canonical form so that they will
7496 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7497 rtx addr, int strict)
7499 struct ix86_address parts;
7500 rtx base, index, disp;
7501 HOST_WIDE_INT scale;
7502 const char *reason = NULL;
7503 rtx reason_rtx = NULL_RTX;
7505 if (ix86_decompose_address (addr, &parts) <= 0)
7507 reason = "decomposition failed";
7512 index = parts.index;
7514 scale = parts.scale;
7516 /* Validate base register.
7518 Don't allow SUBREG's that span more than a word here. It can lead to spill
7519 failures when the base is one word out of a two word structure, which is
7520 represented internally as a DImode int. */
7529 else if (GET_CODE (base) == SUBREG
7530 && REG_P (SUBREG_REG (base))
7531 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7533 reg = SUBREG_REG (base);
7536 reason = "base is not a register";
7540 if (GET_MODE (base) != Pmode)
7542 reason = "base is not in Pmode";
7546 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7547 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7549 reason = "base is not valid";
7554 /* Validate index register.
7556 Don't allow SUBREG's that span more than a word here -- same as above. */
7565 else if (GET_CODE (index) == SUBREG
7566 && REG_P (SUBREG_REG (index))
7567 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7569 reg = SUBREG_REG (index);
7572 reason = "index is not a register";
7576 if (GET_MODE (index) != Pmode)
7578 reason = "index is not in Pmode";
7582 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7583 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7585 reason = "index is not valid";
7590 /* Validate scale factor. */
7593 reason_rtx = GEN_INT (scale);
7596 reason = "scale without index";
7600 if (scale != 2 && scale != 4 && scale != 8)
7602 reason = "scale is not a valid multiplier";
7607 /* Validate displacement. */
7612 if (GET_CODE (disp) == CONST
7613 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7614 switch (XINT (XEXP (disp, 0), 1))
7616 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7617 used. While ABI specify also 32bit relocations, we don't produce
7618 them at all and use IP relative instead. */
7621 gcc_assert (flag_pic);
7623 goto is_legitimate_pic;
7624 reason = "64bit address unspec";
7627 case UNSPEC_GOTPCREL:
7628 gcc_assert (flag_pic);
7629 goto is_legitimate_pic;
7631 case UNSPEC_GOTTPOFF:
7632 case UNSPEC_GOTNTPOFF:
7633 case UNSPEC_INDNTPOFF:
7639 reason = "invalid address unspec";
7643 else if (SYMBOLIC_CONST (disp)
7647 && MACHOPIC_INDIRECT
7648 && !machopic_operand_p (disp)
7654 if (TARGET_64BIT && (index || base))
7656 /* foo@dtpoff(%rX) is ok. */
7657 if (GET_CODE (disp) != CONST
7658 || GET_CODE (XEXP (disp, 0)) != PLUS
7659 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7660 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7661 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7662 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7664 reason = "non-constant pic memory reference";
7668 else if (! legitimate_pic_address_disp_p (disp))
7670 reason = "displacement is an invalid pic construct";
7674 /* This code used to verify that a symbolic pic displacement
7675 includes the pic_offset_table_rtx register.
7677 While this is good idea, unfortunately these constructs may
7678 be created by "adds using lea" optimization for incorrect
7687 This code is nonsensical, but results in addressing
7688 GOT table with pic_offset_table_rtx base. We can't
7689 just refuse it easily, since it gets matched by
7690 "addsi3" pattern, that later gets split to lea in the
7691 case output register differs from input. While this
7692 can be handled by separate addsi pattern for this case
7693 that never results in lea, this seems to be easier and
7694 correct fix for crash to disable this test. */
7696 else if (GET_CODE (disp) != LABEL_REF
7697 && !CONST_INT_P (disp)
7698 && (GET_CODE (disp) != CONST
7699 || !legitimate_constant_p (disp))
7700 && (GET_CODE (disp) != SYMBOL_REF
7701 || !legitimate_constant_p (disp)))
7703 reason = "displacement is not constant";
7706 else if (TARGET_64BIT
7707 && !x86_64_immediate_operand (disp, VOIDmode))
7709 reason = "displacement is out of range";
7714 /* Everything looks valid. */
7721 /* Return a unique alias set for the GOT. */
7723 static alias_set_type
7724 ix86_GOT_alias_set (void)
7726 static alias_set_type set = -1;
7728 set = new_alias_set ();
7732 /* Return a legitimate reference for ORIG (an address) using the
7733 register REG. If REG is 0, a new pseudo is generated.
7735 There are two types of references that must be handled:
7737 1. Global data references must load the address from the GOT, via
7738 the PIC reg. An insn is emitted to do this load, and the reg is
7741 2. Static data references, constant pool addresses, and code labels
7742 compute the address as an offset from the GOT, whose base is in
7743 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7744 differentiate them from global data objects. The returned
7745 address is the PIC reg + an unspec constant.
7747 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7748 reg also appears in the address. */
7751 legitimize_pic_address (rtx orig, rtx reg)
7758 if (TARGET_MACHO && !TARGET_64BIT)
7761 reg = gen_reg_rtx (Pmode);
7762 /* Use the generic Mach-O PIC machinery. */
7763 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7767 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7769 else if (TARGET_64BIT
7770 && ix86_cmodel != CM_SMALL_PIC
7771 && gotoff_operand (addr, Pmode))
7774 /* This symbol may be referenced via a displacement from the PIC
7775 base address (@GOTOFF). */
7777 if (reload_in_progress)
7778 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7779 if (GET_CODE (addr) == CONST)
7780 addr = XEXP (addr, 0);
7781 if (GET_CODE (addr) == PLUS)
7783 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7785 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7788 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7789 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7791 tmpreg = gen_reg_rtx (Pmode);
7794 emit_move_insn (tmpreg, new_rtx);
7798 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7799 tmpreg, 1, OPTAB_DIRECT);
7802 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7804 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7806 /* This symbol may be referenced via a displacement from the PIC
7807 base address (@GOTOFF). */
7809 if (reload_in_progress)
7810 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7811 if (GET_CODE (addr) == CONST)
7812 addr = XEXP (addr, 0);
7813 if (GET_CODE (addr) == PLUS)
7815 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7817 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7820 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7821 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7822 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7826 emit_move_insn (reg, new_rtx);
7830 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7831 /* We can't use @GOTOFF for text labels on VxWorks;
7832 see gotoff_operand. */
7833 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7835 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7837 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
7838 return legitimize_dllimport_symbol (addr, true);
7839 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
7840 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7841 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
7843 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
7844 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
7848 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7850 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7851 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7852 new_rtx = gen_const_mem (Pmode, new_rtx);
7853 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7856 reg = gen_reg_rtx (Pmode);
7857 /* Use directly gen_movsi, otherwise the address is loaded
7858 into register for CSE. We don't want to CSE this addresses,
7859 instead we CSE addresses from the GOT table, so skip this. */
7860 emit_insn (gen_movsi (reg, new_rtx));
7865 /* This symbol must be referenced via a load from the
7866 Global Offset Table (@GOT). */
7868 if (reload_in_progress)
7869 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7870 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7871 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7873 new_rtx = force_reg (Pmode, new_rtx);
7874 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7875 new_rtx = gen_const_mem (Pmode, new_rtx);
7876 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7879 reg = gen_reg_rtx (Pmode);
7880 emit_move_insn (reg, new_rtx);
7886 if (CONST_INT_P (addr)
7887 && !x86_64_immediate_operand (addr, VOIDmode))
7891 emit_move_insn (reg, addr);
7895 new_rtx = force_reg (Pmode, addr);
7897 else if (GET_CODE (addr) == CONST)
7899 addr = XEXP (addr, 0);
7901 /* We must match stuff we generate before. Assume the only
7902 unspecs that can get here are ours. Not that we could do
7903 anything with them anyway.... */
7904 if (GET_CODE (addr) == UNSPEC
7905 || (GET_CODE (addr) == PLUS
7906 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7908 gcc_assert (GET_CODE (addr) == PLUS);
7910 if (GET_CODE (addr) == PLUS)
7912 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7914 /* Check first to see if this is a constant offset from a @GOTOFF
7915 symbol reference. */
7916 if (gotoff_operand (op0, Pmode)
7917 && CONST_INT_P (op1))
7921 if (reload_in_progress)
7922 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7923 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7925 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7926 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7927 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7931 emit_move_insn (reg, new_rtx);
7937 if (INTVAL (op1) < -16*1024*1024
7938 || INTVAL (op1) >= 16*1024*1024)
7940 if (!x86_64_immediate_operand (op1, Pmode))
7941 op1 = force_reg (Pmode, op1);
7942 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7948 base = legitimize_pic_address (XEXP (addr, 0), reg);
7949 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7950 base == reg ? NULL_RTX : reg);
7952 if (CONST_INT_P (new_rtx))
7953 new_rtx = plus_constant (base, INTVAL (new_rtx));
7956 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7958 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7959 new_rtx = XEXP (new_rtx, 1);
7961 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7969 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7972 get_thread_pointer (int to_reg)
7976 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7980 reg = gen_reg_rtx (Pmode);
7981 insn = gen_rtx_SET (VOIDmode, reg, tp);
7982 insn = emit_insn (insn);
7987 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7988 false if we expect this to be used for a memory address and true if
7989 we expect to load the address into a register. */
7992 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7994 rtx dest, base, off, pic, tp;
7999 case TLS_MODEL_GLOBAL_DYNAMIC:
8000 dest = gen_reg_rtx (Pmode);
8001 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8003 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8005 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
8008 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
8009 insns = get_insns ();
8012 RTL_CONST_CALL_P (insns) = 1;
8013 emit_libcall_block (insns, dest, rax, x);
8015 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8016 emit_insn (gen_tls_global_dynamic_64 (dest, x));
8018 emit_insn (gen_tls_global_dynamic_32 (dest, x));
8020 if (TARGET_GNU2_TLS)
8022 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
8024 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8028 case TLS_MODEL_LOCAL_DYNAMIC:
8029 base = gen_reg_rtx (Pmode);
8030 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8032 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8034 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
8037 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
8038 insns = get_insns ();
8041 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
8042 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
8043 RTL_CONST_CALL_P (insns) = 1;
8044 emit_libcall_block (insns, base, rax, note);
8046 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8047 emit_insn (gen_tls_local_dynamic_base_64 (base));
8049 emit_insn (gen_tls_local_dynamic_base_32 (base));
8051 if (TARGET_GNU2_TLS)
8053 rtx x = ix86_tls_module_base ();
8055 set_unique_reg_note (get_last_insn (), REG_EQUIV,
8056 gen_rtx_MINUS (Pmode, x, tp));
8059 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
8060 off = gen_rtx_CONST (Pmode, off);
8062 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
8064 if (TARGET_GNU2_TLS)
8066 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
8068 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8073 case TLS_MODEL_INITIAL_EXEC:
8077 type = UNSPEC_GOTNTPOFF;
8081 if (reload_in_progress)
8082 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8083 pic = pic_offset_table_rtx;
8084 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
8086 else if (!TARGET_ANY_GNU_TLS)
8088 pic = gen_reg_rtx (Pmode);
8089 emit_insn (gen_set_got (pic));
8090 type = UNSPEC_GOTTPOFF;
8095 type = UNSPEC_INDNTPOFF;
8098 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
8099 off = gen_rtx_CONST (Pmode, off);
8101 off = gen_rtx_PLUS (Pmode, pic, off);
8102 off = gen_const_mem (Pmode, off);
8103 set_mem_alias_set (off, ix86_GOT_alias_set ());
8105 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8107 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8108 off = force_reg (Pmode, off);
8109 return gen_rtx_PLUS (Pmode, base, off);
8113 base = get_thread_pointer (true);
8114 dest = gen_reg_rtx (Pmode);
8115 emit_insn (gen_subsi3 (dest, base, off));
8119 case TLS_MODEL_LOCAL_EXEC:
8120 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
8121 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8122 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
8123 off = gen_rtx_CONST (Pmode, off);
8125 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8127 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8128 return gen_rtx_PLUS (Pmode, base, off);
8132 base = get_thread_pointer (true);
8133 dest = gen_reg_rtx (Pmode);
8134 emit_insn (gen_subsi3 (dest, base, off));
8145 /* Create or return the unique __imp_DECL dllimport symbol corresponding
8148 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
8149 htab_t dllimport_map;
8152 get_dllimport_decl (tree decl)
8154 struct tree_map *h, in;
8158 size_t namelen, prefixlen;
8164 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
8166 in.hash = htab_hash_pointer (decl);
8167 in.base.from = decl;
8168 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
8169 h = (struct tree_map *) *loc;
8173 *loc = h = GGC_NEW (struct tree_map);
8175 h->base.from = decl;
8176 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
8177 DECL_ARTIFICIAL (to) = 1;
8178 DECL_IGNORED_P (to) = 1;
8179 DECL_EXTERNAL (to) = 1;
8180 TREE_READONLY (to) = 1;
8182 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
8183 name = targetm.strip_name_encoding (name);
8184 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
8185 namelen = strlen (name);
8186 prefixlen = strlen (prefix);
8187 imp_name = (char *) alloca (namelen + prefixlen + 1);
8188 memcpy (imp_name, prefix, prefixlen);
8189 memcpy (imp_name + prefixlen, name, namelen + 1);
8191 name = ggc_alloc_string (imp_name, namelen + prefixlen);
8192 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
8193 SET_SYMBOL_REF_DECL (rtl, to);
8194 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
8196 rtl = gen_const_mem (Pmode, rtl);
8197 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
8199 SET_DECL_RTL (to, rtl);
8200 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
8205 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
8206 true if we require the result be a register. */
8209 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
8214 gcc_assert (SYMBOL_REF_DECL (symbol));
8215 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
8217 x = DECL_RTL (imp_decl);
8219 x = force_reg (Pmode, x);
8223 /* Try machine-dependent ways of modifying an illegitimate address
8224 to be legitimate. If we find one, return the new, valid address.
8225 This macro is used in only one place: `memory_address' in explow.c.
8227 OLDX is the address as it was before break_out_memory_refs was called.
8228 In some cases it is useful to look at this to decide what needs to be done.
8230 MODE and WIN are passed so that this macro can use
8231 GO_IF_LEGITIMATE_ADDRESS.
8233 It is always safe for this macro to do nothing. It exists to recognize
8234 opportunities to optimize the output.
8236 For the 80386, we handle X+REG by loading X into a register R and
8237 using R+REG. R will go in a general reg and indexing will be used.
8238 However, if REG is a broken-out memory address or multiplication,
8239 nothing needs to be done because REG can certainly go in a general reg.
8241 When -fpic is used, special handling is needed for symbolic references.
8242 See comments by legitimize_pic_address in i386.c for details. */
8245 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
8250 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
8252 return legitimize_tls_address (x, (enum tls_model) log, false);
8253 if (GET_CODE (x) == CONST
8254 && GET_CODE (XEXP (x, 0)) == PLUS
8255 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8256 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
8258 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
8259 (enum tls_model) log, false);
8260 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8263 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8265 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8266 return legitimize_dllimport_symbol (x, true);
8267 if (GET_CODE (x) == CONST
8268 && GET_CODE (XEXP (x, 0)) == PLUS
8269 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8270 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8272 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8273 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8277 if (flag_pic && SYMBOLIC_CONST (x))
8278 return legitimize_pic_address (x, 0);
8280 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8281 if (GET_CODE (x) == ASHIFT
8282 && CONST_INT_P (XEXP (x, 1))
8283 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
8286 log = INTVAL (XEXP (x, 1));
8287 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8288 GEN_INT (1 << log));
8291 if (GET_CODE (x) == PLUS)
8293 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
8295 if (GET_CODE (XEXP (x, 0)) == ASHIFT
8296 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8297 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
8300 log = INTVAL (XEXP (XEXP (x, 0), 1));
8301 XEXP (x, 0) = gen_rtx_MULT (Pmode,
8302 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8303 GEN_INT (1 << log));
8306 if (GET_CODE (XEXP (x, 1)) == ASHIFT
8307 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8308 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8311 log = INTVAL (XEXP (XEXP (x, 1), 1));
8312 XEXP (x, 1) = gen_rtx_MULT (Pmode,
8313 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8314 GEN_INT (1 << log));
8317 /* Put multiply first if it isn't already. */
8318 if (GET_CODE (XEXP (x, 1)) == MULT)
8320 rtx tmp = XEXP (x, 0);
8321 XEXP (x, 0) = XEXP (x, 1);
8326 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8327 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8328 created by virtual register instantiation, register elimination, and
8329 similar optimizations. */
8330 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8333 x = gen_rtx_PLUS (Pmode,
8334 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8335 XEXP (XEXP (x, 1), 0)),
8336 XEXP (XEXP (x, 1), 1));
8340 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8341 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8342 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8343 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8344 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8345 && CONSTANT_P (XEXP (x, 1)))
8348 rtx other = NULL_RTX;
8350 if (CONST_INT_P (XEXP (x, 1)))
8352 constant = XEXP (x, 1);
8353 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8355 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8357 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8358 other = XEXP (x, 1);
8366 x = gen_rtx_PLUS (Pmode,
8367 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8368 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8369 plus_constant (other, INTVAL (constant)));
8373 if (changed && legitimate_address_p (mode, x, FALSE))
8376 if (GET_CODE (XEXP (x, 0)) == MULT)
8379 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8382 if (GET_CODE (XEXP (x, 1)) == MULT)
8385 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8389 && REG_P (XEXP (x, 1))
8390 && REG_P (XEXP (x, 0)))
8393 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8396 x = legitimize_pic_address (x, 0);
8399 if (changed && legitimate_address_p (mode, x, FALSE))
8402 if (REG_P (XEXP (x, 0)))
8404 rtx temp = gen_reg_rtx (Pmode);
8405 rtx val = force_operand (XEXP (x, 1), temp);
8407 emit_move_insn (temp, val);
8413 else if (REG_P (XEXP (x, 1)))
8415 rtx temp = gen_reg_rtx (Pmode);
8416 rtx val = force_operand (XEXP (x, 0), temp);
8418 emit_move_insn (temp, val);
8428 /* Print an integer constant expression in assembler syntax. Addition
8429 and subtraction are the only arithmetic that may appear in these
8430 expressions. FILE is the stdio stream to write to, X is the rtx, and
8431 CODE is the operand print code from the output string. */
8434 output_pic_addr_const (FILE *file, rtx x, int code)
8438 switch (GET_CODE (x))
8441 gcc_assert (flag_pic);
8446 if (! TARGET_MACHO || TARGET_64BIT)
8447 output_addr_const (file, x);
8450 const char *name = XSTR (x, 0);
8452 /* Mark the decl as referenced so that cgraph will
8453 output the function. */
8454 if (SYMBOL_REF_DECL (x))
8455 mark_decl_referenced (SYMBOL_REF_DECL (x));
8458 if (MACHOPIC_INDIRECT
8459 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8460 name = machopic_indirection_name (x, /*stub_p=*/true);
8462 assemble_name (file, name);
8464 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
8465 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8466 fputs ("@PLT", file);
8473 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8474 assemble_name (asm_out_file, buf);
8478 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8482 /* This used to output parentheses around the expression,
8483 but that does not work on the 386 (either ATT or BSD assembler). */
8484 output_pic_addr_const (file, XEXP (x, 0), code);
8488 if (GET_MODE (x) == VOIDmode)
8490 /* We can use %d if the number is <32 bits and positive. */
8491 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8492 fprintf (file, "0x%lx%08lx",
8493 (unsigned long) CONST_DOUBLE_HIGH (x),
8494 (unsigned long) CONST_DOUBLE_LOW (x));
8496 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8499 /* We can't handle floating point constants;
8500 PRINT_OPERAND must handle them. */
8501 output_operand_lossage ("floating constant misused");
8505 /* Some assemblers need integer constants to appear first. */
8506 if (CONST_INT_P (XEXP (x, 0)))
8508 output_pic_addr_const (file, XEXP (x, 0), code);
8510 output_pic_addr_const (file, XEXP (x, 1), code);
8514 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8515 output_pic_addr_const (file, XEXP (x, 1), code);
8517 output_pic_addr_const (file, XEXP (x, 0), code);
8523 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8524 output_pic_addr_const (file, XEXP (x, 0), code);
8526 output_pic_addr_const (file, XEXP (x, 1), code);
8528 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8532 gcc_assert (XVECLEN (x, 0) == 1);
8533 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8534 switch (XINT (x, 1))
8537 fputs ("@GOT", file);
8540 fputs ("@GOTOFF", file);
8543 fputs ("@PLTOFF", file);
8545 case UNSPEC_GOTPCREL:
8546 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8547 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8549 case UNSPEC_GOTTPOFF:
8550 /* FIXME: This might be @TPOFF in Sun ld too. */
8551 fputs ("@GOTTPOFF", file);
8554 fputs ("@TPOFF", file);
8558 fputs ("@TPOFF", file);
8560 fputs ("@NTPOFF", file);
8563 fputs ("@DTPOFF", file);
8565 case UNSPEC_GOTNTPOFF:
8567 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8568 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8570 fputs ("@GOTNTPOFF", file);
8572 case UNSPEC_INDNTPOFF:
8573 fputs ("@INDNTPOFF", file);
8576 output_operand_lossage ("invalid UNSPEC as operand");
8582 output_operand_lossage ("invalid expression as operand");
8586 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8587 We need to emit DTP-relative relocations. */
8589 static void ATTRIBUTE_UNUSED
8590 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8592 fputs (ASM_LONG, file);
8593 output_addr_const (file, x);
8594 fputs ("@DTPOFF", file);
8600 fputs (", 0", file);
8607 /* In the name of slightly smaller debug output, and to cater to
8608 general assembler lossage, recognize PIC+GOTOFF and turn it back
8609 into a direct symbol reference.
8611 On Darwin, this is necessary to avoid a crash, because Darwin
8612 has a different PIC label for each routine but the DWARF debugging
8613 information is not associated with any particular routine, so it's
8614 necessary to remove references to the PIC label from RTL stored by
8615 the DWARF output code. */
8618 ix86_delegitimize_address (rtx orig_x)
8621 /* reg_addend is NULL or a multiple of some register. */
8622 rtx reg_addend = NULL_RTX;
8623 /* const_addend is NULL or a const_int. */
8624 rtx const_addend = NULL_RTX;
8625 /* This is the result, or NULL. */
8626 rtx result = NULL_RTX;
8633 if (GET_CODE (x) != CONST
8634 || GET_CODE (XEXP (x, 0)) != UNSPEC
8635 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8638 return XVECEXP (XEXP (x, 0), 0, 0);
8641 if (GET_CODE (x) != PLUS
8642 || GET_CODE (XEXP (x, 1)) != CONST)
8645 if (REG_P (XEXP (x, 0))
8646 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8647 /* %ebx + GOT/GOTOFF */
8649 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8651 /* %ebx + %reg * scale + GOT/GOTOFF */
8652 reg_addend = XEXP (x, 0);
8653 if (REG_P (XEXP (reg_addend, 0))
8654 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8655 reg_addend = XEXP (reg_addend, 1);
8656 else if (REG_P (XEXP (reg_addend, 1))
8657 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8658 reg_addend = XEXP (reg_addend, 0);
8661 if (!REG_P (reg_addend)
8662 && GET_CODE (reg_addend) != MULT
8663 && GET_CODE (reg_addend) != ASHIFT)
8669 x = XEXP (XEXP (x, 1), 0);
8670 if (GET_CODE (x) == PLUS
8671 && CONST_INT_P (XEXP (x, 1)))
8673 const_addend = XEXP (x, 1);
8677 if (GET_CODE (x) == UNSPEC
8678 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8679 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8680 result = XVECEXP (x, 0, 0);
8682 if (TARGET_MACHO && darwin_local_data_pic (x)
8684 result = XEXP (x, 0);
8690 result = gen_rtx_PLUS (Pmode, result, const_addend);
8692 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8696 /* If X is a machine specific address (i.e. a symbol or label being
8697 referenced as a displacement from the GOT implemented using an
8698 UNSPEC), then return the base term. Otherwise return X. */
8701 ix86_find_base_term (rtx x)
8707 if (GET_CODE (x) != CONST)
8710 if (GET_CODE (term) == PLUS
8711 && (CONST_INT_P (XEXP (term, 1))
8712 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8713 term = XEXP (term, 0);
8714 if (GET_CODE (term) != UNSPEC
8715 || XINT (term, 1) != UNSPEC_GOTPCREL)
8718 term = XVECEXP (term, 0, 0);
8720 if (GET_CODE (term) != SYMBOL_REF
8721 && GET_CODE (term) != LABEL_REF)
8727 term = ix86_delegitimize_address (x);
8729 if (GET_CODE (term) != SYMBOL_REF
8730 && GET_CODE (term) != LABEL_REF)
8737 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8742 if (mode == CCFPmode || mode == CCFPUmode)
8744 enum rtx_code second_code, bypass_code;
8745 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8746 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8747 code = ix86_fp_compare_code_to_integer (code);
8751 code = reverse_condition (code);
8802 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8806 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8807 Those same assemblers have the same but opposite lossage on cmov. */
8809 suffix = fp ? "nbe" : "a";
8810 else if (mode == CCCmode)
8833 gcc_assert (mode == CCmode || mode == CCCmode);
8855 gcc_assert (mode == CCmode || mode == CCCmode);
8856 suffix = fp ? "nb" : "ae";
8859 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8866 else if (mode == CCCmode)
8867 suffix = fp ? "nb" : "ae";
8872 suffix = fp ? "u" : "p";
8875 suffix = fp ? "nu" : "np";
8880 fputs (suffix, file);
8883 /* Print the name of register X to FILE based on its machine mode and number.
8884 If CODE is 'w', pretend the mode is HImode.
8885 If CODE is 'b', pretend the mode is QImode.
8886 If CODE is 'k', pretend the mode is SImode.
8887 If CODE is 'q', pretend the mode is DImode.
8888 If CODE is 'h', pretend the reg is the 'high' byte register.
8889 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8892 print_reg (rtx x, int code, FILE *file)
8894 gcc_assert (x == pc_rtx
8895 || (REGNO (x) != ARG_POINTER_REGNUM
8896 && REGNO (x) != FRAME_POINTER_REGNUM
8897 && REGNO (x) != FLAGS_REG
8898 && REGNO (x) != FPSR_REG
8899 && REGNO (x) != FPCR_REG));
8901 if (ASSEMBLER_DIALECT == ASM_ATT)
8906 gcc_assert (TARGET_64BIT);
8907 fputs ("rip", file);
8911 if (code == 'w' || MMX_REG_P (x))
8913 else if (code == 'b')
8915 else if (code == 'k')
8917 else if (code == 'q')
8919 else if (code == 'y')
8921 else if (code == 'h')
8924 code = GET_MODE_SIZE (GET_MODE (x));
8926 /* Irritatingly, AMD extended registers use different naming convention
8927 from the normal registers. */
8928 if (REX_INT_REG_P (x))
8930 gcc_assert (TARGET_64BIT);
8934 error ("extended registers have no high halves");
8937 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8940 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8943 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8946 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8949 error ("unsupported operand size for extended register");
8957 if (STACK_TOP_P (x))
8959 fputs ("st(0)", file);
8966 if (! ANY_FP_REG_P (x))
8967 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8972 fputs (hi_reg_name[REGNO (x)], file);
8975 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8977 fputs (qi_reg_name[REGNO (x)], file);
8980 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8982 fputs (qi_high_reg_name[REGNO (x)], file);
8989 /* Locate some local-dynamic symbol still in use by this function
8990 so that we can print its name in some tls_local_dynamic_base
8994 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8998 if (GET_CODE (x) == SYMBOL_REF
8999 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
9001 cfun->machine->some_ld_name = XSTR (x, 0);
9009 get_some_local_dynamic_name (void)
9013 if (cfun->machine->some_ld_name)
9014 return cfun->machine->some_ld_name;
9016 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9018 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9019 return cfun->machine->some_ld_name;
9025 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
9026 C -- print opcode suffix for set/cmov insn.
9027 c -- like C, but print reversed condition
9028 E,e -- likewise, but for compare-and-branch fused insn.
9029 F,f -- likewise, but for floating-point.
9030 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
9032 R -- print the prefix for register names.
9033 z -- print the opcode suffix for the size of the current operand.
9034 * -- print a star (in certain assembler syntax)
9035 A -- print an absolute memory reference.
9036 w -- print the operand as if it's a "word" (HImode) even if it isn't.
9037 s -- print a shift double count, followed by the assemblers argument
9039 b -- print the QImode name of the register for the indicated operand.
9040 %b0 would print %al if operands[0] is reg 0.
9041 w -- likewise, print the HImode name of the register.
9042 k -- likewise, print the SImode name of the register.
9043 q -- likewise, print the DImode name of the register.
9044 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
9045 y -- print "st(0)" instead of "st" as a register.
9046 D -- print condition for SSE cmp instruction.
9047 P -- if PIC, print an @PLT suffix.
9048 X -- don't print any sort of PIC '@' suffix for a symbol.
9049 & -- print some in-use local-dynamic symbol name.
9050 H -- print a memory address offset by 8; used for sse high-parts
9051 Y -- print condition for SSE5 com* instruction.
9052 + -- print a branch hint as 'cs' or 'ds' prefix
9053 ; -- print a semicolon (after prefixes due to bug in older gas).
9057 print_operand (FILE *file, rtx x, int code)
9064 if (ASSEMBLER_DIALECT == ASM_ATT)
9069 assemble_name (file, get_some_local_dynamic_name ());
9073 switch (ASSEMBLER_DIALECT)
9080 /* Intel syntax. For absolute addresses, registers should not
9081 be surrounded by braces. */
9085 PRINT_OPERAND (file, x, 0);
9095 PRINT_OPERAND (file, x, 0);
9100 if (ASSEMBLER_DIALECT == ASM_ATT)
9105 if (ASSEMBLER_DIALECT == ASM_ATT)
9110 if (ASSEMBLER_DIALECT == ASM_ATT)
9115 if (ASSEMBLER_DIALECT == ASM_ATT)
9120 if (ASSEMBLER_DIALECT == ASM_ATT)
9125 if (ASSEMBLER_DIALECT == ASM_ATT)
9130 /* 387 opcodes don't get size suffixes if the operands are
9132 if (STACK_REG_P (x))
9135 /* Likewise if using Intel opcodes. */
9136 if (ASSEMBLER_DIALECT == ASM_INTEL)
9139 /* This is the size of op from size of operand. */
9140 switch (GET_MODE_SIZE (GET_MODE (x)))
9149 #ifdef HAVE_GAS_FILDS_FISTS
9159 if (GET_MODE (x) == SFmode)
9174 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
9178 #ifdef GAS_MNEMONICS
9207 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
9209 PRINT_OPERAND (file, x, 0);
9215 /* Little bit of braindamage here. The SSE compare instructions
9216 does use completely different names for the comparisons that the
9217 fp conditional moves. */
9218 switch (GET_CODE (x))
9233 fputs ("unord", file);
9237 fputs ("neq", file);
9241 fputs ("nlt", file);
9245 fputs ("nle", file);
9248 fputs ("ord", file);
9255 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9256 if (ASSEMBLER_DIALECT == ASM_ATT)
9258 switch (GET_MODE (x))
9260 case HImode: putc ('w', file); break;
9262 case SFmode: putc ('l', file); break;
9264 case DFmode: putc ('q', file); break;
9265 default: gcc_unreachable ();
9272 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
9275 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9276 if (ASSEMBLER_DIALECT == ASM_ATT)
9279 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
9282 /* Like above, but reverse condition */
9284 /* Check to see if argument to %c is really a constant
9285 and not a condition code which needs to be reversed. */
9286 if (!COMPARISON_P (x))
9288 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9291 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9294 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9295 if (ASSEMBLER_DIALECT == ASM_ATT)
9298 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
9302 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
9306 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
9310 /* It doesn't actually matter what mode we use here, as we're
9311 only going to use this for printing. */
9312 x = adjust_address_nv (x, DImode, 8);
9319 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9322 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9325 int pred_val = INTVAL (XEXP (x, 0));
9327 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9328 || pred_val > REG_BR_PROB_BASE * 55 / 100)
9330 int taken = pred_val > REG_BR_PROB_BASE / 2;
9331 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9333 /* Emit hints only in the case default branch prediction
9334 heuristics would fail. */
9335 if (taken != cputaken)
9337 /* We use 3e (DS) prefix for taken branches and
9338 2e (CS) prefix for not taken branches. */
9340 fputs ("ds ; ", file);
9342 fputs ("cs ; ", file);
9350 switch (GET_CODE (x))
9353 fputs ("neq", file);
9360 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9364 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9375 fputs ("unord", file);
9378 fputs ("ord", file);
9381 fputs ("ueq", file);
9384 fputs ("nlt", file);
9387 fputs ("nle", file);
9390 fputs ("ule", file);
9393 fputs ("ult", file);
9396 fputs ("une", file);
9405 fputs (" ; ", file);
9412 output_operand_lossage ("invalid operand code '%c'", code);
9417 print_reg (x, code, file);
9421 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9422 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9423 && GET_MODE (x) != BLKmode)
9426 switch (GET_MODE_SIZE (GET_MODE (x)))
9428 case 1: size = "BYTE"; break;
9429 case 2: size = "WORD"; break;
9430 case 4: size = "DWORD"; break;
9431 case 8: size = "QWORD"; break;
9432 case 12: size = "XWORD"; break;
9434 if (GET_MODE (x) == XFmode)
9443 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9446 else if (code == 'w')
9448 else if (code == 'k')
9452 fputs (" PTR ", file);
9456 /* Avoid (%rip) for call operands. */
9457 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9458 && !CONST_INT_P (x))
9459 output_addr_const (file, x);
9460 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9461 output_operand_lossage ("invalid constraints for operand");
9466 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9471 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9472 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9474 if (ASSEMBLER_DIALECT == ASM_ATT)
9476 fprintf (file, "0x%08lx", (long unsigned int) l);
9479 /* These float cases don't actually occur as immediate operands. */
9480 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9484 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9485 fprintf (file, "%s", dstr);
9488 else if (GET_CODE (x) == CONST_DOUBLE
9489 && GET_MODE (x) == XFmode)
9493 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9494 fprintf (file, "%s", dstr);
9499 /* We have patterns that allow zero sets of memory, for instance.
9500 In 64-bit mode, we should probably support all 8-byte vectors,
9501 since we can in fact encode that into an immediate. */
9502 if (GET_CODE (x) == CONST_VECTOR)
9504 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9510 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9512 if (ASSEMBLER_DIALECT == ASM_ATT)
9515 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9516 || GET_CODE (x) == LABEL_REF)
9518 if (ASSEMBLER_DIALECT == ASM_ATT)
9521 fputs ("OFFSET FLAT:", file);
9524 if (CONST_INT_P (x))
9525 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9527 output_pic_addr_const (file, x, code);
9529 output_addr_const (file, x);
9533 /* Print a memory operand whose address is ADDR. */
9536 print_operand_address (FILE *file, rtx addr)
9538 struct ix86_address parts;
9539 rtx base, index, disp;
9541 int ok = ix86_decompose_address (addr, &parts);
9546 index = parts.index;
9548 scale = parts.scale;
9556 if (ASSEMBLER_DIALECT == ASM_ATT)
9558 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9564 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9565 if (TARGET_64BIT && !base && !index)
9569 if (GET_CODE (disp) == CONST
9570 && GET_CODE (XEXP (disp, 0)) == PLUS
9571 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9572 symbol = XEXP (XEXP (disp, 0), 0);
9574 if (GET_CODE (symbol) == LABEL_REF
9575 || (GET_CODE (symbol) == SYMBOL_REF
9576 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9579 if (!base && !index)
9581 /* Displacement only requires special attention. */
9583 if (CONST_INT_P (disp))
9585 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9586 fputs ("ds:", file);
9587 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9590 output_pic_addr_const (file, disp, 0);
9592 output_addr_const (file, disp);
9596 if (ASSEMBLER_DIALECT == ASM_ATT)
9601 output_pic_addr_const (file, disp, 0);
9602 else if (GET_CODE (disp) == LABEL_REF)
9603 output_asm_label (disp);
9605 output_addr_const (file, disp);
9610 print_reg (base, 0, file);
9614 print_reg (index, 0, file);
9616 fprintf (file, ",%d", scale);
9622 rtx offset = NULL_RTX;
9626 /* Pull out the offset of a symbol; print any symbol itself. */
9627 if (GET_CODE (disp) == CONST
9628 && GET_CODE (XEXP (disp, 0)) == PLUS
9629 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9631 offset = XEXP (XEXP (disp, 0), 1);
9632 disp = gen_rtx_CONST (VOIDmode,
9633 XEXP (XEXP (disp, 0), 0));
9637 output_pic_addr_const (file, disp, 0);
9638 else if (GET_CODE (disp) == LABEL_REF)
9639 output_asm_label (disp);
9640 else if (CONST_INT_P (disp))
9643 output_addr_const (file, disp);
9649 print_reg (base, 0, file);
9652 if (INTVAL (offset) >= 0)
9654 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9658 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9665 print_reg (index, 0, file);
9667 fprintf (file, "*%d", scale);
9675 output_addr_const_extra (FILE *file, rtx x)
9679 if (GET_CODE (x) != UNSPEC)
9682 op = XVECEXP (x, 0, 0);
9683 switch (XINT (x, 1))
9685 case UNSPEC_GOTTPOFF:
9686 output_addr_const (file, op);
9687 /* FIXME: This might be @TPOFF in Sun ld. */
9688 fputs ("@GOTTPOFF", file);
9691 output_addr_const (file, op);
9692 fputs ("@TPOFF", file);
9695 output_addr_const (file, op);
9697 fputs ("@TPOFF", file);
9699 fputs ("@NTPOFF", file);
9702 output_addr_const (file, op);
9703 fputs ("@DTPOFF", file);
9705 case UNSPEC_GOTNTPOFF:
9706 output_addr_const (file, op);
9708 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9709 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9711 fputs ("@GOTNTPOFF", file);
9713 case UNSPEC_INDNTPOFF:
9714 output_addr_const (file, op);
9715 fputs ("@INDNTPOFF", file);
9725 /* Split one or more DImode RTL references into pairs of SImode
9726 references. The RTL can be REG, offsettable MEM, integer constant, or
9727 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9728 split and "num" is its length. lo_half and hi_half are output arrays
9729 that parallel "operands". */
9732 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9736 rtx op = operands[num];
9738 /* simplify_subreg refuse to split volatile memory addresses,
9739 but we still have to handle it. */
9742 lo_half[num] = adjust_address (op, SImode, 0);
9743 hi_half[num] = adjust_address (op, SImode, 4);
9747 lo_half[num] = simplify_gen_subreg (SImode, op,
9748 GET_MODE (op) == VOIDmode
9749 ? DImode : GET_MODE (op), 0);
9750 hi_half[num] = simplify_gen_subreg (SImode, op,
9751 GET_MODE (op) == VOIDmode
9752 ? DImode : GET_MODE (op), 4);
9756 /* Split one or more TImode RTL references into pairs of DImode
9757 references. The RTL can be REG, offsettable MEM, integer constant, or
9758 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9759 split and "num" is its length. lo_half and hi_half are output arrays
9760 that parallel "operands". */
9763 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9767 rtx op = operands[num];
9769 /* simplify_subreg refuse to split volatile memory addresses, but we
9770 still have to handle it. */
9773 lo_half[num] = adjust_address (op, DImode, 0);
9774 hi_half[num] = adjust_address (op, DImode, 8);
9778 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9779 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9784 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9785 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9786 is the expression of the binary operation. The output may either be
9787 emitted here, or returned to the caller, like all output_* functions.
9789 There is no guarantee that the operands are the same mode, as they
9790 might be within FLOAT or FLOAT_EXTEND expressions. */
9792 #ifndef SYSV386_COMPAT
9793 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9794 wants to fix the assemblers because that causes incompatibility
9795 with gcc. No-one wants to fix gcc because that causes
9796 incompatibility with assemblers... You can use the option of
9797 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9798 #define SYSV386_COMPAT 1
9802 output_387_binary_op (rtx insn, rtx *operands)
9804 static char buf[30];
9807 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9809 #ifdef ENABLE_CHECKING
9810 /* Even if we do not want to check the inputs, this documents input
9811 constraints. Which helps in understanding the following code. */
9812 if (STACK_REG_P (operands[0])
9813 && ((REG_P (operands[1])
9814 && REGNO (operands[0]) == REGNO (operands[1])
9815 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9816 || (REG_P (operands[2])
9817 && REGNO (operands[0]) == REGNO (operands[2])
9818 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9819 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9822 gcc_assert (is_sse);
9825 switch (GET_CODE (operands[3]))
9828 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9829 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9837 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9838 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9846 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9847 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9855 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9856 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9870 if (GET_MODE (operands[0]) == SFmode)
9871 strcat (buf, "ss\t{%2, %0|%0, %2}");
9873 strcat (buf, "sd\t{%2, %0|%0, %2}");
9878 switch (GET_CODE (operands[3]))
9882 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9884 rtx temp = operands[2];
9885 operands[2] = operands[1];
9889 /* know operands[0] == operands[1]. */
9891 if (MEM_P (operands[2]))
9897 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9899 if (STACK_TOP_P (operands[0]))
9900 /* How is it that we are storing to a dead operand[2]?
9901 Well, presumably operands[1] is dead too. We can't
9902 store the result to st(0) as st(0) gets popped on this
9903 instruction. Instead store to operands[2] (which I
9904 think has to be st(1)). st(1) will be popped later.
9905 gcc <= 2.8.1 didn't have this check and generated
9906 assembly code that the Unixware assembler rejected. */
9907 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9909 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9913 if (STACK_TOP_P (operands[0]))
9914 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9916 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9921 if (MEM_P (operands[1]))
9927 if (MEM_P (operands[2]))
9933 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9936 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9937 derived assemblers, confusingly reverse the direction of
9938 the operation for fsub{r} and fdiv{r} when the
9939 destination register is not st(0). The Intel assembler
9940 doesn't have this brain damage. Read !SYSV386_COMPAT to
9941 figure out what the hardware really does. */
9942 if (STACK_TOP_P (operands[0]))
9943 p = "{p\t%0, %2|rp\t%2, %0}";
9945 p = "{rp\t%2, %0|p\t%0, %2}";
9947 if (STACK_TOP_P (operands[0]))
9948 /* As above for fmul/fadd, we can't store to st(0). */
9949 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9951 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9956 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9959 if (STACK_TOP_P (operands[0]))
9960 p = "{rp\t%0, %1|p\t%1, %0}";
9962 p = "{p\t%1, %0|rp\t%0, %1}";
9964 if (STACK_TOP_P (operands[0]))
9965 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9967 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9972 if (STACK_TOP_P (operands[0]))
9974 if (STACK_TOP_P (operands[1]))
9975 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9977 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9980 else if (STACK_TOP_P (operands[1]))
9983 p = "{\t%1, %0|r\t%0, %1}";
9985 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9991 p = "{r\t%2, %0|\t%0, %2}";
9993 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
10006 /* Return needed mode for entity in optimize_mode_switching pass. */
10009 ix86_mode_needed (int entity, rtx insn)
10011 enum attr_i387_cw mode;
10013 /* The mode UNINITIALIZED is used to store control word after a
10014 function call or ASM pattern. The mode ANY specify that function
10015 has no requirements on the control word and make no changes in the
10016 bits we are interested in. */
10019 || (NONJUMP_INSN_P (insn)
10020 && (asm_noperands (PATTERN (insn)) >= 0
10021 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
10022 return I387_CW_UNINITIALIZED;
10024 if (recog_memoized (insn) < 0)
10025 return I387_CW_ANY;
10027 mode = get_attr_i387_cw (insn);
10032 if (mode == I387_CW_TRUNC)
10037 if (mode == I387_CW_FLOOR)
10042 if (mode == I387_CW_CEIL)
10047 if (mode == I387_CW_MASK_PM)
10052 gcc_unreachable ();
10055 return I387_CW_ANY;
10058 /* Output code to initialize control word copies used by trunc?f?i and
10059 rounding patterns. CURRENT_MODE is set to current control word,
10060 while NEW_MODE is set to new control word. */
10063 emit_i387_cw_initialization (int mode)
10065 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
10068 enum ix86_stack_slot slot;
10070 rtx reg = gen_reg_rtx (HImode);
10072 emit_insn (gen_x86_fnstcw_1 (stored_mode));
10073 emit_move_insn (reg, copy_rtx (stored_mode));
10075 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
10079 case I387_CW_TRUNC:
10080 /* round toward zero (truncate) */
10081 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
10082 slot = SLOT_CW_TRUNC;
10085 case I387_CW_FLOOR:
10086 /* round down toward -oo */
10087 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
10088 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
10089 slot = SLOT_CW_FLOOR;
10093 /* round up toward +oo */
10094 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
10095 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
10096 slot = SLOT_CW_CEIL;
10099 case I387_CW_MASK_PM:
10100 /* mask precision exception for nearbyint() */
10101 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
10102 slot = SLOT_CW_MASK_PM;
10106 gcc_unreachable ();
10113 case I387_CW_TRUNC:
10114 /* round toward zero (truncate) */
10115 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
10116 slot = SLOT_CW_TRUNC;
10119 case I387_CW_FLOOR:
10120 /* round down toward -oo */
10121 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
10122 slot = SLOT_CW_FLOOR;
10126 /* round up toward +oo */
10127 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
10128 slot = SLOT_CW_CEIL;
10131 case I387_CW_MASK_PM:
10132 /* mask precision exception for nearbyint() */
10133 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
10134 slot = SLOT_CW_MASK_PM;
10138 gcc_unreachable ();
10142 gcc_assert (slot < MAX_386_STACK_LOCALS);
10144 new_mode = assign_386_stack_local (HImode, slot);
10145 emit_move_insn (new_mode, reg);
10148 /* Output code for INSN to convert a float to a signed int. OPERANDS
10149 are the insn operands. The output may be [HSD]Imode and the input
10150 operand may be [SDX]Fmode. */
10153 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
10155 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10156 int dimode_p = GET_MODE (operands[0]) == DImode;
10157 int round_mode = get_attr_i387_cw (insn);
10159 /* Jump through a hoop or two for DImode, since the hardware has no
10160 non-popping instruction. We used to do this a different way, but
10161 that was somewhat fragile and broke with post-reload splitters. */
10162 if ((dimode_p || fisttp) && !stack_top_dies)
10163 output_asm_insn ("fld\t%y1", operands);
10165 gcc_assert (STACK_TOP_P (operands[1]));
10166 gcc_assert (MEM_P (operands[0]));
10167 gcc_assert (GET_MODE (operands[1]) != TFmode);
10170 output_asm_insn ("fisttp%z0\t%0", operands);
10173 if (round_mode != I387_CW_ANY)
10174 output_asm_insn ("fldcw\t%3", operands);
10175 if (stack_top_dies || dimode_p)
10176 output_asm_insn ("fistp%z0\t%0", operands);
10178 output_asm_insn ("fist%z0\t%0", operands);
10179 if (round_mode != I387_CW_ANY)
10180 output_asm_insn ("fldcw\t%2", operands);
10186 /* Output code for x87 ffreep insn. The OPNO argument, which may only
10187 have the values zero or one, indicates the ffreep insn's operand
10188 from the OPERANDS array. */
10190 static const char *
10191 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
10193 if (TARGET_USE_FFREEP)
10194 #if HAVE_AS_IX86_FFREEP
10195 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
10198 static char retval[] = ".word\t0xc_df";
10199 int regno = REGNO (operands[opno]);
10201 gcc_assert (FP_REGNO_P (regno));
10203 retval[9] = '0' + (regno - FIRST_STACK_REG);
10208 return opno ? "fstp\t%y1" : "fstp\t%y0";
10212 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
10213 should be used. UNORDERED_P is true when fucom should be used. */
10216 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
10218 int stack_top_dies;
10219 rtx cmp_op0, cmp_op1;
10220 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
10224 cmp_op0 = operands[0];
10225 cmp_op1 = operands[1];
10229 cmp_op0 = operands[1];
10230 cmp_op1 = operands[2];
10235 if (GET_MODE (operands[0]) == SFmode)
10237 return "ucomiss\t{%1, %0|%0, %1}";
10239 return "comiss\t{%1, %0|%0, %1}";
10242 return "ucomisd\t{%1, %0|%0, %1}";
10244 return "comisd\t{%1, %0|%0, %1}";
10247 gcc_assert (STACK_TOP_P (cmp_op0));
10249 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10251 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
10253 if (stack_top_dies)
10255 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
10256 return output_387_ffreep (operands, 1);
10259 return "ftst\n\tfnstsw\t%0";
10262 if (STACK_REG_P (cmp_op1)
10264 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
10265 && REGNO (cmp_op1) != FIRST_STACK_REG)
10267 /* If both the top of the 387 stack dies, and the other operand
10268 is also a stack register that dies, then this must be a
10269 `fcompp' float compare */
10273 /* There is no double popping fcomi variant. Fortunately,
10274 eflags is immune from the fstp's cc clobbering. */
10276 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
10278 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
10279 return output_387_ffreep (operands, 0);
10284 return "fucompp\n\tfnstsw\t%0";
10286 return "fcompp\n\tfnstsw\t%0";
10291 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
10293 static const char * const alt[16] =
10295 "fcom%z2\t%y2\n\tfnstsw\t%0",
10296 "fcomp%z2\t%y2\n\tfnstsw\t%0",
10297 "fucom%z2\t%y2\n\tfnstsw\t%0",
10298 "fucomp%z2\t%y2\n\tfnstsw\t%0",
10300 "ficom%z2\t%y2\n\tfnstsw\t%0",
10301 "ficomp%z2\t%y2\n\tfnstsw\t%0",
10305 "fcomi\t{%y1, %0|%0, %y1}",
10306 "fcomip\t{%y1, %0|%0, %y1}",
10307 "fucomi\t{%y1, %0|%0, %y1}",
10308 "fucomip\t{%y1, %0|%0, %y1}",
10319 mask = eflags_p << 3;
10320 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10321 mask |= unordered_p << 1;
10322 mask |= stack_top_dies;
10324 gcc_assert (mask < 16);
10333 ix86_output_addr_vec_elt (FILE *file, int value)
10335 const char *directive = ASM_LONG;
10339 directive = ASM_QUAD;
10341 gcc_assert (!TARGET_64BIT);
10344 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10348 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10350 const char *directive = ASM_LONG;
10353 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10354 directive = ASM_QUAD;
10356 gcc_assert (!TARGET_64BIT);
10358 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10359 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10360 fprintf (file, "%s%s%d-%s%d\n",
10361 directive, LPREFIX, value, LPREFIX, rel);
10362 else if (HAVE_AS_GOTOFF_IN_DATA)
10363 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10365 else if (TARGET_MACHO)
10367 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10368 machopic_output_function_base_name (file);
10369 fprintf(file, "\n");
10373 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10374 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10377 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10381 ix86_expand_clear (rtx dest)
10385 /* We play register width games, which are only valid after reload. */
10386 gcc_assert (reload_completed);
10388 /* Avoid HImode and its attendant prefix byte. */
10389 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10390 dest = gen_rtx_REG (SImode, REGNO (dest));
10391 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10393 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10394 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10396 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10397 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10403 /* X is an unchanging MEM. If it is a constant pool reference, return
10404 the constant pool rtx, else NULL. */
10407 maybe_get_pool_constant (rtx x)
10409 x = ix86_delegitimize_address (XEXP (x, 0));
10411 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10412 return get_pool_constant (x);
10418 ix86_expand_move (enum machine_mode mode, rtx operands[])
10421 enum tls_model model;
10426 if (GET_CODE (op1) == SYMBOL_REF)
10428 model = SYMBOL_REF_TLS_MODEL (op1);
10431 op1 = legitimize_tls_address (op1, model, true);
10432 op1 = force_operand (op1, op0);
10436 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10437 && SYMBOL_REF_DLLIMPORT_P (op1))
10438 op1 = legitimize_dllimport_symbol (op1, false);
10440 else if (GET_CODE (op1) == CONST
10441 && GET_CODE (XEXP (op1, 0)) == PLUS
10442 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10444 rtx addend = XEXP (XEXP (op1, 0), 1);
10445 rtx symbol = XEXP (XEXP (op1, 0), 0);
10448 model = SYMBOL_REF_TLS_MODEL (symbol);
10450 tmp = legitimize_tls_address (symbol, model, true);
10451 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10452 && SYMBOL_REF_DLLIMPORT_P (symbol))
10453 tmp = legitimize_dllimport_symbol (symbol, true);
10457 tmp = force_operand (tmp, NULL);
10458 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10459 op0, 1, OPTAB_DIRECT);
10465 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10467 if (TARGET_MACHO && !TARGET_64BIT)
10472 rtx temp = ((reload_in_progress
10473 || ((op0 && REG_P (op0))
10475 ? op0 : gen_reg_rtx (Pmode));
10476 op1 = machopic_indirect_data_reference (op1, temp);
10477 op1 = machopic_legitimize_pic_address (op1, mode,
10478 temp == op1 ? 0 : temp);
10480 else if (MACHOPIC_INDIRECT)
10481 op1 = machopic_indirect_data_reference (op1, 0);
10489 op1 = force_reg (Pmode, op1);
10490 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10492 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10493 op1 = legitimize_pic_address (op1, reg);
10502 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10503 || !push_operand (op0, mode))
10505 op1 = force_reg (mode, op1);
10507 if (push_operand (op0, mode)
10508 && ! general_no_elim_operand (op1, mode))
10509 op1 = copy_to_mode_reg (mode, op1);
10511 /* Force large constants in 64bit compilation into register
10512 to get them CSEed. */
10513 if (can_create_pseudo_p ()
10514 && (mode == DImode) && TARGET_64BIT
10515 && immediate_operand (op1, mode)
10516 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10517 && !register_operand (op0, mode)
10519 op1 = copy_to_mode_reg (mode, op1);
10521 if (can_create_pseudo_p ()
10522 && FLOAT_MODE_P (mode)
10523 && GET_CODE (op1) == CONST_DOUBLE)
10525 /* If we are loading a floating point constant to a register,
10526 force the value to memory now, since we'll get better code
10527 out the back end. */
10529 op1 = validize_mem (force_const_mem (mode, op1));
10530 if (!register_operand (op0, mode))
10532 rtx temp = gen_reg_rtx (mode);
10533 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10534 emit_move_insn (op0, temp);
10540 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10544 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10546 rtx op0 = operands[0], op1 = operands[1];
10547 unsigned int align = GET_MODE_ALIGNMENT (mode);
10549 /* Force constants other than zero into memory. We do not know how
10550 the instructions used to build constants modify the upper 64 bits
10551 of the register, once we have that information we may be able
10552 to handle some of them more efficiently. */
10553 if (can_create_pseudo_p ()
10554 && register_operand (op0, mode)
10555 && (CONSTANT_P (op1)
10556 || (GET_CODE (op1) == SUBREG
10557 && CONSTANT_P (SUBREG_REG (op1))))
10558 && standard_sse_constant_p (op1) <= 0)
10559 op1 = validize_mem (force_const_mem (mode, op1));
10561 /* We need to check memory alignment for SSE mode since attribute
10562 can make operands unaligned. */
10563 if (can_create_pseudo_p ()
10564 && SSE_REG_MODE_P (mode)
10565 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10566 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10570 /* ix86_expand_vector_move_misalign() does not like constants ... */
10571 if (CONSTANT_P (op1)
10572 || (GET_CODE (op1) == SUBREG
10573 && CONSTANT_P (SUBREG_REG (op1))))
10574 op1 = validize_mem (force_const_mem (mode, op1));
10576 /* ... nor both arguments in memory. */
10577 if (!register_operand (op0, mode)
10578 && !register_operand (op1, mode))
10579 op1 = force_reg (mode, op1);
10581 tmp[0] = op0; tmp[1] = op1;
10582 ix86_expand_vector_move_misalign (mode, tmp);
10586 /* Make operand1 a register if it isn't already. */
10587 if (can_create_pseudo_p ()
10588 && !register_operand (op0, mode)
10589 && !register_operand (op1, mode))
10591 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10595 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10598 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10599 straight to ix86_expand_vector_move. */
10600 /* Code generation for scalar reg-reg moves of single and double precision data:
10601 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10605 if (x86_sse_partial_reg_dependency == true)
10610 Code generation for scalar loads of double precision data:
10611 if (x86_sse_split_regs == true)
10612 movlpd mem, reg (gas syntax)
10616 Code generation for unaligned packed loads of single precision data
10617 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10618 if (x86_sse_unaligned_move_optimal)
10621 if (x86_sse_partial_reg_dependency == true)
10633 Code generation for unaligned packed loads of double precision data
10634 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10635 if (x86_sse_unaligned_move_optimal)
10638 if (x86_sse_split_regs == true)
10651 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10660 /* If we're optimizing for size, movups is the smallest. */
10663 op0 = gen_lowpart (V4SFmode, op0);
10664 op1 = gen_lowpart (V4SFmode, op1);
10665 emit_insn (gen_sse_movups (op0, op1));
10669 /* ??? If we have typed data, then it would appear that using
10670 movdqu is the only way to get unaligned data loaded with
10672 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10674 op0 = gen_lowpart (V16QImode, op0);
10675 op1 = gen_lowpart (V16QImode, op1);
10676 emit_insn (gen_sse2_movdqu (op0, op1));
10680 if (TARGET_SSE2 && mode == V2DFmode)
10684 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10686 op0 = gen_lowpart (V2DFmode, op0);
10687 op1 = gen_lowpart (V2DFmode, op1);
10688 emit_insn (gen_sse2_movupd (op0, op1));
10692 /* When SSE registers are split into halves, we can avoid
10693 writing to the top half twice. */
10694 if (TARGET_SSE_SPLIT_REGS)
10696 emit_clobber (op0);
10701 /* ??? Not sure about the best option for the Intel chips.
10702 The following would seem to satisfy; the register is
10703 entirely cleared, breaking the dependency chain. We
10704 then store to the upper half, with a dependency depth
10705 of one. A rumor has it that Intel recommends two movsd
10706 followed by an unpacklpd, but this is unconfirmed. And
10707 given that the dependency depth of the unpacklpd would
10708 still be one, I'm not sure why this would be better. */
10709 zero = CONST0_RTX (V2DFmode);
10712 m = adjust_address (op1, DFmode, 0);
10713 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10714 m = adjust_address (op1, DFmode, 8);
10715 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10719 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10721 op0 = gen_lowpart (V4SFmode, op0);
10722 op1 = gen_lowpart (V4SFmode, op1);
10723 emit_insn (gen_sse_movups (op0, op1));
10727 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10728 emit_move_insn (op0, CONST0_RTX (mode));
10730 emit_clobber (op0);
10732 if (mode != V4SFmode)
10733 op0 = gen_lowpart (V4SFmode, op0);
10734 m = adjust_address (op1, V2SFmode, 0);
10735 emit_insn (gen_sse_loadlps (op0, op0, m));
10736 m = adjust_address (op1, V2SFmode, 8);
10737 emit_insn (gen_sse_loadhps (op0, op0, m));
10740 else if (MEM_P (op0))
10742 /* If we're optimizing for size, movups is the smallest. */
10745 op0 = gen_lowpart (V4SFmode, op0);
10746 op1 = gen_lowpart (V4SFmode, op1);
10747 emit_insn (gen_sse_movups (op0, op1));
10751 /* ??? Similar to above, only less clear because of quote
10752 typeless stores unquote. */
10753 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10754 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10756 op0 = gen_lowpart (V16QImode, op0);
10757 op1 = gen_lowpart (V16QImode, op1);
10758 emit_insn (gen_sse2_movdqu (op0, op1));
10762 if (TARGET_SSE2 && mode == V2DFmode)
10764 m = adjust_address (op0, DFmode, 0);
10765 emit_insn (gen_sse2_storelpd (m, op1));
10766 m = adjust_address (op0, DFmode, 8);
10767 emit_insn (gen_sse2_storehpd (m, op1));
10771 if (mode != V4SFmode)
10772 op1 = gen_lowpart (V4SFmode, op1);
10773 m = adjust_address (op0, V2SFmode, 0);
10774 emit_insn (gen_sse_storelps (m, op1));
10775 m = adjust_address (op0, V2SFmode, 8);
10776 emit_insn (gen_sse_storehps (m, op1));
10780 gcc_unreachable ();
10783 /* Expand a push in MODE. This is some mode for which we do not support
10784 proper push instructions, at least from the registers that we expect
10785 the value to live in. */
10788 ix86_expand_push (enum machine_mode mode, rtx x)
10792 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10793 GEN_INT (-GET_MODE_SIZE (mode)),
10794 stack_pointer_rtx, 1, OPTAB_DIRECT);
10795 if (tmp != stack_pointer_rtx)
10796 emit_move_insn (stack_pointer_rtx, tmp);
10798 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10799 emit_move_insn (tmp, x);
10802 /* Helper function of ix86_fixup_binary_operands to canonicalize
10803 operand order. Returns true if the operands should be swapped. */
10806 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10809 rtx dst = operands[0];
10810 rtx src1 = operands[1];
10811 rtx src2 = operands[2];
10813 /* If the operation is not commutative, we can't do anything. */
10814 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10817 /* Highest priority is that src1 should match dst. */
10818 if (rtx_equal_p (dst, src1))
10820 if (rtx_equal_p (dst, src2))
10823 /* Next highest priority is that immediate constants come second. */
10824 if (immediate_operand (src2, mode))
10826 if (immediate_operand (src1, mode))
10829 /* Lowest priority is that memory references should come second. */
10839 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10840 destination to use for the operation. If different from the true
10841 destination in operands[0], a copy operation will be required. */
10844 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10847 rtx dst = operands[0];
10848 rtx src1 = operands[1];
10849 rtx src2 = operands[2];
10851 /* Canonicalize operand order. */
10852 if (ix86_swap_binary_operands_p (code, mode, operands))
10856 /* It is invalid to swap operands of different modes. */
10857 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
10864 /* Both source operands cannot be in memory. */
10865 if (MEM_P (src1) && MEM_P (src2))
10867 /* Optimization: Only read from memory once. */
10868 if (rtx_equal_p (src1, src2))
10870 src2 = force_reg (mode, src2);
10874 src2 = force_reg (mode, src2);
10877 /* If the destination is memory, and we do not have matching source
10878 operands, do things in registers. */
10879 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10880 dst = gen_reg_rtx (mode);
10882 /* Source 1 cannot be a constant. */
10883 if (CONSTANT_P (src1))
10884 src1 = force_reg (mode, src1);
10886 /* Source 1 cannot be a non-matching memory. */
10887 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10888 src1 = force_reg (mode, src1);
10890 operands[1] = src1;
10891 operands[2] = src2;
10895 /* Similarly, but assume that the destination has already been
10896 set up properly. */
10899 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10900 enum machine_mode mode, rtx operands[])
10902 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10903 gcc_assert (dst == operands[0]);
10906 /* Attempt to expand a binary operator. Make the expansion closer to the
10907 actual machine, then just general_operand, which will allow 3 separate
10908 memory references (one output, two input) in a single insn. */
10911 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10914 rtx src1, src2, dst, op, clob;
10916 dst = ix86_fixup_binary_operands (code, mode, operands);
10917 src1 = operands[1];
10918 src2 = operands[2];
10920 /* Emit the instruction. */
10922 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10923 if (reload_in_progress)
10925 /* Reload doesn't know about the flags register, and doesn't know that
10926 it doesn't want to clobber it. We can only do this with PLUS. */
10927 gcc_assert (code == PLUS);
10932 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10933 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10936 /* Fix up the destination if needed. */
10937 if (dst != operands[0])
10938 emit_move_insn (operands[0], dst);
10941 /* Return TRUE or FALSE depending on whether the binary operator meets the
10942 appropriate constraints. */
10945 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10948 rtx dst = operands[0];
10949 rtx src1 = operands[1];
10950 rtx src2 = operands[2];
10952 /* Both source operands cannot be in memory. */
10953 if (MEM_P (src1) && MEM_P (src2))
10956 /* Canonicalize operand order for commutative operators. */
10957 if (ix86_swap_binary_operands_p (code, mode, operands))
10964 /* If the destination is memory, we must have a matching source operand. */
10965 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10968 /* Source 1 cannot be a constant. */
10969 if (CONSTANT_P (src1))
10972 /* Source 1 cannot be a non-matching memory. */
10973 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10979 /* Attempt to expand a unary operator. Make the expansion closer to the
10980 actual machine, then just general_operand, which will allow 2 separate
10981 memory references (one output, one input) in a single insn. */
10984 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10987 int matching_memory;
10988 rtx src, dst, op, clob;
10993 /* If the destination is memory, and we do not have matching source
10994 operands, do things in registers. */
10995 matching_memory = 0;
10998 if (rtx_equal_p (dst, src))
10999 matching_memory = 1;
11001 dst = gen_reg_rtx (mode);
11004 /* When source operand is memory, destination must match. */
11005 if (MEM_P (src) && !matching_memory)
11006 src = force_reg (mode, src);
11008 /* Emit the instruction. */
11010 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
11011 if (reload_in_progress || code == NOT)
11013 /* Reload doesn't know about the flags register, and doesn't know that
11014 it doesn't want to clobber it. */
11015 gcc_assert (code == NOT);
11020 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11021 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
11024 /* Fix up the destination if needed. */
11025 if (dst != operands[0])
11026 emit_move_insn (operands[0], dst);
11029 /* Return TRUE or FALSE depending on whether the unary operator meets the
11030 appropriate constraints. */
11033 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
11034 enum machine_mode mode ATTRIBUTE_UNUSED,
11035 rtx operands[2] ATTRIBUTE_UNUSED)
11037 /* If one of operands is memory, source and destination must match. */
11038 if ((MEM_P (operands[0])
11039 || MEM_P (operands[1]))
11040 && ! rtx_equal_p (operands[0], operands[1]))
11045 /* Post-reload splitter for converting an SF or DFmode value in an
11046 SSE register into an unsigned SImode. */
11049 ix86_split_convert_uns_si_sse (rtx operands[])
11051 enum machine_mode vecmode;
11052 rtx value, large, zero_or_two31, input, two31, x;
11054 large = operands[1];
11055 zero_or_two31 = operands[2];
11056 input = operands[3];
11057 two31 = operands[4];
11058 vecmode = GET_MODE (large);
11059 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
11061 /* Load up the value into the low element. We must ensure that the other
11062 elements are valid floats -- zero is the easiest such value. */
11065 if (vecmode == V4SFmode)
11066 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
11068 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
11072 input = gen_rtx_REG (vecmode, REGNO (input));
11073 emit_move_insn (value, CONST0_RTX (vecmode));
11074 if (vecmode == V4SFmode)
11075 emit_insn (gen_sse_movss (value, value, input));
11077 emit_insn (gen_sse2_movsd (value, value, input));
11080 emit_move_insn (large, two31);
11081 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
11083 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
11084 emit_insn (gen_rtx_SET (VOIDmode, large, x));
11086 x = gen_rtx_AND (vecmode, zero_or_two31, large);
11087 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
11089 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
11090 emit_insn (gen_rtx_SET (VOIDmode, value, x));
11092 large = gen_rtx_REG (V4SImode, REGNO (large));
11093 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
11095 x = gen_rtx_REG (V4SImode, REGNO (value));
11096 if (vecmode == V4SFmode)
11097 emit_insn (gen_sse2_cvttps2dq (x, value));
11099 emit_insn (gen_sse2_cvttpd2dq (x, value));
11102 emit_insn (gen_xorv4si3 (value, value, large));
11105 /* Convert an unsigned DImode value into a DFmode, using only SSE.
11106 Expects the 64-bit DImode to be supplied in a pair of integral
11107 registers. Requires SSE2; will use SSE3 if available. For x86_32,
11108 -mfpmath=sse, !optimize_size only. */
11111 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
11113 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
11114 rtx int_xmm, fp_xmm;
11115 rtx biases, exponents;
11118 int_xmm = gen_reg_rtx (V4SImode);
11119 if (TARGET_INTER_UNIT_MOVES)
11120 emit_insn (gen_movdi_to_sse (int_xmm, input));
11121 else if (TARGET_SSE_SPLIT_REGS)
11123 emit_clobber (int_xmm);
11124 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
11128 x = gen_reg_rtx (V2DImode);
11129 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
11130 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
11133 x = gen_rtx_CONST_VECTOR (V4SImode,
11134 gen_rtvec (4, GEN_INT (0x43300000UL),
11135 GEN_INT (0x45300000UL),
11136 const0_rtx, const0_rtx));
11137 exponents = validize_mem (force_const_mem (V4SImode, x));
11139 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
11140 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
11142 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
11143 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
11144 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
11145 (0x1.0p84 + double(fp_value_hi_xmm)).
11146 Note these exponents differ by 32. */
11148 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
11150 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
11151 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
11152 real_ldexp (&bias_lo_rvt, &dconst1, 52);
11153 real_ldexp (&bias_hi_rvt, &dconst1, 84);
11154 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
11155 x = const_double_from_real_value (bias_hi_rvt, DFmode);
11156 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
11157 biases = validize_mem (force_const_mem (V2DFmode, biases));
11158 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
11160 /* Add the upper and lower DFmode values together. */
11162 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
11165 x = copy_to_mode_reg (V2DFmode, fp_xmm);
11166 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
11167 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
11170 ix86_expand_vector_extract (false, target, fp_xmm, 0);
11173 /* Not used, but eases macroization of patterns. */
11175 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
11176 rtx input ATTRIBUTE_UNUSED)
11178 gcc_unreachable ();
11181 /* Convert an unsigned SImode value into a DFmode. Only currently used
11182 for SSE, but applicable anywhere. */
11185 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
11187 REAL_VALUE_TYPE TWO31r;
11190 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
11191 NULL, 1, OPTAB_DIRECT);
11193 fp = gen_reg_rtx (DFmode);
11194 emit_insn (gen_floatsidf2 (fp, x));
11196 real_ldexp (&TWO31r, &dconst1, 31);
11197 x = const_double_from_real_value (TWO31r, DFmode);
11199 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
11201 emit_move_insn (target, x);
11204 /* Convert a signed DImode value into a DFmode. Only used for SSE in
11205 32-bit mode; otherwise we have a direct convert instruction. */
11208 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
11210 REAL_VALUE_TYPE TWO32r;
11211 rtx fp_lo, fp_hi, x;
11213 fp_lo = gen_reg_rtx (DFmode);
11214 fp_hi = gen_reg_rtx (DFmode);
11216 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
11218 real_ldexp (&TWO32r, &dconst1, 32);
11219 x = const_double_from_real_value (TWO32r, DFmode);
11220 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
11222 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
11224 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
11227 emit_move_insn (target, x);
11230 /* Convert an unsigned SImode value into a SFmode, using only SSE.
11231 For x86_32, -mfpmath=sse, !optimize_size only. */
11233 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
11235 REAL_VALUE_TYPE ONE16r;
11236 rtx fp_hi, fp_lo, int_hi, int_lo, x;
11238 real_ldexp (&ONE16r, &dconst1, 16);
11239 x = const_double_from_real_value (ONE16r, SFmode);
11240 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
11241 NULL, 0, OPTAB_DIRECT);
11242 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
11243 NULL, 0, OPTAB_DIRECT);
11244 fp_hi = gen_reg_rtx (SFmode);
11245 fp_lo = gen_reg_rtx (SFmode);
11246 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
11247 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
11248 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
11250 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
11252 if (!rtx_equal_p (target, fp_hi))
11253 emit_move_insn (target, fp_hi);
11256 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
11257 then replicate the value for all elements of the vector
11261 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
11268 v = gen_rtvec (4, value, value, value, value);
11269 return gen_rtx_CONST_VECTOR (V4SImode, v);
11273 v = gen_rtvec (2, value, value);
11274 return gen_rtx_CONST_VECTOR (V2DImode, v);
11278 v = gen_rtvec (4, value, value, value, value);
11280 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
11281 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11282 return gen_rtx_CONST_VECTOR (V4SFmode, v);
11286 v = gen_rtvec (2, value, value);
11288 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
11289 return gen_rtx_CONST_VECTOR (V2DFmode, v);
11292 gcc_unreachable ();
11296 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
11297 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
11298 for an SSE register. If VECT is true, then replicate the mask for
11299 all elements of the vector register. If INVERT is true, then create
11300 a mask excluding the sign bit. */
11303 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
11305 enum machine_mode vec_mode, imode;
11306 HOST_WIDE_INT hi, lo;
11311 /* Find the sign bit, sign extended to 2*HWI. */
11317 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
11318 lo = 0x80000000, hi = lo < 0;
11324 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
11325 if (HOST_BITS_PER_WIDE_INT >= 64)
11326 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
11328 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
11334 vec_mode = VOIDmode;
11335 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
11336 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11340 gcc_unreachable ();
11344 lo = ~lo, hi = ~hi;
11346 /* Force this value into the low part of a fp vector constant. */
11347 mask = immed_double_const (lo, hi, imode);
11348 mask = gen_lowpart (mode, mask);
11350 if (vec_mode == VOIDmode)
11351 return force_reg (mode, mask);
11353 v = ix86_build_const_vector (mode, vect, mask);
11354 return force_reg (vec_mode, v);
11357 /* Generate code for floating point ABS or NEG. */
11360 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11363 rtx mask, set, use, clob, dst, src;
11364 bool use_sse = false;
11365 bool vector_mode = VECTOR_MODE_P (mode);
11366 enum machine_mode elt_mode = mode;
11370 elt_mode = GET_MODE_INNER (mode);
11373 else if (mode == TFmode)
11375 else if (TARGET_SSE_MATH)
11376 use_sse = SSE_FLOAT_MODE_P (mode);
11378 /* NEG and ABS performed with SSE use bitwise mask operations.
11379 Create the appropriate mask now. */
11381 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11390 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11391 set = gen_rtx_SET (VOIDmode, dst, set);
11396 set = gen_rtx_fmt_e (code, mode, src);
11397 set = gen_rtx_SET (VOIDmode, dst, set);
11400 use = gen_rtx_USE (VOIDmode, mask);
11401 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11402 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11403 gen_rtvec (3, set, use, clob)));
11410 /* Expand a copysign operation. Special case operand 0 being a constant. */
11413 ix86_expand_copysign (rtx operands[])
11415 enum machine_mode mode;
11416 rtx dest, op0, op1, mask, nmask;
11418 dest = operands[0];
11422 mode = GET_MODE (dest);
11424 if (GET_CODE (op0) == CONST_DOUBLE)
11426 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11428 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11429 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11431 if (mode == SFmode || mode == DFmode)
11433 enum machine_mode vmode;
11435 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11437 if (op0 == CONST0_RTX (mode))
11438 op0 = CONST0_RTX (vmode);
11443 if (mode == SFmode)
11444 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11445 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11447 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11449 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11452 else if (op0 != CONST0_RTX (mode))
11453 op0 = force_reg (mode, op0);
11455 mask = ix86_build_signbit_mask (mode, 0, 0);
11457 if (mode == SFmode)
11458 copysign_insn = gen_copysignsf3_const;
11459 else if (mode == DFmode)
11460 copysign_insn = gen_copysigndf3_const;
11462 copysign_insn = gen_copysigntf3_const;
11464 emit_insn (copysign_insn (dest, op0, op1, mask));
11468 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11470 nmask = ix86_build_signbit_mask (mode, 0, 1);
11471 mask = ix86_build_signbit_mask (mode, 0, 0);
11473 if (mode == SFmode)
11474 copysign_insn = gen_copysignsf3_var;
11475 else if (mode == DFmode)
11476 copysign_insn = gen_copysigndf3_var;
11478 copysign_insn = gen_copysigntf3_var;
11480 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11484 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11485 be a constant, and so has already been expanded into a vector constant. */
11488 ix86_split_copysign_const (rtx operands[])
11490 enum machine_mode mode, vmode;
11491 rtx dest, op0, op1, mask, x;
11493 dest = operands[0];
11496 mask = operands[3];
11498 mode = GET_MODE (dest);
11499 vmode = GET_MODE (mask);
11501 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11502 x = gen_rtx_AND (vmode, dest, mask);
11503 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11505 if (op0 != CONST0_RTX (vmode))
11507 x = gen_rtx_IOR (vmode, dest, op0);
11508 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11512 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11513 so we have to do two masks. */
11516 ix86_split_copysign_var (rtx operands[])
11518 enum machine_mode mode, vmode;
11519 rtx dest, scratch, op0, op1, mask, nmask, x;
11521 dest = operands[0];
11522 scratch = operands[1];
11525 nmask = operands[4];
11526 mask = operands[5];
11528 mode = GET_MODE (dest);
11529 vmode = GET_MODE (mask);
11531 if (rtx_equal_p (op0, op1))
11533 /* Shouldn't happen often (it's useless, obviously), but when it does
11534 we'd generate incorrect code if we continue below. */
11535 emit_move_insn (dest, op0);
11539 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11541 gcc_assert (REGNO (op1) == REGNO (scratch));
11543 x = gen_rtx_AND (vmode, scratch, mask);
11544 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11547 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11548 x = gen_rtx_NOT (vmode, dest);
11549 x = gen_rtx_AND (vmode, x, op0);
11550 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11554 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11556 x = gen_rtx_AND (vmode, scratch, mask);
11558 else /* alternative 2,4 */
11560 gcc_assert (REGNO (mask) == REGNO (scratch));
11561 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11562 x = gen_rtx_AND (vmode, scratch, op1);
11564 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11566 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11568 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11569 x = gen_rtx_AND (vmode, dest, nmask);
11571 else /* alternative 3,4 */
11573 gcc_assert (REGNO (nmask) == REGNO (dest));
11575 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11576 x = gen_rtx_AND (vmode, dest, op0);
11578 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11581 x = gen_rtx_IOR (vmode, dest, scratch);
11582 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11585 /* Return TRUE or FALSE depending on whether the first SET in INSN
11586 has source and destination with matching CC modes, and that the
11587 CC mode is at least as constrained as REQ_MODE. */
11590 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11593 enum machine_mode set_mode;
11595 set = PATTERN (insn);
11596 if (GET_CODE (set) == PARALLEL)
11597 set = XVECEXP (set, 0, 0);
11598 gcc_assert (GET_CODE (set) == SET);
11599 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11601 set_mode = GET_MODE (SET_DEST (set));
11605 if (req_mode != CCNOmode
11606 && (req_mode != CCmode
11607 || XEXP (SET_SRC (set), 1) != const0_rtx))
11611 if (req_mode == CCGCmode)
11615 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11619 if (req_mode == CCZmode)
11626 gcc_unreachable ();
11629 return (GET_MODE (SET_SRC (set)) == set_mode);
11632 /* Generate insn patterns to do an integer compare of OPERANDS. */
11635 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11637 enum machine_mode cmpmode;
11640 cmpmode = SELECT_CC_MODE (code, op0, op1);
11641 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11643 /* This is very simple, but making the interface the same as in the
11644 FP case makes the rest of the code easier. */
11645 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11646 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11648 /* Return the test that should be put into the flags user, i.e.
11649 the bcc, scc, or cmov instruction. */
11650 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11653 /* Figure out whether to use ordered or unordered fp comparisons.
11654 Return the appropriate mode to use. */
11657 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11659 /* ??? In order to make all comparisons reversible, we do all comparisons
11660 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11661 all forms trapping and nontrapping comparisons, we can make inequality
11662 comparisons trapping again, since it results in better code when using
11663 FCOM based compares. */
11664 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11668 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11670 enum machine_mode mode = GET_MODE (op0);
11672 if (SCALAR_FLOAT_MODE_P (mode))
11674 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11675 return ix86_fp_compare_mode (code);
11680 /* Only zero flag is needed. */
11681 case EQ: /* ZF=0 */
11682 case NE: /* ZF!=0 */
11684 /* Codes needing carry flag. */
11685 case GEU: /* CF=0 */
11686 case LTU: /* CF=1 */
11687 /* Detect overflow checks. They need just the carry flag. */
11688 if (GET_CODE (op0) == PLUS
11689 && rtx_equal_p (op1, XEXP (op0, 0)))
11693 case GTU: /* CF=0 & ZF=0 */
11694 case LEU: /* CF=1 | ZF=1 */
11695 /* Detect overflow checks. They need just the carry flag. */
11696 if (GET_CODE (op0) == MINUS
11697 && rtx_equal_p (op1, XEXP (op0, 0)))
11701 /* Codes possibly doable only with sign flag when
11702 comparing against zero. */
11703 case GE: /* SF=OF or SF=0 */
11704 case LT: /* SF<>OF or SF=1 */
11705 if (op1 == const0_rtx)
11708 /* For other cases Carry flag is not required. */
11710 /* Codes doable only with sign flag when comparing
11711 against zero, but we miss jump instruction for it
11712 so we need to use relational tests against overflow
11713 that thus needs to be zero. */
11714 case GT: /* ZF=0 & SF=OF */
11715 case LE: /* ZF=1 | SF<>OF */
11716 if (op1 == const0_rtx)
11720 /* strcmp pattern do (use flags) and combine may ask us for proper
11725 gcc_unreachable ();
11729 /* Return the fixed registers used for condition codes. */
11732 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11739 /* If two condition code modes are compatible, return a condition code
11740 mode which is compatible with both. Otherwise, return
11743 static enum machine_mode
11744 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11749 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11752 if ((m1 == CCGCmode && m2 == CCGOCmode)
11753 || (m1 == CCGOCmode && m2 == CCGCmode))
11759 gcc_unreachable ();
11789 /* These are only compatible with themselves, which we already
11795 /* Split comparison code CODE into comparisons we can do using branch
11796 instructions. BYPASS_CODE is comparison code for branch that will
11797 branch around FIRST_CODE and SECOND_CODE. If some of branches
11798 is not required, set value to UNKNOWN.
11799 We never require more than two branches. */
11802 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11803 enum rtx_code *first_code,
11804 enum rtx_code *second_code)
11806 *first_code = code;
11807 *bypass_code = UNKNOWN;
11808 *second_code = UNKNOWN;
11810 /* The fcomi comparison sets flags as follows:
11820 case GT: /* GTU - CF=0 & ZF=0 */
11821 case GE: /* GEU - CF=0 */
11822 case ORDERED: /* PF=0 */
11823 case UNORDERED: /* PF=1 */
11824 case UNEQ: /* EQ - ZF=1 */
11825 case UNLT: /* LTU - CF=1 */
11826 case UNLE: /* LEU - CF=1 | ZF=1 */
11827 case LTGT: /* EQ - ZF=0 */
11829 case LT: /* LTU - CF=1 - fails on unordered */
11830 *first_code = UNLT;
11831 *bypass_code = UNORDERED;
11833 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11834 *first_code = UNLE;
11835 *bypass_code = UNORDERED;
11837 case EQ: /* EQ - ZF=1 - fails on unordered */
11838 *first_code = UNEQ;
11839 *bypass_code = UNORDERED;
11841 case NE: /* NE - ZF=0 - fails on unordered */
11842 *first_code = LTGT;
11843 *second_code = UNORDERED;
11845 case UNGE: /* GEU - CF=0 - fails on unordered */
11847 *second_code = UNORDERED;
11849 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11851 *second_code = UNORDERED;
11854 gcc_unreachable ();
11856 if (!TARGET_IEEE_FP)
11858 *second_code = UNKNOWN;
11859 *bypass_code = UNKNOWN;
11863 /* Return cost of comparison done fcom + arithmetics operations on AX.
11864 All following functions do use number of instructions as a cost metrics.
11865 In future this should be tweaked to compute bytes for optimize_size and
11866 take into account performance of various instructions on various CPUs. */
11868 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11870 if (!TARGET_IEEE_FP)
11872 /* The cost of code output by ix86_expand_fp_compare. */
11896 gcc_unreachable ();
11900 /* Return cost of comparison done using fcomi operation.
11901 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11903 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11905 enum rtx_code bypass_code, first_code, second_code;
11906 /* Return arbitrarily high cost when instruction is not supported - this
11907 prevents gcc from using it. */
11910 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11911 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11914 /* Return cost of comparison done using sahf operation.
11915 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11917 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11919 enum rtx_code bypass_code, first_code, second_code;
11920 /* Return arbitrarily high cost when instruction is not preferred - this
11921 avoids gcc from using it. */
11922 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11924 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11925 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11928 /* Compute cost of the comparison done using any method.
11929 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11931 ix86_fp_comparison_cost (enum rtx_code code)
11933 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11936 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11937 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11939 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11940 if (min > sahf_cost)
11942 if (min > fcomi_cost)
11947 /* Return true if we should use an FCOMI instruction for this
11951 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11953 enum rtx_code swapped_code = swap_condition (code);
11955 return ((ix86_fp_comparison_cost (code)
11956 == ix86_fp_comparison_fcomi_cost (code))
11957 || (ix86_fp_comparison_cost (swapped_code)
11958 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11961 /* Swap, force into registers, or otherwise massage the two operands
11962 to a fp comparison. The operands are updated in place; the new
11963 comparison code is returned. */
11965 static enum rtx_code
11966 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11968 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11969 rtx op0 = *pop0, op1 = *pop1;
11970 enum machine_mode op_mode = GET_MODE (op0);
11971 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11973 /* All of the unordered compare instructions only work on registers.
11974 The same is true of the fcomi compare instructions. The XFmode
11975 compare instructions require registers except when comparing
11976 against zero or when converting operand 1 from fixed point to
11980 && (fpcmp_mode == CCFPUmode
11981 || (op_mode == XFmode
11982 && ! (standard_80387_constant_p (op0) == 1
11983 || standard_80387_constant_p (op1) == 1)
11984 && GET_CODE (op1) != FLOAT)
11985 || ix86_use_fcomi_compare (code)))
11987 op0 = force_reg (op_mode, op0);
11988 op1 = force_reg (op_mode, op1);
11992 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11993 things around if they appear profitable, otherwise force op0
11994 into a register. */
11996 if (standard_80387_constant_p (op0) == 0
11998 && ! (standard_80387_constant_p (op1) == 0
12002 tmp = op0, op0 = op1, op1 = tmp;
12003 code = swap_condition (code);
12007 op0 = force_reg (op_mode, op0);
12009 if (CONSTANT_P (op1))
12011 int tmp = standard_80387_constant_p (op1);
12013 op1 = validize_mem (force_const_mem (op_mode, op1));
12017 op1 = force_reg (op_mode, op1);
12020 op1 = force_reg (op_mode, op1);
12024 /* Try to rearrange the comparison to make it cheaper. */
12025 if (ix86_fp_comparison_cost (code)
12026 > ix86_fp_comparison_cost (swap_condition (code))
12027 && (REG_P (op1) || can_create_pseudo_p ()))
12030 tmp = op0, op0 = op1, op1 = tmp;
12031 code = swap_condition (code);
12033 op0 = force_reg (op_mode, op0);
12041 /* Convert comparison codes we use to represent FP comparison to integer
12042 code that will result in proper branch. Return UNKNOWN if no such code
12046 ix86_fp_compare_code_to_integer (enum rtx_code code)
12075 /* Generate insn patterns to do a floating point compare of OPERANDS. */
12078 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
12079 rtx *second_test, rtx *bypass_test)
12081 enum machine_mode fpcmp_mode, intcmp_mode;
12083 int cost = ix86_fp_comparison_cost (code);
12084 enum rtx_code bypass_code, first_code, second_code;
12086 fpcmp_mode = ix86_fp_compare_mode (code);
12087 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
12090 *second_test = NULL_RTX;
12092 *bypass_test = NULL_RTX;
12094 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12096 /* Do fcomi/sahf based test when profitable. */
12097 if (ix86_fp_comparison_arithmetics_cost (code) > cost
12098 && (bypass_code == UNKNOWN || bypass_test)
12099 && (second_code == UNKNOWN || second_test))
12101 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
12102 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
12108 gcc_assert (TARGET_SAHF);
12111 scratch = gen_reg_rtx (HImode);
12112 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
12114 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
12117 /* The FP codes work out to act like unsigned. */
12118 intcmp_mode = fpcmp_mode;
12120 if (bypass_code != UNKNOWN)
12121 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
12122 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12124 if (second_code != UNKNOWN)
12125 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
12126 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12131 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
12132 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
12133 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
12135 scratch = gen_reg_rtx (HImode);
12136 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
12138 /* In the unordered case, we have to check C2 for NaN's, which
12139 doesn't happen to work out to anything nice combination-wise.
12140 So do some bit twiddling on the value we've got in AH to come
12141 up with an appropriate set of condition codes. */
12143 intcmp_mode = CCNOmode;
12148 if (code == GT || !TARGET_IEEE_FP)
12150 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12155 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12156 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12157 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
12158 intcmp_mode = CCmode;
12164 if (code == LT && TARGET_IEEE_FP)
12166 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12167 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
12168 intcmp_mode = CCmode;
12173 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
12179 if (code == GE || !TARGET_IEEE_FP)
12181 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
12186 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12187 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12194 if (code == LE && TARGET_IEEE_FP)
12196 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12197 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12198 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12199 intcmp_mode = CCmode;
12204 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12210 if (code == EQ && TARGET_IEEE_FP)
12212 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12213 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12214 intcmp_mode = CCmode;
12219 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12226 if (code == NE && TARGET_IEEE_FP)
12228 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12229 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12235 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12241 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12245 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12250 gcc_unreachable ();
12254 /* Return the test that should be put into the flags user, i.e.
12255 the bcc, scc, or cmov instruction. */
12256 return gen_rtx_fmt_ee (code, VOIDmode,
12257 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12262 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
12265 op0 = ix86_compare_op0;
12266 op1 = ix86_compare_op1;
12269 *second_test = NULL_RTX;
12271 *bypass_test = NULL_RTX;
12273 if (ix86_compare_emitted)
12275 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
12276 ix86_compare_emitted = NULL_RTX;
12278 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
12280 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
12281 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12282 second_test, bypass_test);
12285 ret = ix86_expand_int_compare (code, op0, op1);
12290 /* Return true if the CODE will result in nontrivial jump sequence. */
12292 ix86_fp_jump_nontrivial_p (enum rtx_code code)
12294 enum rtx_code bypass_code, first_code, second_code;
12297 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12298 return bypass_code != UNKNOWN || second_code != UNKNOWN;
12302 ix86_expand_branch (enum rtx_code code, rtx label)
12306 /* If we have emitted a compare insn, go straight to simple.
12307 ix86_expand_compare won't emit anything if ix86_compare_emitted
12309 if (ix86_compare_emitted)
12312 switch (GET_MODE (ix86_compare_op0))
12318 tmp = ix86_expand_compare (code, NULL, NULL);
12319 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12320 gen_rtx_LABEL_REF (VOIDmode, label),
12322 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12331 enum rtx_code bypass_code, first_code, second_code;
12333 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12334 &ix86_compare_op1);
12336 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12338 /* Check whether we will use the natural sequence with one jump. If
12339 so, we can expand jump early. Otherwise delay expansion by
12340 creating compound insn to not confuse optimizers. */
12341 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12343 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12344 gen_rtx_LABEL_REF (VOIDmode, label),
12345 pc_rtx, NULL_RTX, NULL_RTX);
12349 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12350 ix86_compare_op0, ix86_compare_op1);
12351 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12352 gen_rtx_LABEL_REF (VOIDmode, label),
12354 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12356 use_fcomi = ix86_use_fcomi_compare (code);
12357 vec = rtvec_alloc (3 + !use_fcomi);
12358 RTVEC_ELT (vec, 0) = tmp;
12360 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12362 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12365 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12367 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12376 /* Expand DImode branch into multiple compare+branch. */
12378 rtx lo[2], hi[2], label2;
12379 enum rtx_code code1, code2, code3;
12380 enum machine_mode submode;
12382 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12384 tmp = ix86_compare_op0;
12385 ix86_compare_op0 = ix86_compare_op1;
12386 ix86_compare_op1 = tmp;
12387 code = swap_condition (code);
12389 if (GET_MODE (ix86_compare_op0) == DImode)
12391 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12392 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12397 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12398 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12402 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12403 avoid two branches. This costs one extra insn, so disable when
12404 optimizing for size. */
12406 if ((code == EQ || code == NE)
12408 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12413 if (hi[1] != const0_rtx)
12414 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12415 NULL_RTX, 0, OPTAB_WIDEN);
12418 if (lo[1] != const0_rtx)
12419 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12420 NULL_RTX, 0, OPTAB_WIDEN);
12422 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12423 NULL_RTX, 0, OPTAB_WIDEN);
12425 ix86_compare_op0 = tmp;
12426 ix86_compare_op1 = const0_rtx;
12427 ix86_expand_branch (code, label);
12431 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12432 op1 is a constant and the low word is zero, then we can just
12433 examine the high word. Similarly for low word -1 and
12434 less-or-equal-than or greater-than. */
12436 if (CONST_INT_P (hi[1]))
12439 case LT: case LTU: case GE: case GEU:
12440 if (lo[1] == const0_rtx)
12442 ix86_compare_op0 = hi[0];
12443 ix86_compare_op1 = hi[1];
12444 ix86_expand_branch (code, label);
12448 case LE: case LEU: case GT: case GTU:
12449 if (lo[1] == constm1_rtx)
12451 ix86_compare_op0 = hi[0];
12452 ix86_compare_op1 = hi[1];
12453 ix86_expand_branch (code, label);
12461 /* Otherwise, we need two or three jumps. */
12463 label2 = gen_label_rtx ();
12466 code2 = swap_condition (code);
12467 code3 = unsigned_condition (code);
12471 case LT: case GT: case LTU: case GTU:
12474 case LE: code1 = LT; code2 = GT; break;
12475 case GE: code1 = GT; code2 = LT; break;
12476 case LEU: code1 = LTU; code2 = GTU; break;
12477 case GEU: code1 = GTU; code2 = LTU; break;
12479 case EQ: code1 = UNKNOWN; code2 = NE; break;
12480 case NE: code2 = UNKNOWN; break;
12483 gcc_unreachable ();
12488 * if (hi(a) < hi(b)) goto true;
12489 * if (hi(a) > hi(b)) goto false;
12490 * if (lo(a) < lo(b)) goto true;
12494 ix86_compare_op0 = hi[0];
12495 ix86_compare_op1 = hi[1];
12497 if (code1 != UNKNOWN)
12498 ix86_expand_branch (code1, label);
12499 if (code2 != UNKNOWN)
12500 ix86_expand_branch (code2, label2);
12502 ix86_compare_op0 = lo[0];
12503 ix86_compare_op1 = lo[1];
12504 ix86_expand_branch (code3, label);
12506 if (code2 != UNKNOWN)
12507 emit_label (label2);
12512 gcc_unreachable ();
12516 /* Split branch based on floating point condition. */
12518 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12519 rtx target1, rtx target2, rtx tmp, rtx pushed)
12521 rtx second, bypass;
12522 rtx label = NULL_RTX;
12524 int bypass_probability = -1, second_probability = -1, probability = -1;
12527 if (target2 != pc_rtx)
12530 code = reverse_condition_maybe_unordered (code);
12535 condition = ix86_expand_fp_compare (code, op1, op2,
12536 tmp, &second, &bypass);
12538 /* Remove pushed operand from stack. */
12540 ix86_free_from_memory (GET_MODE (pushed));
12542 if (split_branch_probability >= 0)
12544 /* Distribute the probabilities across the jumps.
12545 Assume the BYPASS and SECOND to be always test
12547 probability = split_branch_probability;
12549 /* Value of 1 is low enough to make no need for probability
12550 to be updated. Later we may run some experiments and see
12551 if unordered values are more frequent in practice. */
12553 bypass_probability = 1;
12555 second_probability = 1;
12557 if (bypass != NULL_RTX)
12559 label = gen_label_rtx ();
12560 i = emit_jump_insn (gen_rtx_SET
12562 gen_rtx_IF_THEN_ELSE (VOIDmode,
12564 gen_rtx_LABEL_REF (VOIDmode,
12567 if (bypass_probability >= 0)
12569 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12570 GEN_INT (bypass_probability),
12573 i = emit_jump_insn (gen_rtx_SET
12575 gen_rtx_IF_THEN_ELSE (VOIDmode,
12576 condition, target1, target2)));
12577 if (probability >= 0)
12579 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12580 GEN_INT (probability),
12582 if (second != NULL_RTX)
12584 i = emit_jump_insn (gen_rtx_SET
12586 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12588 if (second_probability >= 0)
12590 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12591 GEN_INT (second_probability),
12594 if (label != NULL_RTX)
12595 emit_label (label);
12599 ix86_expand_setcc (enum rtx_code code, rtx dest)
12601 rtx ret, tmp, tmpreg, equiv;
12602 rtx second_test, bypass_test;
12604 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12605 return 0; /* FAIL */
12607 gcc_assert (GET_MODE (dest) == QImode);
12609 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12610 PUT_MODE (ret, QImode);
12615 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12616 if (bypass_test || second_test)
12618 rtx test = second_test;
12620 rtx tmp2 = gen_reg_rtx (QImode);
12623 gcc_assert (!second_test);
12624 test = bypass_test;
12626 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12628 PUT_MODE (test, QImode);
12629 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12632 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12634 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12637 /* Attach a REG_EQUAL note describing the comparison result. */
12638 if (ix86_compare_op0 && ix86_compare_op1)
12640 equiv = simplify_gen_relational (code, QImode,
12641 GET_MODE (ix86_compare_op0),
12642 ix86_compare_op0, ix86_compare_op1);
12643 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12646 return 1; /* DONE */
12649 /* Expand comparison setting or clearing carry flag. Return true when
12650 successful and set pop for the operation. */
12652 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12654 enum machine_mode mode =
12655 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12657 /* Do not handle DImode compares that go through special path. */
12658 if (mode == (TARGET_64BIT ? TImode : DImode))
12661 if (SCALAR_FLOAT_MODE_P (mode))
12663 rtx second_test = NULL, bypass_test = NULL;
12664 rtx compare_op, compare_seq;
12666 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12668 /* Shortcut: following common codes never translate
12669 into carry flag compares. */
12670 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12671 || code == ORDERED || code == UNORDERED)
12674 /* These comparisons require zero flag; swap operands so they won't. */
12675 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12676 && !TARGET_IEEE_FP)
12681 code = swap_condition (code);
12684 /* Try to expand the comparison and verify that we end up with
12685 carry flag based comparison. This fails to be true only when
12686 we decide to expand comparison using arithmetic that is not
12687 too common scenario. */
12689 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12690 &second_test, &bypass_test);
12691 compare_seq = get_insns ();
12694 if (second_test || bypass_test)
12697 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12698 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12699 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12701 code = GET_CODE (compare_op);
12703 if (code != LTU && code != GEU)
12706 emit_insn (compare_seq);
12711 if (!INTEGRAL_MODE_P (mode))
12720 /* Convert a==0 into (unsigned)a<1. */
12723 if (op1 != const0_rtx)
12726 code = (code == EQ ? LTU : GEU);
12729 /* Convert a>b into b<a or a>=b-1. */
12732 if (CONST_INT_P (op1))
12734 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12735 /* Bail out on overflow. We still can swap operands but that
12736 would force loading of the constant into register. */
12737 if (op1 == const0_rtx
12738 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12740 code = (code == GTU ? GEU : LTU);
12747 code = (code == GTU ? LTU : GEU);
12751 /* Convert a>=0 into (unsigned)a<0x80000000. */
12754 if (mode == DImode || op1 != const0_rtx)
12756 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12757 code = (code == LT ? GEU : LTU);
12761 if (mode == DImode || op1 != constm1_rtx)
12763 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12764 code = (code == LE ? GEU : LTU);
12770 /* Swapping operands may cause constant to appear as first operand. */
12771 if (!nonimmediate_operand (op0, VOIDmode))
12773 if (!can_create_pseudo_p ())
12775 op0 = force_reg (mode, op0);
12777 ix86_compare_op0 = op0;
12778 ix86_compare_op1 = op1;
12779 *pop = ix86_expand_compare (code, NULL, NULL);
12780 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12785 ix86_expand_int_movcc (rtx operands[])
12787 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12788 rtx compare_seq, compare_op;
12789 rtx second_test, bypass_test;
12790 enum machine_mode mode = GET_MODE (operands[0]);
12791 bool sign_bit_compare_p = false;;
12794 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12795 compare_seq = get_insns ();
12798 compare_code = GET_CODE (compare_op);
12800 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12801 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12802 sign_bit_compare_p = true;
12804 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12805 HImode insns, we'd be swallowed in word prefix ops. */
12807 if ((mode != HImode || TARGET_FAST_PREFIX)
12808 && (mode != (TARGET_64BIT ? TImode : DImode))
12809 && CONST_INT_P (operands[2])
12810 && CONST_INT_P (operands[3]))
12812 rtx out = operands[0];
12813 HOST_WIDE_INT ct = INTVAL (operands[2]);
12814 HOST_WIDE_INT cf = INTVAL (operands[3]);
12815 HOST_WIDE_INT diff;
12818 /* Sign bit compares are better done using shifts than we do by using
12820 if (sign_bit_compare_p
12821 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12822 ix86_compare_op1, &compare_op))
12824 /* Detect overlap between destination and compare sources. */
12827 if (!sign_bit_compare_p)
12829 bool fpcmp = false;
12831 compare_code = GET_CODE (compare_op);
12833 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12834 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12837 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12840 /* To simplify rest of code, restrict to the GEU case. */
12841 if (compare_code == LTU)
12843 HOST_WIDE_INT tmp = ct;
12846 compare_code = reverse_condition (compare_code);
12847 code = reverse_condition (code);
12852 PUT_CODE (compare_op,
12853 reverse_condition_maybe_unordered
12854 (GET_CODE (compare_op)));
12856 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12860 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12861 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12862 tmp = gen_reg_rtx (mode);
12864 if (mode == DImode)
12865 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12867 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12871 if (code == GT || code == GE)
12872 code = reverse_condition (code);
12875 HOST_WIDE_INT tmp = ct;
12880 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12881 ix86_compare_op1, VOIDmode, 0, -1);
12894 tmp = expand_simple_binop (mode, PLUS,
12896 copy_rtx (tmp), 1, OPTAB_DIRECT);
12907 tmp = expand_simple_binop (mode, IOR,
12909 copy_rtx (tmp), 1, OPTAB_DIRECT);
12911 else if (diff == -1 && ct)
12921 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12923 tmp = expand_simple_binop (mode, PLUS,
12924 copy_rtx (tmp), GEN_INT (cf),
12925 copy_rtx (tmp), 1, OPTAB_DIRECT);
12933 * andl cf - ct, dest
12943 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12946 tmp = expand_simple_binop (mode, AND,
12948 gen_int_mode (cf - ct, mode),
12949 copy_rtx (tmp), 1, OPTAB_DIRECT);
12951 tmp = expand_simple_binop (mode, PLUS,
12952 copy_rtx (tmp), GEN_INT (ct),
12953 copy_rtx (tmp), 1, OPTAB_DIRECT);
12956 if (!rtx_equal_p (tmp, out))
12957 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12959 return 1; /* DONE */
12964 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12967 tmp = ct, ct = cf, cf = tmp;
12970 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12972 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12974 /* We may be reversing unordered compare to normal compare, that
12975 is not valid in general (we may convert non-trapping condition
12976 to trapping one), however on i386 we currently emit all
12977 comparisons unordered. */
12978 compare_code = reverse_condition_maybe_unordered (compare_code);
12979 code = reverse_condition_maybe_unordered (code);
12983 compare_code = reverse_condition (compare_code);
12984 code = reverse_condition (code);
12988 compare_code = UNKNOWN;
12989 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12990 && CONST_INT_P (ix86_compare_op1))
12992 if (ix86_compare_op1 == const0_rtx
12993 && (code == LT || code == GE))
12994 compare_code = code;
12995 else if (ix86_compare_op1 == constm1_rtx)
12999 else if (code == GT)
13004 /* Optimize dest = (op0 < 0) ? -1 : cf. */
13005 if (compare_code != UNKNOWN
13006 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
13007 && (cf == -1 || ct == -1))
13009 /* If lea code below could be used, only optimize
13010 if it results in a 2 insn sequence. */
13012 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
13013 || diff == 3 || diff == 5 || diff == 9)
13014 || (compare_code == LT && ct == -1)
13015 || (compare_code == GE && cf == -1))
13018 * notl op1 (if necessary)
13026 code = reverse_condition (code);
13029 out = emit_store_flag (out, code, ix86_compare_op0,
13030 ix86_compare_op1, VOIDmode, 0, -1);
13032 out = expand_simple_binop (mode, IOR,
13034 out, 1, OPTAB_DIRECT);
13035 if (out != operands[0])
13036 emit_move_insn (operands[0], out);
13038 return 1; /* DONE */
13043 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
13044 || diff == 3 || diff == 5 || diff == 9)
13045 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
13047 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
13053 * lea cf(dest*(ct-cf)),dest
13057 * This also catches the degenerate setcc-only case.
13063 out = emit_store_flag (out, code, ix86_compare_op0,
13064 ix86_compare_op1, VOIDmode, 0, 1);
13067 /* On x86_64 the lea instruction operates on Pmode, so we need
13068 to get arithmetics done in proper mode to match. */
13070 tmp = copy_rtx (out);
13074 out1 = copy_rtx (out);
13075 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
13079 tmp = gen_rtx_PLUS (mode, tmp, out1);
13085 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
13088 if (!rtx_equal_p (tmp, out))
13091 out = force_operand (tmp, copy_rtx (out));
13093 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
13095 if (!rtx_equal_p (out, operands[0]))
13096 emit_move_insn (operands[0], copy_rtx (out));
13098 return 1; /* DONE */
13102 * General case: Jumpful:
13103 * xorl dest,dest cmpl op1, op2
13104 * cmpl op1, op2 movl ct, dest
13105 * setcc dest jcc 1f
13106 * decl dest movl cf, dest
13107 * andl (cf-ct),dest 1:
13110 * Size 20. Size 14.
13112 * This is reasonably steep, but branch mispredict costs are
13113 * high on modern cpus, so consider failing only if optimizing
13117 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13118 && BRANCH_COST >= 2)
13122 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
13127 if (SCALAR_FLOAT_MODE_P (cmp_mode))
13129 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
13131 /* We may be reversing unordered compare to normal compare,
13132 that is not valid in general (we may convert non-trapping
13133 condition to trapping one), however on i386 we currently
13134 emit all comparisons unordered. */
13135 code = reverse_condition_maybe_unordered (code);
13139 code = reverse_condition (code);
13140 if (compare_code != UNKNOWN)
13141 compare_code = reverse_condition (compare_code);
13145 if (compare_code != UNKNOWN)
13147 /* notl op1 (if needed)
13152 For x < 0 (resp. x <= -1) there will be no notl,
13153 so if possible swap the constants to get rid of the
13155 True/false will be -1/0 while code below (store flag
13156 followed by decrement) is 0/-1, so the constants need
13157 to be exchanged once more. */
13159 if (compare_code == GE || !cf)
13161 code = reverse_condition (code);
13166 HOST_WIDE_INT tmp = cf;
13171 out = emit_store_flag (out, code, ix86_compare_op0,
13172 ix86_compare_op1, VOIDmode, 0, -1);
13176 out = emit_store_flag (out, code, ix86_compare_op0,
13177 ix86_compare_op1, VOIDmode, 0, 1);
13179 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
13180 copy_rtx (out), 1, OPTAB_DIRECT);
13183 out = expand_simple_binop (mode, AND, copy_rtx (out),
13184 gen_int_mode (cf - ct, mode),
13185 copy_rtx (out), 1, OPTAB_DIRECT);
13187 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
13188 copy_rtx (out), 1, OPTAB_DIRECT);
13189 if (!rtx_equal_p (out, operands[0]))
13190 emit_move_insn (operands[0], copy_rtx (out));
13192 return 1; /* DONE */
13196 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13198 /* Try a few things more with specific constants and a variable. */
13201 rtx var, orig_out, out, tmp;
13203 if (BRANCH_COST <= 2)
13204 return 0; /* FAIL */
13206 /* If one of the two operands is an interesting constant, load a
13207 constant with the above and mask it in with a logical operation. */
13209 if (CONST_INT_P (operands[2]))
13212 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
13213 operands[3] = constm1_rtx, op = and_optab;
13214 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
13215 operands[3] = const0_rtx, op = ior_optab;
13217 return 0; /* FAIL */
13219 else if (CONST_INT_P (operands[3]))
13222 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
13223 operands[2] = constm1_rtx, op = and_optab;
13224 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
13225 operands[2] = const0_rtx, op = ior_optab;
13227 return 0; /* FAIL */
13230 return 0; /* FAIL */
13232 orig_out = operands[0];
13233 tmp = gen_reg_rtx (mode);
13236 /* Recurse to get the constant loaded. */
13237 if (ix86_expand_int_movcc (operands) == 0)
13238 return 0; /* FAIL */
13240 /* Mask in the interesting variable. */
13241 out = expand_binop (mode, op, var, tmp, orig_out, 0,
13243 if (!rtx_equal_p (out, orig_out))
13244 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
13246 return 1; /* DONE */
13250 * For comparison with above,
13260 if (! nonimmediate_operand (operands[2], mode))
13261 operands[2] = force_reg (mode, operands[2]);
13262 if (! nonimmediate_operand (operands[3], mode))
13263 operands[3] = force_reg (mode, operands[3]);
13265 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13267 rtx tmp = gen_reg_rtx (mode);
13268 emit_move_insn (tmp, operands[3]);
13271 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13273 rtx tmp = gen_reg_rtx (mode);
13274 emit_move_insn (tmp, operands[2]);
13278 if (! register_operand (operands[2], VOIDmode)
13280 || ! register_operand (operands[3], VOIDmode)))
13281 operands[2] = force_reg (mode, operands[2]);
13284 && ! register_operand (operands[3], VOIDmode))
13285 operands[3] = force_reg (mode, operands[3]);
13287 emit_insn (compare_seq);
13288 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13289 gen_rtx_IF_THEN_ELSE (mode,
13290 compare_op, operands[2],
13293 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13294 gen_rtx_IF_THEN_ELSE (mode,
13296 copy_rtx (operands[3]),
13297 copy_rtx (operands[0]))));
13299 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13300 gen_rtx_IF_THEN_ELSE (mode,
13302 copy_rtx (operands[2]),
13303 copy_rtx (operands[0]))));
13305 return 1; /* DONE */
13308 /* Swap, force into registers, or otherwise massage the two operands
13309 to an sse comparison with a mask result. Thus we differ a bit from
13310 ix86_prepare_fp_compare_args which expects to produce a flags result.
13312 The DEST operand exists to help determine whether to commute commutative
13313 operators. The POP0/POP1 operands are updated in place. The new
13314 comparison code is returned, or UNKNOWN if not implementable. */
13316 static enum rtx_code
13317 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13318 rtx *pop0, rtx *pop1)
13326 /* We have no LTGT as an operator. We could implement it with
13327 NE & ORDERED, but this requires an extra temporary. It's
13328 not clear that it's worth it. */
13335 /* These are supported directly. */
13342 /* For commutative operators, try to canonicalize the destination
13343 operand to be first in the comparison - this helps reload to
13344 avoid extra moves. */
13345 if (!dest || !rtx_equal_p (dest, *pop1))
13353 /* These are not supported directly. Swap the comparison operands
13354 to transform into something that is supported. */
13358 code = swap_condition (code);
13362 gcc_unreachable ();
13368 /* Detect conditional moves that exactly match min/max operational
13369 semantics. Note that this is IEEE safe, as long as we don't
13370 interchange the operands.
13372 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13373 and TRUE if the operation is successful and instructions are emitted. */
13376 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13377 rtx cmp_op1, rtx if_true, rtx if_false)
13379 enum machine_mode mode;
13385 else if (code == UNGE)
13388 if_true = if_false;
13394 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13396 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13401 mode = GET_MODE (dest);
13403 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13404 but MODE may be a vector mode and thus not appropriate. */
13405 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13407 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13410 if_true = force_reg (mode, if_true);
13411 v = gen_rtvec (2, if_true, if_false);
13412 tmp = gen_rtx_UNSPEC (mode, v, u);
13416 code = is_min ? SMIN : SMAX;
13417 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13420 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13424 /* Expand an sse vector comparison. Return the register with the result. */
13427 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13428 rtx op_true, rtx op_false)
13430 enum machine_mode mode = GET_MODE (dest);
13433 cmp_op0 = force_reg (mode, cmp_op0);
13434 if (!nonimmediate_operand (cmp_op1, mode))
13435 cmp_op1 = force_reg (mode, cmp_op1);
13438 || reg_overlap_mentioned_p (dest, op_true)
13439 || reg_overlap_mentioned_p (dest, op_false))
13440 dest = gen_reg_rtx (mode);
13442 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13443 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13448 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13449 operations. This is used for both scalar and vector conditional moves. */
13452 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13454 enum machine_mode mode = GET_MODE (dest);
13457 if (op_false == CONST0_RTX (mode))
13459 op_true = force_reg (mode, op_true);
13460 x = gen_rtx_AND (mode, cmp, op_true);
13461 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13463 else if (op_true == CONST0_RTX (mode))
13465 op_false = force_reg (mode, op_false);
13466 x = gen_rtx_NOT (mode, cmp);
13467 x = gen_rtx_AND (mode, x, op_false);
13468 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13470 else if (TARGET_SSE5)
13472 rtx pcmov = gen_rtx_SET (mode, dest,
13473 gen_rtx_IF_THEN_ELSE (mode, cmp,
13480 op_true = force_reg (mode, op_true);
13481 op_false = force_reg (mode, op_false);
13483 t2 = gen_reg_rtx (mode);
13485 t3 = gen_reg_rtx (mode);
13489 x = gen_rtx_AND (mode, op_true, cmp);
13490 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13492 x = gen_rtx_NOT (mode, cmp);
13493 x = gen_rtx_AND (mode, x, op_false);
13494 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13496 x = gen_rtx_IOR (mode, t3, t2);
13497 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13501 /* Expand a floating-point conditional move. Return true if successful. */
13504 ix86_expand_fp_movcc (rtx operands[])
13506 enum machine_mode mode = GET_MODE (operands[0]);
13507 enum rtx_code code = GET_CODE (operands[1]);
13508 rtx tmp, compare_op, second_test, bypass_test;
13510 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13512 enum machine_mode cmode;
13514 /* Since we've no cmove for sse registers, don't force bad register
13515 allocation just to gain access to it. Deny movcc when the
13516 comparison mode doesn't match the move mode. */
13517 cmode = GET_MODE (ix86_compare_op0);
13518 if (cmode == VOIDmode)
13519 cmode = GET_MODE (ix86_compare_op1);
13523 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13525 &ix86_compare_op1);
13526 if (code == UNKNOWN)
13529 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13530 ix86_compare_op1, operands[2],
13534 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13535 ix86_compare_op1, operands[2], operands[3]);
13536 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13540 /* The floating point conditional move instructions don't directly
13541 support conditions resulting from a signed integer comparison. */
13543 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13545 /* The floating point conditional move instructions don't directly
13546 support signed integer comparisons. */
13548 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13550 gcc_assert (!second_test && !bypass_test);
13551 tmp = gen_reg_rtx (QImode);
13552 ix86_expand_setcc (code, tmp);
13554 ix86_compare_op0 = tmp;
13555 ix86_compare_op1 = const0_rtx;
13556 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13558 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13560 tmp = gen_reg_rtx (mode);
13561 emit_move_insn (tmp, operands[3]);
13564 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13566 tmp = gen_reg_rtx (mode);
13567 emit_move_insn (tmp, operands[2]);
13571 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13572 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13573 operands[2], operands[3])));
13575 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13576 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13577 operands[3], operands[0])));
13579 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13580 gen_rtx_IF_THEN_ELSE (mode, second_test,
13581 operands[2], operands[0])));
13586 /* Expand a floating-point vector conditional move; a vcond operation
13587 rather than a movcc operation. */
13590 ix86_expand_fp_vcond (rtx operands[])
13592 enum rtx_code code = GET_CODE (operands[3]);
13595 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13596 &operands[4], &operands[5]);
13597 if (code == UNKNOWN)
13600 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13601 operands[5], operands[1], operands[2]))
13604 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13605 operands[1], operands[2]);
13606 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13610 /* Expand a signed/unsigned integral vector conditional move. */
13613 ix86_expand_int_vcond (rtx operands[])
13615 enum machine_mode mode = GET_MODE (operands[0]);
13616 enum rtx_code code = GET_CODE (operands[3]);
13617 bool negate = false;
13620 cop0 = operands[4];
13621 cop1 = operands[5];
13623 /* SSE5 supports all of the comparisons on all vector int types. */
13626 /* Canonicalize the comparison to EQ, GT, GTU. */
13637 code = reverse_condition (code);
13643 code = reverse_condition (code);
13649 code = swap_condition (code);
13650 x = cop0, cop0 = cop1, cop1 = x;
13654 gcc_unreachable ();
13657 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13658 if (mode == V2DImode)
13663 /* SSE4.1 supports EQ. */
13664 if (!TARGET_SSE4_1)
13670 /* SSE4.2 supports GT/GTU. */
13671 if (!TARGET_SSE4_2)
13676 gcc_unreachable ();
13680 /* Unsigned parallel compare is not supported by the hardware. Play some
13681 tricks to turn this into a signed comparison against 0. */
13684 cop0 = force_reg (mode, cop0);
13693 /* Perform a parallel modulo subtraction. */
13694 t1 = gen_reg_rtx (mode);
13695 emit_insn ((mode == V4SImode
13697 : gen_subv2di3) (t1, cop0, cop1));
13699 /* Extract the original sign bit of op0. */
13700 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13702 t2 = gen_reg_rtx (mode);
13703 emit_insn ((mode == V4SImode
13705 : gen_andv2di3) (t2, cop0, mask));
13707 /* XOR it back into the result of the subtraction. This results
13708 in the sign bit set iff we saw unsigned underflow. */
13709 x = gen_reg_rtx (mode);
13710 emit_insn ((mode == V4SImode
13712 : gen_xorv2di3) (x, t1, t2));
13720 /* Perform a parallel unsigned saturating subtraction. */
13721 x = gen_reg_rtx (mode);
13722 emit_insn (gen_rtx_SET (VOIDmode, x,
13723 gen_rtx_US_MINUS (mode, cop0, cop1)));
13730 gcc_unreachable ();
13734 cop1 = CONST0_RTX (mode);
13738 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13739 operands[1+negate], operands[2-negate]);
13741 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13742 operands[2-negate]);
13746 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13747 true if we should do zero extension, else sign extension. HIGH_P is
13748 true if we want the N/2 high elements, else the low elements. */
13751 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13753 enum machine_mode imode = GET_MODE (operands[1]);
13754 rtx (*unpack)(rtx, rtx, rtx);
13761 unpack = gen_vec_interleave_highv16qi;
13763 unpack = gen_vec_interleave_lowv16qi;
13767 unpack = gen_vec_interleave_highv8hi;
13769 unpack = gen_vec_interleave_lowv8hi;
13773 unpack = gen_vec_interleave_highv4si;
13775 unpack = gen_vec_interleave_lowv4si;
13778 gcc_unreachable ();
13781 dest = gen_lowpart (imode, operands[0]);
13784 se = force_reg (imode, CONST0_RTX (imode));
13786 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13787 operands[1], pc_rtx, pc_rtx);
13789 emit_insn (unpack (dest, operands[1], se));
13792 /* This function performs the same task as ix86_expand_sse_unpack,
13793 but with SSE4.1 instructions. */
13796 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13798 enum machine_mode imode = GET_MODE (operands[1]);
13799 rtx (*unpack)(rtx, rtx);
13806 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13808 unpack = gen_sse4_1_extendv8qiv8hi2;
13812 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13814 unpack = gen_sse4_1_extendv4hiv4si2;
13818 unpack = gen_sse4_1_zero_extendv2siv2di2;
13820 unpack = gen_sse4_1_extendv2siv2di2;
13823 gcc_unreachable ();
13826 dest = operands[0];
13829 /* Shift higher 8 bytes to lower 8 bytes. */
13830 src = gen_reg_rtx (imode);
13831 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13832 gen_lowpart (TImode, operands[1]),
13838 emit_insn (unpack (dest, src));
13841 /* This function performs the same task as ix86_expand_sse_unpack,
13842 but with sse5 instructions. */
13845 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13847 enum machine_mode imode = GET_MODE (operands[1]);
13848 int pperm_bytes[16];
13850 int h = (high_p) ? 8 : 0;
13853 rtvec v = rtvec_alloc (16);
13856 rtx op0 = operands[0], op1 = operands[1];
13861 vs = rtvec_alloc (8);
13862 h2 = (high_p) ? 8 : 0;
13863 for (i = 0; i < 8; i++)
13865 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13866 pperm_bytes[2*i+1] = ((unsigned_p)
13868 : PPERM_SIGN | PPERM_SRC2 | i | h);
13871 for (i = 0; i < 16; i++)
13872 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13874 for (i = 0; i < 8; i++)
13875 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13877 p = gen_rtx_PARALLEL (VOIDmode, vs);
13878 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13880 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13882 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13886 vs = rtvec_alloc (4);
13887 h2 = (high_p) ? 4 : 0;
13888 for (i = 0; i < 4; i++)
13890 sign_extend = ((unsigned_p)
13892 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13893 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13894 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13895 pperm_bytes[4*i+2] = sign_extend;
13896 pperm_bytes[4*i+3] = sign_extend;
13899 for (i = 0; i < 16; i++)
13900 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13902 for (i = 0; i < 4; i++)
13903 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13905 p = gen_rtx_PARALLEL (VOIDmode, vs);
13906 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13908 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13910 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13914 vs = rtvec_alloc (2);
13915 h2 = (high_p) ? 2 : 0;
13916 for (i = 0; i < 2; i++)
13918 sign_extend = ((unsigned_p)
13920 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13921 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13922 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13923 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13924 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13925 pperm_bytes[8*i+4] = sign_extend;
13926 pperm_bytes[8*i+5] = sign_extend;
13927 pperm_bytes[8*i+6] = sign_extend;
13928 pperm_bytes[8*i+7] = sign_extend;
13931 for (i = 0; i < 16; i++)
13932 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13934 for (i = 0; i < 2; i++)
13935 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13937 p = gen_rtx_PARALLEL (VOIDmode, vs);
13938 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13940 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13942 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13946 gcc_unreachable ();
13952 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13953 next narrower integer vector type */
13955 ix86_expand_sse5_pack (rtx operands[3])
13957 enum machine_mode imode = GET_MODE (operands[0]);
13958 int pperm_bytes[16];
13960 rtvec v = rtvec_alloc (16);
13962 rtx op0 = operands[0];
13963 rtx op1 = operands[1];
13964 rtx op2 = operands[2];
13969 for (i = 0; i < 8; i++)
13971 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13972 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13975 for (i = 0; i < 16; i++)
13976 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13978 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13979 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13983 for (i = 0; i < 4; i++)
13985 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13986 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13987 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13988 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13991 for (i = 0; i < 16; i++)
13992 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13994 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13995 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13999 for (i = 0; i < 2; i++)
14001 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
14002 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
14003 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
14004 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
14005 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
14006 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
14007 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
14008 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
14011 for (i = 0; i < 16; i++)
14012 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14014 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14015 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
14019 gcc_unreachable ();
14025 /* Expand conditional increment or decrement using adb/sbb instructions.
14026 The default case using setcc followed by the conditional move can be
14027 done by generic code. */
14029 ix86_expand_int_addcc (rtx operands[])
14031 enum rtx_code code = GET_CODE (operands[1]);
14033 rtx val = const0_rtx;
14034 bool fpcmp = false;
14035 enum machine_mode mode = GET_MODE (operands[0]);
14037 if (operands[3] != const1_rtx
14038 && operands[3] != constm1_rtx)
14040 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14041 ix86_compare_op1, &compare_op))
14043 code = GET_CODE (compare_op);
14045 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14046 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14049 code = ix86_fp_compare_code_to_integer (code);
14056 PUT_CODE (compare_op,
14057 reverse_condition_maybe_unordered
14058 (GET_CODE (compare_op)));
14060 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14062 PUT_MODE (compare_op, mode);
14064 /* Construct either adc or sbb insn. */
14065 if ((code == LTU) == (operands[3] == constm1_rtx))
14067 switch (GET_MODE (operands[0]))
14070 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
14073 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
14076 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
14079 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
14082 gcc_unreachable ();
14087 switch (GET_MODE (operands[0]))
14090 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
14093 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
14096 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
14099 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
14102 gcc_unreachable ();
14105 return 1; /* DONE */
14109 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
14110 works for floating pointer parameters and nonoffsetable memories.
14111 For pushes, it returns just stack offsets; the values will be saved
14112 in the right order. Maximally three parts are generated. */
14115 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
14120 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
14122 size = (GET_MODE_SIZE (mode) + 4) / 8;
14124 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
14125 gcc_assert (size >= 2 && size <= 4);
14127 /* Optimize constant pool reference to immediates. This is used by fp
14128 moves, that force all constants to memory to allow combining. */
14129 if (MEM_P (operand) && MEM_READONLY_P (operand))
14131 rtx tmp = maybe_get_pool_constant (operand);
14136 if (MEM_P (operand) && !offsettable_memref_p (operand))
14138 /* The only non-offsetable memories we handle are pushes. */
14139 int ok = push_operand (operand, VOIDmode);
14143 operand = copy_rtx (operand);
14144 PUT_MODE (operand, Pmode);
14145 parts[0] = parts[1] = parts[2] = parts[3] = operand;
14149 if (GET_CODE (operand) == CONST_VECTOR)
14151 enum machine_mode imode = int_mode_for_mode (mode);
14152 /* Caution: if we looked through a constant pool memory above,
14153 the operand may actually have a different mode now. That's
14154 ok, since we want to pun this all the way back to an integer. */
14155 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
14156 gcc_assert (operand != NULL);
14162 if (mode == DImode)
14163 split_di (&operand, 1, &parts[0], &parts[1]);
14168 if (REG_P (operand))
14170 gcc_assert (reload_completed);
14171 for (i = 0; i < size; i++)
14172 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
14174 else if (offsettable_memref_p (operand))
14176 operand = adjust_address (operand, SImode, 0);
14177 parts[0] = operand;
14178 for (i = 1; i < size; i++)
14179 parts[i] = adjust_address (operand, SImode, 4 * i);
14181 else if (GET_CODE (operand) == CONST_DOUBLE)
14186 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14190 real_to_target (l, &r, mode);
14191 parts[3] = gen_int_mode (l[3], SImode);
14192 parts[2] = gen_int_mode (l[2], SImode);
14195 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
14196 parts[2] = gen_int_mode (l[2], SImode);
14199 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14202 gcc_unreachable ();
14204 parts[1] = gen_int_mode (l[1], SImode);
14205 parts[0] = gen_int_mode (l[0], SImode);
14208 gcc_unreachable ();
14213 if (mode == TImode)
14214 split_ti (&operand, 1, &parts[0], &parts[1]);
14215 if (mode == XFmode || mode == TFmode)
14217 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
14218 if (REG_P (operand))
14220 gcc_assert (reload_completed);
14221 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
14222 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
14224 else if (offsettable_memref_p (operand))
14226 operand = adjust_address (operand, DImode, 0);
14227 parts[0] = operand;
14228 parts[1] = adjust_address (operand, upper_mode, 8);
14230 else if (GET_CODE (operand) == CONST_DOUBLE)
14235 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14236 real_to_target (l, &r, mode);
14238 /* Do not use shift by 32 to avoid warning on 32bit systems. */
14239 if (HOST_BITS_PER_WIDE_INT >= 64)
14242 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
14243 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
14246 parts[0] = immed_double_const (l[0], l[1], DImode);
14248 if (upper_mode == SImode)
14249 parts[1] = gen_int_mode (l[2], SImode);
14250 else if (HOST_BITS_PER_WIDE_INT >= 64)
14253 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
14254 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
14257 parts[1] = immed_double_const (l[2], l[3], DImode);
14260 gcc_unreachable ();
14267 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
14268 Return false when normal moves are needed; true when all required
14269 insns have been emitted. Operands 2-4 contain the input values
14270 int the correct order; operands 5-7 contain the output values. */
14273 ix86_split_long_move (rtx operands[])
14278 int collisions = 0;
14279 enum machine_mode mode = GET_MODE (operands[0]);
14280 bool collisionparts[4];
14282 /* The DFmode expanders may ask us to move double.
14283 For 64bit target this is single move. By hiding the fact
14284 here we simplify i386.md splitters. */
14285 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
14287 /* Optimize constant pool reference to immediates. This is used by
14288 fp moves, that force all constants to memory to allow combining. */
14290 if (MEM_P (operands[1])
14291 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
14292 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
14293 operands[1] = get_pool_constant (XEXP (operands[1], 0));
14294 if (push_operand (operands[0], VOIDmode))
14296 operands[0] = copy_rtx (operands[0]);
14297 PUT_MODE (operands[0], Pmode);
14300 operands[0] = gen_lowpart (DImode, operands[0]);
14301 operands[1] = gen_lowpart (DImode, operands[1]);
14302 emit_move_insn (operands[0], operands[1]);
14306 /* The only non-offsettable memory we handle is push. */
14307 if (push_operand (operands[0], VOIDmode))
14310 gcc_assert (!MEM_P (operands[0])
14311 || offsettable_memref_p (operands[0]));
14313 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
14314 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
14316 /* When emitting push, take care for source operands on the stack. */
14317 if (push && MEM_P (operands[1])
14318 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
14319 for (i = 0; i < nparts - 1; i++)
14320 part[1][i] = change_address (part[1][i],
14321 GET_MODE (part[1][i]),
14322 XEXP (part[1][i + 1], 0));
14324 /* We need to do copy in the right order in case an address register
14325 of the source overlaps the destination. */
14326 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
14330 for (i = 0; i < nparts; i++)
14333 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
14334 if (collisionparts[i])
14338 /* Collision in the middle part can be handled by reordering. */
14339 if (collisions == 1 && nparts == 3 && collisionparts [1])
14341 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14342 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14344 else if (collisions == 1
14346 && (collisionparts [1] || collisionparts [2]))
14348 if (collisionparts [1])
14350 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14351 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14355 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
14356 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
14360 /* If there are more collisions, we can't handle it by reordering.
14361 Do an lea to the last part and use only one colliding move. */
14362 else if (collisions > 1)
14368 base = part[0][nparts - 1];
14370 /* Handle the case when the last part isn't valid for lea.
14371 Happens in 64-bit mode storing the 12-byte XFmode. */
14372 if (GET_MODE (base) != Pmode)
14373 base = gen_rtx_REG (Pmode, REGNO (base));
14375 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14376 part[1][0] = replace_equiv_address (part[1][0], base);
14377 for (i = 1; i < nparts; i++)
14379 tmp = plus_constant (base, UNITS_PER_WORD * i);
14380 part[1][i] = replace_equiv_address (part[1][i], tmp);
14391 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14392 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14393 emit_move_insn (part[0][2], part[1][2]);
14395 else if (nparts == 4)
14397 emit_move_insn (part[0][3], part[1][3]);
14398 emit_move_insn (part[0][2], part[1][2]);
14403 /* In 64bit mode we don't have 32bit push available. In case this is
14404 register, it is OK - we will just use larger counterpart. We also
14405 retype memory - these comes from attempt to avoid REX prefix on
14406 moving of second half of TFmode value. */
14407 if (GET_MODE (part[1][1]) == SImode)
14409 switch (GET_CODE (part[1][1]))
14412 part[1][1] = adjust_address (part[1][1], DImode, 0);
14416 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14420 gcc_unreachable ();
14423 if (GET_MODE (part[1][0]) == SImode)
14424 part[1][0] = part[1][1];
14427 emit_move_insn (part[0][1], part[1][1]);
14428 emit_move_insn (part[0][0], part[1][0]);
14432 /* Choose correct order to not overwrite the source before it is copied. */
14433 if ((REG_P (part[0][0])
14434 && REG_P (part[1][1])
14435 && (REGNO (part[0][0]) == REGNO (part[1][1])
14437 && REGNO (part[0][0]) == REGNO (part[1][2]))
14439 && REGNO (part[0][0]) == REGNO (part[1][3]))))
14441 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14443 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
14445 operands[2 + i] = part[0][j];
14446 operands[6 + i] = part[1][j];
14451 for (i = 0; i < nparts; i++)
14453 operands[2 + i] = part[0][i];
14454 operands[6 + i] = part[1][i];
14458 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14461 for (j = 0; j < nparts - 1; j++)
14462 if (CONST_INT_P (operands[6 + j])
14463 && operands[6 + j] != const0_rtx
14464 && REG_P (operands[2 + j]))
14465 for (i = j; i < nparts - 1; i++)
14466 if (CONST_INT_P (operands[7 + i])
14467 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
14468 operands[7 + i] = operands[2 + j];
14471 for (i = 0; i < nparts; i++)
14472 emit_move_insn (operands[2 + i], operands[6 + i]);
14477 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14478 left shift by a constant, either using a single shift or
14479 a sequence of add instructions. */
14482 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14486 emit_insn ((mode == DImode
14488 : gen_adddi3) (operand, operand, operand));
14490 else if (!optimize_size
14491 && count * ix86_cost->add <= ix86_cost->shift_const)
14494 for (i=0; i<count; i++)
14496 emit_insn ((mode == DImode
14498 : gen_adddi3) (operand, operand, operand));
14502 emit_insn ((mode == DImode
14504 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14508 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14510 rtx low[2], high[2];
14512 const int single_width = mode == DImode ? 32 : 64;
14514 if (CONST_INT_P (operands[2]))
14516 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14517 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14519 if (count >= single_width)
14521 emit_move_insn (high[0], low[1]);
14522 emit_move_insn (low[0], const0_rtx);
14524 if (count > single_width)
14525 ix86_expand_ashl_const (high[0], count - single_width, mode);
14529 if (!rtx_equal_p (operands[0], operands[1]))
14530 emit_move_insn (operands[0], operands[1]);
14531 emit_insn ((mode == DImode
14533 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14534 ix86_expand_ashl_const (low[0], count, mode);
14539 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14541 if (operands[1] == const1_rtx)
14543 /* Assuming we've chosen a QImode capable registers, then 1 << N
14544 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14545 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14547 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14549 ix86_expand_clear (low[0]);
14550 ix86_expand_clear (high[0]);
14551 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14553 d = gen_lowpart (QImode, low[0]);
14554 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14555 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14556 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14558 d = gen_lowpart (QImode, high[0]);
14559 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14560 s = gen_rtx_NE (QImode, flags, const0_rtx);
14561 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14564 /* Otherwise, we can get the same results by manually performing
14565 a bit extract operation on bit 5/6, and then performing the two
14566 shifts. The two methods of getting 0/1 into low/high are exactly
14567 the same size. Avoiding the shift in the bit extract case helps
14568 pentium4 a bit; no one else seems to care much either way. */
14573 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14574 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14576 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14577 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14579 emit_insn ((mode == DImode
14581 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14582 emit_insn ((mode == DImode
14584 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14585 emit_move_insn (low[0], high[0]);
14586 emit_insn ((mode == DImode
14588 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14591 emit_insn ((mode == DImode
14593 : gen_ashldi3) (low[0], low[0], operands[2]));
14594 emit_insn ((mode == DImode
14596 : gen_ashldi3) (high[0], high[0], operands[2]));
14600 if (operands[1] == constm1_rtx)
14602 /* For -1 << N, we can avoid the shld instruction, because we
14603 know that we're shifting 0...31/63 ones into a -1. */
14604 emit_move_insn (low[0], constm1_rtx);
14606 emit_move_insn (high[0], low[0]);
14608 emit_move_insn (high[0], constm1_rtx);
14612 if (!rtx_equal_p (operands[0], operands[1]))
14613 emit_move_insn (operands[0], operands[1]);
14615 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14616 emit_insn ((mode == DImode
14618 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14621 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14623 if (TARGET_CMOVE && scratch)
14625 ix86_expand_clear (scratch);
14626 emit_insn ((mode == DImode
14627 ? gen_x86_shift_adj_1
14628 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14631 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14635 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14637 rtx low[2], high[2];
14639 const int single_width = mode == DImode ? 32 : 64;
14641 if (CONST_INT_P (operands[2]))
14643 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14644 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14646 if (count == single_width * 2 - 1)
14648 emit_move_insn (high[0], high[1]);
14649 emit_insn ((mode == DImode
14651 : gen_ashrdi3) (high[0], high[0],
14652 GEN_INT (single_width - 1)));
14653 emit_move_insn (low[0], high[0]);
14656 else if (count >= single_width)
14658 emit_move_insn (low[0], high[1]);
14659 emit_move_insn (high[0], low[0]);
14660 emit_insn ((mode == DImode
14662 : gen_ashrdi3) (high[0], high[0],
14663 GEN_INT (single_width - 1)));
14664 if (count > single_width)
14665 emit_insn ((mode == DImode
14667 : gen_ashrdi3) (low[0], low[0],
14668 GEN_INT (count - single_width)));
14672 if (!rtx_equal_p (operands[0], operands[1]))
14673 emit_move_insn (operands[0], operands[1]);
14674 emit_insn ((mode == DImode
14676 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14677 emit_insn ((mode == DImode
14679 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14684 if (!rtx_equal_p (operands[0], operands[1]))
14685 emit_move_insn (operands[0], operands[1]);
14687 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14689 emit_insn ((mode == DImode
14691 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14692 emit_insn ((mode == DImode
14694 : gen_ashrdi3) (high[0], high[0], operands[2]));
14696 if (TARGET_CMOVE && scratch)
14698 emit_move_insn (scratch, high[0]);
14699 emit_insn ((mode == DImode
14701 : gen_ashrdi3) (scratch, scratch,
14702 GEN_INT (single_width - 1)));
14703 emit_insn ((mode == DImode
14704 ? gen_x86_shift_adj_1
14705 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14709 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14714 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14716 rtx low[2], high[2];
14718 const int single_width = mode == DImode ? 32 : 64;
14720 if (CONST_INT_P (operands[2]))
14722 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14723 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14725 if (count >= single_width)
14727 emit_move_insn (low[0], high[1]);
14728 ix86_expand_clear (high[0]);
14730 if (count > single_width)
14731 emit_insn ((mode == DImode
14733 : gen_lshrdi3) (low[0], low[0],
14734 GEN_INT (count - single_width)));
14738 if (!rtx_equal_p (operands[0], operands[1]))
14739 emit_move_insn (operands[0], operands[1]);
14740 emit_insn ((mode == DImode
14742 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14743 emit_insn ((mode == DImode
14745 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14750 if (!rtx_equal_p (operands[0], operands[1]))
14751 emit_move_insn (operands[0], operands[1]);
14753 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14755 emit_insn ((mode == DImode
14757 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14758 emit_insn ((mode == DImode
14760 : gen_lshrdi3) (high[0], high[0], operands[2]));
14762 /* Heh. By reversing the arguments, we can reuse this pattern. */
14763 if (TARGET_CMOVE && scratch)
14765 ix86_expand_clear (scratch);
14766 emit_insn ((mode == DImode
14767 ? gen_x86_shift_adj_1
14768 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14772 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14776 /* Predict just emitted jump instruction to be taken with probability PROB. */
14778 predict_jump (int prob)
14780 rtx insn = get_last_insn ();
14781 gcc_assert (JUMP_P (insn));
14783 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14788 /* Helper function for the string operations below. Dest VARIABLE whether
14789 it is aligned to VALUE bytes. If true, jump to the label. */
14791 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14793 rtx label = gen_label_rtx ();
14794 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14795 if (GET_MODE (variable) == DImode)
14796 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14798 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14799 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14802 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14804 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14808 /* Adjust COUNTER by the VALUE. */
14810 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14812 if (GET_MODE (countreg) == DImode)
14813 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14815 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14818 /* Zero extend possibly SImode EXP to Pmode register. */
14820 ix86_zero_extend_to_Pmode (rtx exp)
14823 if (GET_MODE (exp) == VOIDmode)
14824 return force_reg (Pmode, exp);
14825 if (GET_MODE (exp) == Pmode)
14826 return copy_to_mode_reg (Pmode, exp);
14827 r = gen_reg_rtx (Pmode);
14828 emit_insn (gen_zero_extendsidi2 (r, exp));
14832 /* Divide COUNTREG by SCALE. */
14834 scale_counter (rtx countreg, int scale)
14837 rtx piece_size_mask;
14841 if (CONST_INT_P (countreg))
14842 return GEN_INT (INTVAL (countreg) / scale);
14843 gcc_assert (REG_P (countreg));
14845 piece_size_mask = GEN_INT (scale - 1);
14846 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14847 GEN_INT (exact_log2 (scale)),
14848 NULL, 1, OPTAB_DIRECT);
14852 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14853 DImode for constant loop counts. */
14855 static enum machine_mode
14856 counter_mode (rtx count_exp)
14858 if (GET_MODE (count_exp) != VOIDmode)
14859 return GET_MODE (count_exp);
14860 if (GET_CODE (count_exp) != CONST_INT)
14862 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14867 /* When SRCPTR is non-NULL, output simple loop to move memory
14868 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14869 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14870 equivalent loop to set memory by VALUE (supposed to be in MODE).
14872 The size is rounded down to whole number of chunk size moved at once.
14873 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14877 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14878 rtx destptr, rtx srcptr, rtx value,
14879 rtx count, enum machine_mode mode, int unroll,
14882 rtx out_label, top_label, iter, tmp;
14883 enum machine_mode iter_mode = counter_mode (count);
14884 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14885 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14891 top_label = gen_label_rtx ();
14892 out_label = gen_label_rtx ();
14893 iter = gen_reg_rtx (iter_mode);
14895 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14896 NULL, 1, OPTAB_DIRECT);
14897 /* Those two should combine. */
14898 if (piece_size == const1_rtx)
14900 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14902 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14904 emit_move_insn (iter, const0_rtx);
14906 emit_label (top_label);
14908 tmp = convert_modes (Pmode, iter_mode, iter, true);
14909 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14910 destmem = change_address (destmem, mode, x_addr);
14914 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14915 srcmem = change_address (srcmem, mode, y_addr);
14917 /* When unrolling for chips that reorder memory reads and writes,
14918 we can save registers by using single temporary.
14919 Also using 4 temporaries is overkill in 32bit mode. */
14920 if (!TARGET_64BIT && 0)
14922 for (i = 0; i < unroll; i++)
14927 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14929 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14931 emit_move_insn (destmem, srcmem);
14937 gcc_assert (unroll <= 4);
14938 for (i = 0; i < unroll; i++)
14940 tmpreg[i] = gen_reg_rtx (mode);
14944 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14946 emit_move_insn (tmpreg[i], srcmem);
14948 for (i = 0; i < unroll; i++)
14953 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14955 emit_move_insn (destmem, tmpreg[i]);
14960 for (i = 0; i < unroll; i++)
14964 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14965 emit_move_insn (destmem, value);
14968 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14969 true, OPTAB_LIB_WIDEN);
14971 emit_move_insn (iter, tmp);
14973 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14975 if (expected_size != -1)
14977 expected_size /= GET_MODE_SIZE (mode) * unroll;
14978 if (expected_size == 0)
14980 else if (expected_size > REG_BR_PROB_BASE)
14981 predict_jump (REG_BR_PROB_BASE - 1);
14983 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14986 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14987 iter = ix86_zero_extend_to_Pmode (iter);
14988 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14989 true, OPTAB_LIB_WIDEN);
14990 if (tmp != destptr)
14991 emit_move_insn (destptr, tmp);
14994 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14995 true, OPTAB_LIB_WIDEN);
14997 emit_move_insn (srcptr, tmp);
14999 emit_label (out_label);
15002 /* Output "rep; mov" instruction.
15003 Arguments have same meaning as for previous function */
15005 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
15006 rtx destptr, rtx srcptr,
15008 enum machine_mode mode)
15014 /* If the size is known, it is shorter to use rep movs. */
15015 if (mode == QImode && CONST_INT_P (count)
15016 && !(INTVAL (count) & 3))
15019 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15020 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
15021 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
15022 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
15023 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
15024 if (mode != QImode)
15026 destexp = gen_rtx_ASHIFT (Pmode, countreg,
15027 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15028 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
15029 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
15030 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15031 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
15035 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
15036 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
15038 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
15042 /* Output "rep; stos" instruction.
15043 Arguments have same meaning as for previous function */
15045 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
15047 enum machine_mode mode)
15052 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15053 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
15054 value = force_reg (mode, gen_lowpart (mode, value));
15055 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
15056 if (mode != QImode)
15058 destexp = gen_rtx_ASHIFT (Pmode, countreg,
15059 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15060 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
15063 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
15064 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
15068 emit_strmov (rtx destmem, rtx srcmem,
15069 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
15071 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
15072 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
15073 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15076 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
15078 expand_movmem_epilogue (rtx destmem, rtx srcmem,
15079 rtx destptr, rtx srcptr, rtx count, int max_size)
15082 if (CONST_INT_P (count))
15084 HOST_WIDE_INT countval = INTVAL (count);
15087 if ((countval & 0x10) && max_size > 16)
15091 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
15092 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
15095 gcc_unreachable ();
15098 if ((countval & 0x08) && max_size > 8)
15101 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
15104 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
15105 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
15109 if ((countval & 0x04) && max_size > 4)
15111 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
15114 if ((countval & 0x02) && max_size > 2)
15116 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
15119 if ((countval & 0x01) && max_size > 1)
15121 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
15128 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
15129 count, 1, OPTAB_DIRECT);
15130 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
15131 count, QImode, 1, 4);
15135 /* When there are stringops, we can cheaply increase dest and src pointers.
15136 Otherwise we save code size by maintaining offset (zero is readily
15137 available from preceding rep operation) and using x86 addressing modes.
15139 if (TARGET_SINGLE_STRINGOP)
15143 rtx label = ix86_expand_aligntest (count, 4, true);
15144 src = change_address (srcmem, SImode, srcptr);
15145 dest = change_address (destmem, SImode, destptr);
15146 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15147 emit_label (label);
15148 LABEL_NUSES (label) = 1;
15152 rtx label = ix86_expand_aligntest (count, 2, true);
15153 src = change_address (srcmem, HImode, srcptr);
15154 dest = change_address (destmem, HImode, destptr);
15155 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15156 emit_label (label);
15157 LABEL_NUSES (label) = 1;
15161 rtx label = ix86_expand_aligntest (count, 1, true);
15162 src = change_address (srcmem, QImode, srcptr);
15163 dest = change_address (destmem, QImode, destptr);
15164 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15165 emit_label (label);
15166 LABEL_NUSES (label) = 1;
15171 rtx offset = force_reg (Pmode, const0_rtx);
15176 rtx label = ix86_expand_aligntest (count, 4, true);
15177 src = change_address (srcmem, SImode, srcptr);
15178 dest = change_address (destmem, SImode, destptr);
15179 emit_move_insn (dest, src);
15180 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
15181 true, OPTAB_LIB_WIDEN);
15183 emit_move_insn (offset, tmp);
15184 emit_label (label);
15185 LABEL_NUSES (label) = 1;
15189 rtx label = ix86_expand_aligntest (count, 2, true);
15190 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15191 src = change_address (srcmem, HImode, tmp);
15192 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15193 dest = change_address (destmem, HImode, tmp);
15194 emit_move_insn (dest, src);
15195 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
15196 true, OPTAB_LIB_WIDEN);
15198 emit_move_insn (offset, tmp);
15199 emit_label (label);
15200 LABEL_NUSES (label) = 1;
15204 rtx label = ix86_expand_aligntest (count, 1, true);
15205 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15206 src = change_address (srcmem, QImode, tmp);
15207 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15208 dest = change_address (destmem, QImode, tmp);
15209 emit_move_insn (dest, src);
15210 emit_label (label);
15211 LABEL_NUSES (label) = 1;
15216 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
15218 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
15219 rtx count, int max_size)
15222 expand_simple_binop (counter_mode (count), AND, count,
15223 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
15224 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
15225 gen_lowpart (QImode, value), count, QImode,
15229 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
15231 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
15235 if (CONST_INT_P (count))
15237 HOST_WIDE_INT countval = INTVAL (count);
15240 if ((countval & 0x10) && max_size > 16)
15244 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15245 emit_insn (gen_strset (destptr, dest, value));
15246 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
15247 emit_insn (gen_strset (destptr, dest, value));
15250 gcc_unreachable ();
15253 if ((countval & 0x08) && max_size > 8)
15257 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15258 emit_insn (gen_strset (destptr, dest, value));
15262 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15263 emit_insn (gen_strset (destptr, dest, value));
15264 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
15265 emit_insn (gen_strset (destptr, dest, value));
15269 if ((countval & 0x04) && max_size > 4)
15271 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15272 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15275 if ((countval & 0x02) && max_size > 2)
15277 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
15278 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15281 if ((countval & 0x01) && max_size > 1)
15283 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
15284 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15291 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
15296 rtx label = ix86_expand_aligntest (count, 16, true);
15299 dest = change_address (destmem, DImode, destptr);
15300 emit_insn (gen_strset (destptr, dest, value));
15301 emit_insn (gen_strset (destptr, dest, value));
15305 dest = change_address (destmem, SImode, destptr);
15306 emit_insn (gen_strset (destptr, dest, value));
15307 emit_insn (gen_strset (destptr, dest, value));
15308 emit_insn (gen_strset (destptr, dest, value));
15309 emit_insn (gen_strset (destptr, dest, value));
15311 emit_label (label);
15312 LABEL_NUSES (label) = 1;
15316 rtx label = ix86_expand_aligntest (count, 8, true);
15319 dest = change_address (destmem, DImode, destptr);
15320 emit_insn (gen_strset (destptr, dest, value));
15324 dest = change_address (destmem, SImode, destptr);
15325 emit_insn (gen_strset (destptr, dest, value));
15326 emit_insn (gen_strset (destptr, dest, value));
15328 emit_label (label);
15329 LABEL_NUSES (label) = 1;
15333 rtx label = ix86_expand_aligntest (count, 4, true);
15334 dest = change_address (destmem, SImode, destptr);
15335 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15336 emit_label (label);
15337 LABEL_NUSES (label) = 1;
15341 rtx label = ix86_expand_aligntest (count, 2, true);
15342 dest = change_address (destmem, HImode, destptr);
15343 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15344 emit_label (label);
15345 LABEL_NUSES (label) = 1;
15349 rtx label = ix86_expand_aligntest (count, 1, true);
15350 dest = change_address (destmem, QImode, destptr);
15351 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15352 emit_label (label);
15353 LABEL_NUSES (label) = 1;
15357 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
15358 DESIRED_ALIGNMENT. */
15360 expand_movmem_prologue (rtx destmem, rtx srcmem,
15361 rtx destptr, rtx srcptr, rtx count,
15362 int align, int desired_alignment)
15364 if (align <= 1 && desired_alignment > 1)
15366 rtx label = ix86_expand_aligntest (destptr, 1, false);
15367 srcmem = change_address (srcmem, QImode, srcptr);
15368 destmem = change_address (destmem, QImode, destptr);
15369 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15370 ix86_adjust_counter (count, 1);
15371 emit_label (label);
15372 LABEL_NUSES (label) = 1;
15374 if (align <= 2 && desired_alignment > 2)
15376 rtx label = ix86_expand_aligntest (destptr, 2, false);
15377 srcmem = change_address (srcmem, HImode, srcptr);
15378 destmem = change_address (destmem, HImode, destptr);
15379 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15380 ix86_adjust_counter (count, 2);
15381 emit_label (label);
15382 LABEL_NUSES (label) = 1;
15384 if (align <= 4 && desired_alignment > 4)
15386 rtx label = ix86_expand_aligntest (destptr, 4, false);
15387 srcmem = change_address (srcmem, SImode, srcptr);
15388 destmem = change_address (destmem, SImode, destptr);
15389 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15390 ix86_adjust_counter (count, 4);
15391 emit_label (label);
15392 LABEL_NUSES (label) = 1;
15394 gcc_assert (desired_alignment <= 8);
15397 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
15398 DESIRED_ALIGNMENT. */
15400 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15401 int align, int desired_alignment)
15403 if (align <= 1 && desired_alignment > 1)
15405 rtx label = ix86_expand_aligntest (destptr, 1, false);
15406 destmem = change_address (destmem, QImode, destptr);
15407 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15408 ix86_adjust_counter (count, 1);
15409 emit_label (label);
15410 LABEL_NUSES (label) = 1;
15412 if (align <= 2 && desired_alignment > 2)
15414 rtx label = ix86_expand_aligntest (destptr, 2, false);
15415 destmem = change_address (destmem, HImode, destptr);
15416 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15417 ix86_adjust_counter (count, 2);
15418 emit_label (label);
15419 LABEL_NUSES (label) = 1;
15421 if (align <= 4 && desired_alignment > 4)
15423 rtx label = ix86_expand_aligntest (destptr, 4, false);
15424 destmem = change_address (destmem, SImode, destptr);
15425 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15426 ix86_adjust_counter (count, 4);
15427 emit_label (label);
15428 LABEL_NUSES (label) = 1;
15430 gcc_assert (desired_alignment <= 8);
15433 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15434 static enum stringop_alg
15435 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15436 int *dynamic_check)
15438 const struct stringop_algs * algs;
15439 /* Algorithms using the rep prefix want at least edi and ecx;
15440 additionally, memset wants eax and memcpy wants esi. Don't
15441 consider such algorithms if the user has appropriated those
15442 registers for their own purposes. */
15443 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15445 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15447 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15448 || (alg != rep_prefix_1_byte \
15449 && alg != rep_prefix_4_byte \
15450 && alg != rep_prefix_8_byte))
15452 *dynamic_check = -1;
15454 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15456 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15457 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15458 return stringop_alg;
15459 /* rep; movq or rep; movl is the smallest variant. */
15460 else if (optimize_size)
15462 if (!count || (count & 3))
15463 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15465 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15467 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15469 else if (expected_size != -1 && expected_size < 4)
15470 return loop_1_byte;
15471 else if (expected_size != -1)
15474 enum stringop_alg alg = libcall;
15475 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15477 /* We get here if the algorithms that were not libcall-based
15478 were rep-prefix based and we are unable to use rep prefixes
15479 based on global register usage. Break out of the loop and
15480 use the heuristic below. */
15481 if (algs->size[i].max == 0)
15483 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15485 enum stringop_alg candidate = algs->size[i].alg;
15487 if (candidate != libcall && ALG_USABLE_P (candidate))
15489 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15490 last non-libcall inline algorithm. */
15491 if (TARGET_INLINE_ALL_STRINGOPS)
15493 /* When the current size is best to be copied by a libcall,
15494 but we are still forced to inline, run the heuristic below
15495 that will pick code for medium sized blocks. */
15496 if (alg != libcall)
15500 else if (ALG_USABLE_P (candidate))
15504 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15506 /* When asked to inline the call anyway, try to pick meaningful choice.
15507 We look for maximal size of block that is faster to copy by hand and
15508 take blocks of at most of that size guessing that average size will
15509 be roughly half of the block.
15511 If this turns out to be bad, we might simply specify the preferred
15512 choice in ix86_costs. */
15513 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15514 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15517 enum stringop_alg alg;
15519 bool any_alg_usable_p = true;
15521 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15523 enum stringop_alg candidate = algs->size[i].alg;
15524 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15526 if (candidate != libcall && candidate
15527 && ALG_USABLE_P (candidate))
15528 max = algs->size[i].max;
15530 /* If there aren't any usable algorithms, then recursing on
15531 smaller sizes isn't going to find anything. Just return the
15532 simple byte-at-a-time copy loop. */
15533 if (!any_alg_usable_p)
15535 /* Pick something reasonable. */
15536 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15537 *dynamic_check = 128;
15538 return loop_1_byte;
15542 alg = decide_alg (count, max / 2, memset, dynamic_check);
15543 gcc_assert (*dynamic_check == -1);
15544 gcc_assert (alg != libcall);
15545 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15546 *dynamic_check = max;
15549 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15550 #undef ALG_USABLE_P
15553 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15554 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15556 decide_alignment (int align,
15557 enum stringop_alg alg,
15560 int desired_align = 0;
15564 gcc_unreachable ();
15566 case unrolled_loop:
15567 desired_align = GET_MODE_SIZE (Pmode);
15569 case rep_prefix_8_byte:
15572 case rep_prefix_4_byte:
15573 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15574 copying whole cacheline at once. */
15575 if (TARGET_PENTIUMPRO)
15580 case rep_prefix_1_byte:
15581 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15582 copying whole cacheline at once. */
15583 if (TARGET_PENTIUMPRO)
15597 if (desired_align < align)
15598 desired_align = align;
15599 if (expected_size != -1 && expected_size < 4)
15600 desired_align = align;
15601 return desired_align;
15604 /* Return the smallest power of 2 greater than VAL. */
15606 smallest_pow2_greater_than (int val)
15614 /* Expand string move (memcpy) operation. Use i386 string operations when
15615 profitable. expand_setmem contains similar code. The code depends upon
15616 architecture, block size and alignment, but always has the same
15619 1) Prologue guard: Conditional that jumps up to epilogues for small
15620 blocks that can be handled by epilogue alone. This is faster but
15621 also needed for correctness, since prologue assume the block is larger
15622 than the desired alignment.
15624 Optional dynamic check for size and libcall for large
15625 blocks is emitted here too, with -minline-stringops-dynamically.
15627 2) Prologue: copy first few bytes in order to get destination aligned
15628 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15629 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15630 We emit either a jump tree on power of two sized blocks, or a byte loop.
15632 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15633 with specified algorithm.
15635 4) Epilogue: code copying tail of the block that is too small to be
15636 handled by main body (or up to size guarded by prologue guard). */
15639 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15640 rtx expected_align_exp, rtx expected_size_exp)
15646 rtx jump_around_label = NULL;
15647 HOST_WIDE_INT align = 1;
15648 unsigned HOST_WIDE_INT count = 0;
15649 HOST_WIDE_INT expected_size = -1;
15650 int size_needed = 0, epilogue_size_needed;
15651 int desired_align = 0;
15652 enum stringop_alg alg;
15655 if (CONST_INT_P (align_exp))
15656 align = INTVAL (align_exp);
15657 /* i386 can do misaligned access on reasonably increased cost. */
15658 if (CONST_INT_P (expected_align_exp)
15659 && INTVAL (expected_align_exp) > align)
15660 align = INTVAL (expected_align_exp);
15661 if (CONST_INT_P (count_exp))
15662 count = expected_size = INTVAL (count_exp);
15663 if (CONST_INT_P (expected_size_exp) && count == 0)
15664 expected_size = INTVAL (expected_size_exp);
15666 /* Make sure we don't need to care about overflow later on. */
15667 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15670 /* Step 0: Decide on preferred algorithm, desired alignment and
15671 size of chunks to be copied by main loop. */
15673 alg = decide_alg (count, expected_size, false, &dynamic_check);
15674 desired_align = decide_alignment (align, alg, expected_size);
15676 if (!TARGET_ALIGN_STRINGOPS)
15677 align = desired_align;
15679 if (alg == libcall)
15681 gcc_assert (alg != no_stringop);
15683 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15684 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15685 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15690 gcc_unreachable ();
15692 size_needed = GET_MODE_SIZE (Pmode);
15694 case unrolled_loop:
15695 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15697 case rep_prefix_8_byte:
15700 case rep_prefix_4_byte:
15703 case rep_prefix_1_byte:
15709 epilogue_size_needed = size_needed;
15711 /* Step 1: Prologue guard. */
15713 /* Alignment code needs count to be in register. */
15714 if (CONST_INT_P (count_exp) && desired_align > align)
15715 count_exp = force_reg (counter_mode (count_exp), count_exp);
15716 gcc_assert (desired_align >= 1 && align >= 1);
15718 /* Ensure that alignment prologue won't copy past end of block. */
15719 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15721 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15722 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15723 Make sure it is power of 2. */
15724 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15726 if (CONST_INT_P (count_exp))
15728 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15733 label = gen_label_rtx ();
15734 emit_cmp_and_jump_insns (count_exp,
15735 GEN_INT (epilogue_size_needed),
15736 LTU, 0, counter_mode (count_exp), 1, label);
15737 if (expected_size == -1 || expected_size < epilogue_size_needed)
15738 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15740 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15744 /* Emit code to decide on runtime whether library call or inline should be
15746 if (dynamic_check != -1)
15748 if (CONST_INT_P (count_exp))
15750 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15752 emit_block_move_via_libcall (dst, src, count_exp, false);
15753 count_exp = const0_rtx;
15759 rtx hot_label = gen_label_rtx ();
15760 jump_around_label = gen_label_rtx ();
15761 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15762 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15763 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15764 emit_block_move_via_libcall (dst, src, count_exp, false);
15765 emit_jump (jump_around_label);
15766 emit_label (hot_label);
15770 /* Step 2: Alignment prologue. */
15772 if (desired_align > align)
15774 /* Except for the first move in epilogue, we no longer know
15775 constant offset in aliasing info. It don't seems to worth
15776 the pain to maintain it for the first move, so throw away
15778 src = change_address (src, BLKmode, srcreg);
15779 dst = change_address (dst, BLKmode, destreg);
15780 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15783 if (label && size_needed == 1)
15785 emit_label (label);
15786 LABEL_NUSES (label) = 1;
15790 /* Step 3: Main loop. */
15796 gcc_unreachable ();
15798 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15799 count_exp, QImode, 1, expected_size);
15802 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15803 count_exp, Pmode, 1, expected_size);
15805 case unrolled_loop:
15806 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15807 registers for 4 temporaries anyway. */
15808 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15809 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15812 case rep_prefix_8_byte:
15813 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15816 case rep_prefix_4_byte:
15817 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15820 case rep_prefix_1_byte:
15821 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15825 /* Adjust properly the offset of src and dest memory for aliasing. */
15826 if (CONST_INT_P (count_exp))
15828 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15829 (count / size_needed) * size_needed);
15830 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15831 (count / size_needed) * size_needed);
15835 src = change_address (src, BLKmode, srcreg);
15836 dst = change_address (dst, BLKmode, destreg);
15839 /* Step 4: Epilogue to copy the remaining bytes. */
15843 /* When the main loop is done, COUNT_EXP might hold original count,
15844 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15845 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15846 bytes. Compensate if needed. */
15848 if (size_needed < epilogue_size_needed)
15851 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15852 GEN_INT (size_needed - 1), count_exp, 1,
15854 if (tmp != count_exp)
15855 emit_move_insn (count_exp, tmp);
15857 emit_label (label);
15858 LABEL_NUSES (label) = 1;
15861 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15862 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15863 epilogue_size_needed);
15864 if (jump_around_label)
15865 emit_label (jump_around_label);
15869 /* Helper function for memcpy. For QImode value 0xXY produce
15870 0xXYXYXYXY of wide specified by MODE. This is essentially
15871 a * 0x10101010, but we can do slightly better than
15872 synth_mult by unwinding the sequence by hand on CPUs with
15875 promote_duplicated_reg (enum machine_mode mode, rtx val)
15877 enum machine_mode valmode = GET_MODE (val);
15879 int nops = mode == DImode ? 3 : 2;
15881 gcc_assert (mode == SImode || mode == DImode);
15882 if (val == const0_rtx)
15883 return copy_to_mode_reg (mode, const0_rtx);
15884 if (CONST_INT_P (val))
15886 HOST_WIDE_INT v = INTVAL (val) & 255;
15890 if (mode == DImode)
15891 v |= (v << 16) << 16;
15892 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15895 if (valmode == VOIDmode)
15897 if (valmode != QImode)
15898 val = gen_lowpart (QImode, val);
15899 if (mode == QImode)
15901 if (!TARGET_PARTIAL_REG_STALL)
15903 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15904 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15905 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15906 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15908 rtx reg = convert_modes (mode, QImode, val, true);
15909 tmp = promote_duplicated_reg (mode, const1_rtx);
15910 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15915 rtx reg = convert_modes (mode, QImode, val, true);
15917 if (!TARGET_PARTIAL_REG_STALL)
15918 if (mode == SImode)
15919 emit_insn (gen_movsi_insv_1 (reg, reg));
15921 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15924 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15925 NULL, 1, OPTAB_DIRECT);
15927 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15929 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15930 NULL, 1, OPTAB_DIRECT);
15931 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15932 if (mode == SImode)
15934 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15935 NULL, 1, OPTAB_DIRECT);
15936 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15941 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15942 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15943 alignment from ALIGN to DESIRED_ALIGN. */
15945 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15950 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15951 promoted_val = promote_duplicated_reg (DImode, val);
15952 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15953 promoted_val = promote_duplicated_reg (SImode, val);
15954 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15955 promoted_val = promote_duplicated_reg (HImode, val);
15957 promoted_val = val;
15959 return promoted_val;
15962 /* Expand string clear operation (bzero). Use i386 string operations when
15963 profitable. See expand_movmem comment for explanation of individual
15964 steps performed. */
15966 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15967 rtx expected_align_exp, rtx expected_size_exp)
15972 rtx jump_around_label = NULL;
15973 HOST_WIDE_INT align = 1;
15974 unsigned HOST_WIDE_INT count = 0;
15975 HOST_WIDE_INT expected_size = -1;
15976 int size_needed = 0, epilogue_size_needed;
15977 int desired_align = 0;
15978 enum stringop_alg alg;
15979 rtx promoted_val = NULL;
15980 bool force_loopy_epilogue = false;
15983 if (CONST_INT_P (align_exp))
15984 align = INTVAL (align_exp);
15985 /* i386 can do misaligned access on reasonably increased cost. */
15986 if (CONST_INT_P (expected_align_exp)
15987 && INTVAL (expected_align_exp) > align)
15988 align = INTVAL (expected_align_exp);
15989 if (CONST_INT_P (count_exp))
15990 count = expected_size = INTVAL (count_exp);
15991 if (CONST_INT_P (expected_size_exp) && count == 0)
15992 expected_size = INTVAL (expected_size_exp);
15994 /* Make sure we don't need to care about overflow later on. */
15995 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15998 /* Step 0: Decide on preferred algorithm, desired alignment and
15999 size of chunks to be copied by main loop. */
16001 alg = decide_alg (count, expected_size, true, &dynamic_check);
16002 desired_align = decide_alignment (align, alg, expected_size);
16004 if (!TARGET_ALIGN_STRINGOPS)
16005 align = desired_align;
16007 if (alg == libcall)
16009 gcc_assert (alg != no_stringop);
16011 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
16012 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16017 gcc_unreachable ();
16019 size_needed = GET_MODE_SIZE (Pmode);
16021 case unrolled_loop:
16022 size_needed = GET_MODE_SIZE (Pmode) * 4;
16024 case rep_prefix_8_byte:
16027 case rep_prefix_4_byte:
16030 case rep_prefix_1_byte:
16035 epilogue_size_needed = size_needed;
16037 /* Step 1: Prologue guard. */
16039 /* Alignment code needs count to be in register. */
16040 if (CONST_INT_P (count_exp) && desired_align > align)
16042 enum machine_mode mode = SImode;
16043 if (TARGET_64BIT && (count & ~0xffffffff))
16045 count_exp = force_reg (mode, count_exp);
16047 /* Do the cheap promotion to allow better CSE across the
16048 main loop and epilogue (ie one load of the big constant in the
16049 front of all code. */
16050 if (CONST_INT_P (val_exp))
16051 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
16052 desired_align, align);
16053 /* Ensure that alignment prologue won't copy past end of block. */
16054 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
16056 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
16057 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
16058 Make sure it is power of 2. */
16059 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
16061 /* To improve performance of small blocks, we jump around the VAL
16062 promoting mode. This mean that if the promoted VAL is not constant,
16063 we might not use it in the epilogue and have to use byte
16065 if (epilogue_size_needed > 2 && !promoted_val)
16066 force_loopy_epilogue = true;
16067 label = gen_label_rtx ();
16068 emit_cmp_and_jump_insns (count_exp,
16069 GEN_INT (epilogue_size_needed),
16070 LTU, 0, counter_mode (count_exp), 1, label);
16071 if (GET_CODE (count_exp) == CONST_INT)
16073 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
16074 predict_jump (REG_BR_PROB_BASE * 60 / 100);
16076 predict_jump (REG_BR_PROB_BASE * 20 / 100);
16078 if (dynamic_check != -1)
16080 rtx hot_label = gen_label_rtx ();
16081 jump_around_label = gen_label_rtx ();
16082 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
16083 LEU, 0, counter_mode (count_exp), 1, hot_label);
16084 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16085 set_storage_via_libcall (dst, count_exp, val_exp, false);
16086 emit_jump (jump_around_label);
16087 emit_label (hot_label);
16090 /* Step 2: Alignment prologue. */
16092 /* Do the expensive promotion once we branched off the small blocks. */
16094 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
16095 desired_align, align);
16096 gcc_assert (desired_align >= 1 && align >= 1);
16098 if (desired_align > align)
16100 /* Except for the first move in epilogue, we no longer know
16101 constant offset in aliasing info. It don't seems to worth
16102 the pain to maintain it for the first move, so throw away
16104 dst = change_address (dst, BLKmode, destreg);
16105 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
16108 if (label && size_needed == 1)
16110 emit_label (label);
16111 LABEL_NUSES (label) = 1;
16115 /* Step 3: Main loop. */
16121 gcc_unreachable ();
16123 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16124 count_exp, QImode, 1, expected_size);
16127 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16128 count_exp, Pmode, 1, expected_size);
16130 case unrolled_loop:
16131 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16132 count_exp, Pmode, 4, expected_size);
16134 case rep_prefix_8_byte:
16135 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16138 case rep_prefix_4_byte:
16139 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16142 case rep_prefix_1_byte:
16143 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16147 /* Adjust properly the offset of src and dest memory for aliasing. */
16148 if (CONST_INT_P (count_exp))
16149 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
16150 (count / size_needed) * size_needed);
16152 dst = change_address (dst, BLKmode, destreg);
16154 /* Step 4: Epilogue to copy the remaining bytes. */
16158 /* When the main loop is done, COUNT_EXP might hold original count,
16159 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
16160 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
16161 bytes. Compensate if needed. */
16163 if (size_needed < desired_align - align)
16166 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
16167 GEN_INT (size_needed - 1), count_exp, 1,
16169 size_needed = desired_align - align + 1;
16170 if (tmp != count_exp)
16171 emit_move_insn (count_exp, tmp);
16173 emit_label (label);
16174 LABEL_NUSES (label) = 1;
16176 if (count_exp != const0_rtx && epilogue_size_needed > 1)
16178 if (force_loopy_epilogue)
16179 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
16182 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
16185 if (jump_around_label)
16186 emit_label (jump_around_label);
16190 /* Expand the appropriate insns for doing strlen if not just doing
16193 out = result, initialized with the start address
16194 align_rtx = alignment of the address.
16195 scratch = scratch register, initialized with the startaddress when
16196 not aligned, otherwise undefined
16198 This is just the body. It needs the initializations mentioned above and
16199 some address computing at the end. These things are done in i386.md. */
16202 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
16206 rtx align_2_label = NULL_RTX;
16207 rtx align_3_label = NULL_RTX;
16208 rtx align_4_label = gen_label_rtx ();
16209 rtx end_0_label = gen_label_rtx ();
16211 rtx tmpreg = gen_reg_rtx (SImode);
16212 rtx scratch = gen_reg_rtx (SImode);
16216 if (CONST_INT_P (align_rtx))
16217 align = INTVAL (align_rtx);
16219 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
16221 /* Is there a known alignment and is it less than 4? */
16224 rtx scratch1 = gen_reg_rtx (Pmode);
16225 emit_move_insn (scratch1, out);
16226 /* Is there a known alignment and is it not 2? */
16229 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
16230 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
16232 /* Leave just the 3 lower bits. */
16233 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
16234 NULL_RTX, 0, OPTAB_WIDEN);
16236 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16237 Pmode, 1, align_4_label);
16238 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
16239 Pmode, 1, align_2_label);
16240 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
16241 Pmode, 1, align_3_label);
16245 /* Since the alignment is 2, we have to check 2 or 0 bytes;
16246 check if is aligned to 4 - byte. */
16248 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
16249 NULL_RTX, 0, OPTAB_WIDEN);
16251 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16252 Pmode, 1, align_4_label);
16255 mem = change_address (src, QImode, out);
16257 /* Now compare the bytes. */
16259 /* Compare the first n unaligned byte on a byte per byte basis. */
16260 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
16261 QImode, 1, end_0_label);
16263 /* Increment the address. */
16264 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
16266 /* Not needed with an alignment of 2 */
16269 emit_label (align_2_label);
16271 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16274 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
16276 emit_label (align_3_label);
16279 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16282 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
16285 /* Generate loop to check 4 bytes at a time. It is not a good idea to
16286 align this loop. It gives only huge programs, but does not help to
16288 emit_label (align_4_label);
16290 mem = change_address (src, SImode, out);
16291 emit_move_insn (scratch, mem);
16292 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
16294 /* This formula yields a nonzero result iff one of the bytes is zero.
16295 This saves three branches inside loop and many cycles. */
16297 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
16298 emit_insn (gen_one_cmplsi2 (scratch, scratch));
16299 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
16300 emit_insn (gen_andsi3 (tmpreg, tmpreg,
16301 gen_int_mode (0x80808080, SImode)));
16302 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
16307 rtx reg = gen_reg_rtx (SImode);
16308 rtx reg2 = gen_reg_rtx (Pmode);
16309 emit_move_insn (reg, tmpreg);
16310 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16312 /* If zero is not in the first two bytes, move two bytes forward. */
16313 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16314 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16315 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16316 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16317 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16320 /* Emit lea manually to avoid clobbering of flags. */
16321 emit_insn (gen_rtx_SET (SImode, reg2,
16322 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16324 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16325 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16326 emit_insn (gen_rtx_SET (VOIDmode, out,
16327 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16334 rtx end_2_label = gen_label_rtx ();
16335 /* Is zero in the first two bytes? */
16337 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16338 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16339 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16340 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16341 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16343 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16344 JUMP_LABEL (tmp) = end_2_label;
16346 /* Not in the first two. Move two bytes forward. */
16347 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16348 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
16350 emit_label (end_2_label);
16354 /* Avoid branch in fixing the byte. */
16355 tmpreg = gen_lowpart (QImode, tmpreg);
16356 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16357 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16358 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
16360 emit_label (end_0_label);
16363 /* Expand strlen. */
16366 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16368 rtx addr, scratch1, scratch2, scratch3, scratch4;
16370 /* The generic case of strlen expander is long. Avoid it's
16371 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16373 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16374 && !TARGET_INLINE_ALL_STRINGOPS
16376 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16379 addr = force_reg (Pmode, XEXP (src, 0));
16380 scratch1 = gen_reg_rtx (Pmode);
16382 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16385 /* Well it seems that some optimizer does not combine a call like
16386 foo(strlen(bar), strlen(bar));
16387 when the move and the subtraction is done here. It does calculate
16388 the length just once when these instructions are done inside of
16389 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16390 often used and I use one fewer register for the lifetime of
16391 output_strlen_unroll() this is better. */
16393 emit_move_insn (out, addr);
16395 ix86_expand_strlensi_unroll_1 (out, src, align);
16397 /* strlensi_unroll_1 returns the address of the zero at the end of
16398 the string, like memchr(), so compute the length by subtracting
16399 the start address. */
16400 emit_insn ((*ix86_gen_sub3) (out, out, addr));
16406 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16407 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16410 scratch2 = gen_reg_rtx (Pmode);
16411 scratch3 = gen_reg_rtx (Pmode);
16412 scratch4 = force_reg (Pmode, constm1_rtx);
16414 emit_move_insn (scratch3, addr);
16415 eoschar = force_reg (QImode, eoschar);
16417 src = replace_equiv_address_nv (src, scratch3);
16419 /* If .md starts supporting :P, this can be done in .md. */
16420 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16421 scratch4), UNSPEC_SCAS);
16422 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16423 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
16424 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
16429 /* For given symbol (function) construct code to compute address of it's PLT
16430 entry in large x86-64 PIC model. */
16432 construct_plt_address (rtx symbol)
16434 rtx tmp = gen_reg_rtx (Pmode);
16435 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16437 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16438 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16440 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16441 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16446 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16447 rtx callarg2 ATTRIBUTE_UNUSED,
16448 rtx pop, int sibcall)
16450 rtx use = NULL, call;
16452 if (pop == const0_rtx)
16454 gcc_assert (!TARGET_64BIT || !pop);
16456 if (TARGET_MACHO && !TARGET_64BIT)
16459 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16460 fnaddr = machopic_indirect_call_target (fnaddr);
16465 /* Static functions and indirect calls don't need the pic register. */
16466 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16467 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16468 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16469 use_reg (&use, pic_offset_table_rtx);
16472 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16474 rtx al = gen_rtx_REG (QImode, AX_REG);
16475 emit_move_insn (al, callarg2);
16476 use_reg (&use, al);
16479 if (ix86_cmodel == CM_LARGE_PIC
16480 && GET_CODE (fnaddr) == MEM
16481 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16482 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16483 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16484 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16486 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16487 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16489 if (sibcall && TARGET_64BIT
16490 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16493 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16494 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16495 emit_move_insn (fnaddr, addr);
16496 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16499 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16501 call = gen_rtx_SET (VOIDmode, retval, call);
16504 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16505 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16506 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16509 call = emit_call_insn (call);
16511 CALL_INSN_FUNCTION_USAGE (call) = use;
16515 /* Clear stack slot assignments remembered from previous functions.
16516 This is called from INIT_EXPANDERS once before RTL is emitted for each
16519 static struct machine_function *
16520 ix86_init_machine_status (void)
16522 struct machine_function *f;
16524 f = GGC_CNEW (struct machine_function);
16525 f->use_fast_prologue_epilogue_nregs = -1;
16526 f->tls_descriptor_call_expanded_p = 0;
16527 f->call_abi = DEFAULT_ABI;
16532 /* Return a MEM corresponding to a stack slot with mode MODE.
16533 Allocate a new slot if necessary.
16535 The RTL for a function can have several slots available: N is
16536 which slot to use. */
16539 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16541 struct stack_local_entry *s;
16543 gcc_assert (n < MAX_386_STACK_LOCALS);
16545 /* Virtual slot is valid only before vregs are instantiated. */
16546 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16548 for (s = ix86_stack_locals; s; s = s->next)
16549 if (s->mode == mode && s->n == n)
16550 return copy_rtx (s->rtl);
16552 s = (struct stack_local_entry *)
16553 ggc_alloc (sizeof (struct stack_local_entry));
16556 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16558 s->next = ix86_stack_locals;
16559 ix86_stack_locals = s;
16563 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16565 static GTY(()) rtx ix86_tls_symbol;
16567 ix86_tls_get_addr (void)
16570 if (!ix86_tls_symbol)
16572 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16573 (TARGET_ANY_GNU_TLS
16575 ? "___tls_get_addr"
16576 : "__tls_get_addr");
16579 return ix86_tls_symbol;
16582 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16584 static GTY(()) rtx ix86_tls_module_base_symbol;
16586 ix86_tls_module_base (void)
16589 if (!ix86_tls_module_base_symbol)
16591 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16592 "_TLS_MODULE_BASE_");
16593 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16594 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16597 return ix86_tls_module_base_symbol;
16600 /* Calculate the length of the memory address in the instruction
16601 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16604 memory_address_length (rtx addr)
16606 struct ix86_address parts;
16607 rtx base, index, disp;
16611 if (GET_CODE (addr) == PRE_DEC
16612 || GET_CODE (addr) == POST_INC
16613 || GET_CODE (addr) == PRE_MODIFY
16614 || GET_CODE (addr) == POST_MODIFY)
16617 ok = ix86_decompose_address (addr, &parts);
16620 if (parts.base && GET_CODE (parts.base) == SUBREG)
16621 parts.base = SUBREG_REG (parts.base);
16622 if (parts.index && GET_CODE (parts.index) == SUBREG)
16623 parts.index = SUBREG_REG (parts.index);
16626 index = parts.index;
16631 - esp as the base always wants an index,
16632 - ebp as the base always wants a displacement. */
16634 /* Register Indirect. */
16635 if (base && !index && !disp)
16637 /* esp (for its index) and ebp (for its displacement) need
16638 the two-byte modrm form. */
16639 if (addr == stack_pointer_rtx
16640 || addr == arg_pointer_rtx
16641 || addr == frame_pointer_rtx
16642 || addr == hard_frame_pointer_rtx)
16646 /* Direct Addressing. */
16647 else if (disp && !base && !index)
16652 /* Find the length of the displacement constant. */
16655 if (base && satisfies_constraint_K (disp))
16660 /* ebp always wants a displacement. */
16661 else if (base == hard_frame_pointer_rtx)
16664 /* An index requires the two-byte modrm form.... */
16666 /* ...like esp, which always wants an index. */
16667 || base == stack_pointer_rtx
16668 || base == arg_pointer_rtx
16669 || base == frame_pointer_rtx)
16676 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16677 is set, expect that insn have 8bit immediate alternative. */
16679 ix86_attr_length_immediate_default (rtx insn, int shortform)
16683 extract_insn_cached (insn);
16684 for (i = recog_data.n_operands - 1; i >= 0; --i)
16685 if (CONSTANT_P (recog_data.operand[i]))
16688 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16692 switch (get_attr_mode (insn))
16703 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16708 fatal_insn ("unknown insn mode", insn);
16714 /* Compute default value for "length_address" attribute. */
16716 ix86_attr_length_address_default (rtx insn)
16720 if (get_attr_type (insn) == TYPE_LEA)
16722 rtx set = PATTERN (insn);
16724 if (GET_CODE (set) == PARALLEL)
16725 set = XVECEXP (set, 0, 0);
16727 gcc_assert (GET_CODE (set) == SET);
16729 return memory_address_length (SET_SRC (set));
16732 extract_insn_cached (insn);
16733 for (i = recog_data.n_operands - 1; i >= 0; --i)
16734 if (MEM_P (recog_data.operand[i]))
16736 return memory_address_length (XEXP (recog_data.operand[i], 0));
16742 /* Return the maximum number of instructions a cpu can issue. */
16745 ix86_issue_rate (void)
16749 case PROCESSOR_PENTIUM:
16753 case PROCESSOR_PENTIUMPRO:
16754 case PROCESSOR_PENTIUM4:
16755 case PROCESSOR_ATHLON:
16757 case PROCESSOR_AMDFAM10:
16758 case PROCESSOR_NOCONA:
16759 case PROCESSOR_GENERIC32:
16760 case PROCESSOR_GENERIC64:
16763 case PROCESSOR_CORE2:
16771 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16772 by DEP_INSN and nothing set by DEP_INSN. */
16775 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16779 /* Simplify the test for uninteresting insns. */
16780 if (insn_type != TYPE_SETCC
16781 && insn_type != TYPE_ICMOV
16782 && insn_type != TYPE_FCMOV
16783 && insn_type != TYPE_IBR)
16786 if ((set = single_set (dep_insn)) != 0)
16788 set = SET_DEST (set);
16791 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16792 && XVECLEN (PATTERN (dep_insn), 0) == 2
16793 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16794 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16796 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16797 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16802 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16805 /* This test is true if the dependent insn reads the flags but
16806 not any other potentially set register. */
16807 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16810 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16816 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16817 address with operands set by DEP_INSN. */
16820 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16824 if (insn_type == TYPE_LEA
16827 addr = PATTERN (insn);
16829 if (GET_CODE (addr) == PARALLEL)
16830 addr = XVECEXP (addr, 0, 0);
16832 gcc_assert (GET_CODE (addr) == SET);
16834 addr = SET_SRC (addr);
16839 extract_insn_cached (insn);
16840 for (i = recog_data.n_operands - 1; i >= 0; --i)
16841 if (MEM_P (recog_data.operand[i]))
16843 addr = XEXP (recog_data.operand[i], 0);
16850 return modified_in_p (addr, dep_insn);
16854 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16856 enum attr_type insn_type, dep_insn_type;
16857 enum attr_memory memory;
16859 int dep_insn_code_number;
16861 /* Anti and output dependencies have zero cost on all CPUs. */
16862 if (REG_NOTE_KIND (link) != 0)
16865 dep_insn_code_number = recog_memoized (dep_insn);
16867 /* If we can't recognize the insns, we can't really do anything. */
16868 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16871 insn_type = get_attr_type (insn);
16872 dep_insn_type = get_attr_type (dep_insn);
16876 case PROCESSOR_PENTIUM:
16877 /* Address Generation Interlock adds a cycle of latency. */
16878 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16881 /* ??? Compares pair with jump/setcc. */
16882 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16885 /* Floating point stores require value to be ready one cycle earlier. */
16886 if (insn_type == TYPE_FMOV
16887 && get_attr_memory (insn) == MEMORY_STORE
16888 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16892 case PROCESSOR_PENTIUMPRO:
16893 memory = get_attr_memory (insn);
16895 /* INT->FP conversion is expensive. */
16896 if (get_attr_fp_int_src (dep_insn))
16899 /* There is one cycle extra latency between an FP op and a store. */
16900 if (insn_type == TYPE_FMOV
16901 && (set = single_set (dep_insn)) != NULL_RTX
16902 && (set2 = single_set (insn)) != NULL_RTX
16903 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16904 && MEM_P (SET_DEST (set2)))
16907 /* Show ability of reorder buffer to hide latency of load by executing
16908 in parallel with previous instruction in case
16909 previous instruction is not needed to compute the address. */
16910 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16911 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16913 /* Claim moves to take one cycle, as core can issue one load
16914 at time and the next load can start cycle later. */
16915 if (dep_insn_type == TYPE_IMOV
16916 || dep_insn_type == TYPE_FMOV)
16924 memory = get_attr_memory (insn);
16926 /* The esp dependency is resolved before the instruction is really
16928 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16929 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16932 /* INT->FP conversion is expensive. */
16933 if (get_attr_fp_int_src (dep_insn))
16936 /* Show ability of reorder buffer to hide latency of load by executing
16937 in parallel with previous instruction in case
16938 previous instruction is not needed to compute the address. */
16939 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16940 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16942 /* Claim moves to take one cycle, as core can issue one load
16943 at time and the next load can start cycle later. */
16944 if (dep_insn_type == TYPE_IMOV
16945 || dep_insn_type == TYPE_FMOV)
16954 case PROCESSOR_ATHLON:
16956 case PROCESSOR_AMDFAM10:
16957 case PROCESSOR_GENERIC32:
16958 case PROCESSOR_GENERIC64:
16959 memory = get_attr_memory (insn);
16961 /* Show ability of reorder buffer to hide latency of load by executing
16962 in parallel with previous instruction in case
16963 previous instruction is not needed to compute the address. */
16964 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16965 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16967 enum attr_unit unit = get_attr_unit (insn);
16970 /* Because of the difference between the length of integer and
16971 floating unit pipeline preparation stages, the memory operands
16972 for floating point are cheaper.
16974 ??? For Athlon it the difference is most probably 2. */
16975 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16978 loadcost = TARGET_ATHLON ? 2 : 0;
16980 if (cost >= loadcost)
16993 /* How many alternative schedules to try. This should be as wide as the
16994 scheduling freedom in the DFA, but no wider. Making this value too
16995 large results extra work for the scheduler. */
16998 ia32_multipass_dfa_lookahead (void)
17002 case PROCESSOR_PENTIUM:
17005 case PROCESSOR_PENTIUMPRO:
17015 /* Compute the alignment given to a constant that is being placed in memory.
17016 EXP is the constant and ALIGN is the alignment that the object would
17018 The value of this function is used instead of that alignment to align
17022 ix86_constant_alignment (tree exp, int align)
17024 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17025 || TREE_CODE (exp) == INTEGER_CST)
17027 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
17029 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
17032 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17033 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17034 return BITS_PER_WORD;
17039 /* Compute the alignment for a static variable.
17040 TYPE is the data type, and ALIGN is the alignment that
17041 the object would ordinarily have. The value of this function is used
17042 instead of that alignment to align the object. */
17045 ix86_data_alignment (tree type, int align)
17047 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
17049 if (AGGREGATE_TYPE_P (type)
17050 && TYPE_SIZE (type)
17051 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17052 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
17053 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
17054 && align < max_align)
17057 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17058 to 16byte boundary. */
17061 if (AGGREGATE_TYPE_P (type)
17062 && TYPE_SIZE (type)
17063 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17064 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
17065 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
17069 if (TREE_CODE (type) == ARRAY_TYPE)
17071 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17073 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17076 else if (TREE_CODE (type) == COMPLEX_TYPE)
17079 if (TYPE_MODE (type) == DCmode && align < 64)
17081 if (TYPE_MODE (type) == XCmode && align < 128)
17084 else if ((TREE_CODE (type) == RECORD_TYPE
17085 || TREE_CODE (type) == UNION_TYPE
17086 || TREE_CODE (type) == QUAL_UNION_TYPE)
17087 && TYPE_FIELDS (type))
17089 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17091 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17094 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17095 || TREE_CODE (type) == INTEGER_TYPE)
17097 if (TYPE_MODE (type) == DFmode && align < 64)
17099 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17106 /* Compute the alignment for a local variable or a stack slot. TYPE is
17107 the data type, MODE is the widest mode available and ALIGN is the
17108 alignment that the object would ordinarily have. The value of this
17109 macro is used instead of that alignment to align the object. */
17112 ix86_local_alignment (tree type, enum machine_mode mode,
17113 unsigned int align)
17115 /* If TYPE is NULL, we are allocating a stack slot for caller-save
17116 register in MODE. We will return the largest alignment of XF
17120 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
17121 align = GET_MODE_ALIGNMENT (DFmode);
17125 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17126 to 16byte boundary. */
17129 if (AGGREGATE_TYPE_P (type)
17130 && TYPE_SIZE (type)
17131 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17132 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
17133 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
17136 if (TREE_CODE (type) == ARRAY_TYPE)
17138 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17140 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17143 else if (TREE_CODE (type) == COMPLEX_TYPE)
17145 if (TYPE_MODE (type) == DCmode && align < 64)
17147 if (TYPE_MODE (type) == XCmode && align < 128)
17150 else if ((TREE_CODE (type) == RECORD_TYPE
17151 || TREE_CODE (type) == UNION_TYPE
17152 || TREE_CODE (type) == QUAL_UNION_TYPE)
17153 && TYPE_FIELDS (type))
17155 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17157 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17160 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17161 || TREE_CODE (type) == INTEGER_TYPE)
17164 if (TYPE_MODE (type) == DFmode && align < 64)
17166 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17172 /* Emit RTL insns to initialize the variable parts of a trampoline.
17173 FNADDR is an RTX for the address of the function's pure code.
17174 CXT is an RTX for the static chain value for the function. */
17176 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
17180 /* Compute offset from the end of the jmp to the target function. */
17181 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
17182 plus_constant (tramp, 10),
17183 NULL_RTX, 1, OPTAB_DIRECT);
17184 emit_move_insn (gen_rtx_MEM (QImode, tramp),
17185 gen_int_mode (0xb9, QImode));
17186 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
17187 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
17188 gen_int_mode (0xe9, QImode));
17189 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
17194 /* Try to load address using shorter movl instead of movabs.
17195 We may want to support movq for kernel mode, but kernel does not use
17196 trampolines at the moment. */
17197 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17199 fnaddr = copy_to_mode_reg (DImode, fnaddr);
17200 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17201 gen_int_mode (0xbb41, HImode));
17202 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
17203 gen_lowpart (SImode, fnaddr));
17208 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17209 gen_int_mode (0xbb49, HImode));
17210 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17214 /* Load static chain using movabs to r10. */
17215 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17216 gen_int_mode (0xba49, HImode));
17217 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17220 /* Jump to the r11 */
17221 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17222 gen_int_mode (0xff49, HImode));
17223 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
17224 gen_int_mode (0xe3, QImode));
17226 gcc_assert (offset <= TRAMPOLINE_SIZE);
17229 #ifdef ENABLE_EXECUTE_STACK
17230 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17231 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
17235 /* Codes for all the SSE/MMX builtins. */
17238 IX86_BUILTIN_ADDPS,
17239 IX86_BUILTIN_ADDSS,
17240 IX86_BUILTIN_DIVPS,
17241 IX86_BUILTIN_DIVSS,
17242 IX86_BUILTIN_MULPS,
17243 IX86_BUILTIN_MULSS,
17244 IX86_BUILTIN_SUBPS,
17245 IX86_BUILTIN_SUBSS,
17247 IX86_BUILTIN_CMPEQPS,
17248 IX86_BUILTIN_CMPLTPS,
17249 IX86_BUILTIN_CMPLEPS,
17250 IX86_BUILTIN_CMPGTPS,
17251 IX86_BUILTIN_CMPGEPS,
17252 IX86_BUILTIN_CMPNEQPS,
17253 IX86_BUILTIN_CMPNLTPS,
17254 IX86_BUILTIN_CMPNLEPS,
17255 IX86_BUILTIN_CMPNGTPS,
17256 IX86_BUILTIN_CMPNGEPS,
17257 IX86_BUILTIN_CMPORDPS,
17258 IX86_BUILTIN_CMPUNORDPS,
17259 IX86_BUILTIN_CMPEQSS,
17260 IX86_BUILTIN_CMPLTSS,
17261 IX86_BUILTIN_CMPLESS,
17262 IX86_BUILTIN_CMPNEQSS,
17263 IX86_BUILTIN_CMPNLTSS,
17264 IX86_BUILTIN_CMPNLESS,
17265 IX86_BUILTIN_CMPNGTSS,
17266 IX86_BUILTIN_CMPNGESS,
17267 IX86_BUILTIN_CMPORDSS,
17268 IX86_BUILTIN_CMPUNORDSS,
17270 IX86_BUILTIN_COMIEQSS,
17271 IX86_BUILTIN_COMILTSS,
17272 IX86_BUILTIN_COMILESS,
17273 IX86_BUILTIN_COMIGTSS,
17274 IX86_BUILTIN_COMIGESS,
17275 IX86_BUILTIN_COMINEQSS,
17276 IX86_BUILTIN_UCOMIEQSS,
17277 IX86_BUILTIN_UCOMILTSS,
17278 IX86_BUILTIN_UCOMILESS,
17279 IX86_BUILTIN_UCOMIGTSS,
17280 IX86_BUILTIN_UCOMIGESS,
17281 IX86_BUILTIN_UCOMINEQSS,
17283 IX86_BUILTIN_CVTPI2PS,
17284 IX86_BUILTIN_CVTPS2PI,
17285 IX86_BUILTIN_CVTSI2SS,
17286 IX86_BUILTIN_CVTSI642SS,
17287 IX86_BUILTIN_CVTSS2SI,
17288 IX86_BUILTIN_CVTSS2SI64,
17289 IX86_BUILTIN_CVTTPS2PI,
17290 IX86_BUILTIN_CVTTSS2SI,
17291 IX86_BUILTIN_CVTTSS2SI64,
17293 IX86_BUILTIN_MAXPS,
17294 IX86_BUILTIN_MAXSS,
17295 IX86_BUILTIN_MINPS,
17296 IX86_BUILTIN_MINSS,
17298 IX86_BUILTIN_LOADUPS,
17299 IX86_BUILTIN_STOREUPS,
17300 IX86_BUILTIN_MOVSS,
17302 IX86_BUILTIN_MOVHLPS,
17303 IX86_BUILTIN_MOVLHPS,
17304 IX86_BUILTIN_LOADHPS,
17305 IX86_BUILTIN_LOADLPS,
17306 IX86_BUILTIN_STOREHPS,
17307 IX86_BUILTIN_STORELPS,
17309 IX86_BUILTIN_MASKMOVQ,
17310 IX86_BUILTIN_MOVMSKPS,
17311 IX86_BUILTIN_PMOVMSKB,
17313 IX86_BUILTIN_MOVNTPS,
17314 IX86_BUILTIN_MOVNTQ,
17316 IX86_BUILTIN_LOADDQU,
17317 IX86_BUILTIN_STOREDQU,
17319 IX86_BUILTIN_PACKSSWB,
17320 IX86_BUILTIN_PACKSSDW,
17321 IX86_BUILTIN_PACKUSWB,
17323 IX86_BUILTIN_PADDB,
17324 IX86_BUILTIN_PADDW,
17325 IX86_BUILTIN_PADDD,
17326 IX86_BUILTIN_PADDQ,
17327 IX86_BUILTIN_PADDSB,
17328 IX86_BUILTIN_PADDSW,
17329 IX86_BUILTIN_PADDUSB,
17330 IX86_BUILTIN_PADDUSW,
17331 IX86_BUILTIN_PSUBB,
17332 IX86_BUILTIN_PSUBW,
17333 IX86_BUILTIN_PSUBD,
17334 IX86_BUILTIN_PSUBQ,
17335 IX86_BUILTIN_PSUBSB,
17336 IX86_BUILTIN_PSUBSW,
17337 IX86_BUILTIN_PSUBUSB,
17338 IX86_BUILTIN_PSUBUSW,
17341 IX86_BUILTIN_PANDN,
17345 IX86_BUILTIN_PAVGB,
17346 IX86_BUILTIN_PAVGW,
17348 IX86_BUILTIN_PCMPEQB,
17349 IX86_BUILTIN_PCMPEQW,
17350 IX86_BUILTIN_PCMPEQD,
17351 IX86_BUILTIN_PCMPGTB,
17352 IX86_BUILTIN_PCMPGTW,
17353 IX86_BUILTIN_PCMPGTD,
17355 IX86_BUILTIN_PMADDWD,
17357 IX86_BUILTIN_PMAXSW,
17358 IX86_BUILTIN_PMAXUB,
17359 IX86_BUILTIN_PMINSW,
17360 IX86_BUILTIN_PMINUB,
17362 IX86_BUILTIN_PMULHUW,
17363 IX86_BUILTIN_PMULHW,
17364 IX86_BUILTIN_PMULLW,
17366 IX86_BUILTIN_PSADBW,
17367 IX86_BUILTIN_PSHUFW,
17369 IX86_BUILTIN_PSLLW,
17370 IX86_BUILTIN_PSLLD,
17371 IX86_BUILTIN_PSLLQ,
17372 IX86_BUILTIN_PSRAW,
17373 IX86_BUILTIN_PSRAD,
17374 IX86_BUILTIN_PSRLW,
17375 IX86_BUILTIN_PSRLD,
17376 IX86_BUILTIN_PSRLQ,
17377 IX86_BUILTIN_PSLLWI,
17378 IX86_BUILTIN_PSLLDI,
17379 IX86_BUILTIN_PSLLQI,
17380 IX86_BUILTIN_PSRAWI,
17381 IX86_BUILTIN_PSRADI,
17382 IX86_BUILTIN_PSRLWI,
17383 IX86_BUILTIN_PSRLDI,
17384 IX86_BUILTIN_PSRLQI,
17386 IX86_BUILTIN_PUNPCKHBW,
17387 IX86_BUILTIN_PUNPCKHWD,
17388 IX86_BUILTIN_PUNPCKHDQ,
17389 IX86_BUILTIN_PUNPCKLBW,
17390 IX86_BUILTIN_PUNPCKLWD,
17391 IX86_BUILTIN_PUNPCKLDQ,
17393 IX86_BUILTIN_SHUFPS,
17395 IX86_BUILTIN_RCPPS,
17396 IX86_BUILTIN_RCPSS,
17397 IX86_BUILTIN_RSQRTPS,
17398 IX86_BUILTIN_RSQRTPS_NR,
17399 IX86_BUILTIN_RSQRTSS,
17400 IX86_BUILTIN_RSQRTF,
17401 IX86_BUILTIN_SQRTPS,
17402 IX86_BUILTIN_SQRTPS_NR,
17403 IX86_BUILTIN_SQRTSS,
17405 IX86_BUILTIN_UNPCKHPS,
17406 IX86_BUILTIN_UNPCKLPS,
17408 IX86_BUILTIN_ANDPS,
17409 IX86_BUILTIN_ANDNPS,
17411 IX86_BUILTIN_XORPS,
17414 IX86_BUILTIN_LDMXCSR,
17415 IX86_BUILTIN_STMXCSR,
17416 IX86_BUILTIN_SFENCE,
17418 /* 3DNow! Original */
17419 IX86_BUILTIN_FEMMS,
17420 IX86_BUILTIN_PAVGUSB,
17421 IX86_BUILTIN_PF2ID,
17422 IX86_BUILTIN_PFACC,
17423 IX86_BUILTIN_PFADD,
17424 IX86_BUILTIN_PFCMPEQ,
17425 IX86_BUILTIN_PFCMPGE,
17426 IX86_BUILTIN_PFCMPGT,
17427 IX86_BUILTIN_PFMAX,
17428 IX86_BUILTIN_PFMIN,
17429 IX86_BUILTIN_PFMUL,
17430 IX86_BUILTIN_PFRCP,
17431 IX86_BUILTIN_PFRCPIT1,
17432 IX86_BUILTIN_PFRCPIT2,
17433 IX86_BUILTIN_PFRSQIT1,
17434 IX86_BUILTIN_PFRSQRT,
17435 IX86_BUILTIN_PFSUB,
17436 IX86_BUILTIN_PFSUBR,
17437 IX86_BUILTIN_PI2FD,
17438 IX86_BUILTIN_PMULHRW,
17440 /* 3DNow! Athlon Extensions */
17441 IX86_BUILTIN_PF2IW,
17442 IX86_BUILTIN_PFNACC,
17443 IX86_BUILTIN_PFPNACC,
17444 IX86_BUILTIN_PI2FW,
17445 IX86_BUILTIN_PSWAPDSI,
17446 IX86_BUILTIN_PSWAPDSF,
17449 IX86_BUILTIN_ADDPD,
17450 IX86_BUILTIN_ADDSD,
17451 IX86_BUILTIN_DIVPD,
17452 IX86_BUILTIN_DIVSD,
17453 IX86_BUILTIN_MULPD,
17454 IX86_BUILTIN_MULSD,
17455 IX86_BUILTIN_SUBPD,
17456 IX86_BUILTIN_SUBSD,
17458 IX86_BUILTIN_CMPEQPD,
17459 IX86_BUILTIN_CMPLTPD,
17460 IX86_BUILTIN_CMPLEPD,
17461 IX86_BUILTIN_CMPGTPD,
17462 IX86_BUILTIN_CMPGEPD,
17463 IX86_BUILTIN_CMPNEQPD,
17464 IX86_BUILTIN_CMPNLTPD,
17465 IX86_BUILTIN_CMPNLEPD,
17466 IX86_BUILTIN_CMPNGTPD,
17467 IX86_BUILTIN_CMPNGEPD,
17468 IX86_BUILTIN_CMPORDPD,
17469 IX86_BUILTIN_CMPUNORDPD,
17470 IX86_BUILTIN_CMPEQSD,
17471 IX86_BUILTIN_CMPLTSD,
17472 IX86_BUILTIN_CMPLESD,
17473 IX86_BUILTIN_CMPNEQSD,
17474 IX86_BUILTIN_CMPNLTSD,
17475 IX86_BUILTIN_CMPNLESD,
17476 IX86_BUILTIN_CMPORDSD,
17477 IX86_BUILTIN_CMPUNORDSD,
17479 IX86_BUILTIN_COMIEQSD,
17480 IX86_BUILTIN_COMILTSD,
17481 IX86_BUILTIN_COMILESD,
17482 IX86_BUILTIN_COMIGTSD,
17483 IX86_BUILTIN_COMIGESD,
17484 IX86_BUILTIN_COMINEQSD,
17485 IX86_BUILTIN_UCOMIEQSD,
17486 IX86_BUILTIN_UCOMILTSD,
17487 IX86_BUILTIN_UCOMILESD,
17488 IX86_BUILTIN_UCOMIGTSD,
17489 IX86_BUILTIN_UCOMIGESD,
17490 IX86_BUILTIN_UCOMINEQSD,
17492 IX86_BUILTIN_MAXPD,
17493 IX86_BUILTIN_MAXSD,
17494 IX86_BUILTIN_MINPD,
17495 IX86_BUILTIN_MINSD,
17497 IX86_BUILTIN_ANDPD,
17498 IX86_BUILTIN_ANDNPD,
17500 IX86_BUILTIN_XORPD,
17502 IX86_BUILTIN_SQRTPD,
17503 IX86_BUILTIN_SQRTSD,
17505 IX86_BUILTIN_UNPCKHPD,
17506 IX86_BUILTIN_UNPCKLPD,
17508 IX86_BUILTIN_SHUFPD,
17510 IX86_BUILTIN_LOADUPD,
17511 IX86_BUILTIN_STOREUPD,
17512 IX86_BUILTIN_MOVSD,
17514 IX86_BUILTIN_LOADHPD,
17515 IX86_BUILTIN_LOADLPD,
17517 IX86_BUILTIN_CVTDQ2PD,
17518 IX86_BUILTIN_CVTDQ2PS,
17520 IX86_BUILTIN_CVTPD2DQ,
17521 IX86_BUILTIN_CVTPD2PI,
17522 IX86_BUILTIN_CVTPD2PS,
17523 IX86_BUILTIN_CVTTPD2DQ,
17524 IX86_BUILTIN_CVTTPD2PI,
17526 IX86_BUILTIN_CVTPI2PD,
17527 IX86_BUILTIN_CVTSI2SD,
17528 IX86_BUILTIN_CVTSI642SD,
17530 IX86_BUILTIN_CVTSD2SI,
17531 IX86_BUILTIN_CVTSD2SI64,
17532 IX86_BUILTIN_CVTSD2SS,
17533 IX86_BUILTIN_CVTSS2SD,
17534 IX86_BUILTIN_CVTTSD2SI,
17535 IX86_BUILTIN_CVTTSD2SI64,
17537 IX86_BUILTIN_CVTPS2DQ,
17538 IX86_BUILTIN_CVTPS2PD,
17539 IX86_BUILTIN_CVTTPS2DQ,
17541 IX86_BUILTIN_MOVNTI,
17542 IX86_BUILTIN_MOVNTPD,
17543 IX86_BUILTIN_MOVNTDQ,
17546 IX86_BUILTIN_MASKMOVDQU,
17547 IX86_BUILTIN_MOVMSKPD,
17548 IX86_BUILTIN_PMOVMSKB128,
17550 IX86_BUILTIN_PACKSSWB128,
17551 IX86_BUILTIN_PACKSSDW128,
17552 IX86_BUILTIN_PACKUSWB128,
17554 IX86_BUILTIN_PADDB128,
17555 IX86_BUILTIN_PADDW128,
17556 IX86_BUILTIN_PADDD128,
17557 IX86_BUILTIN_PADDQ128,
17558 IX86_BUILTIN_PADDSB128,
17559 IX86_BUILTIN_PADDSW128,
17560 IX86_BUILTIN_PADDUSB128,
17561 IX86_BUILTIN_PADDUSW128,
17562 IX86_BUILTIN_PSUBB128,
17563 IX86_BUILTIN_PSUBW128,
17564 IX86_BUILTIN_PSUBD128,
17565 IX86_BUILTIN_PSUBQ128,
17566 IX86_BUILTIN_PSUBSB128,
17567 IX86_BUILTIN_PSUBSW128,
17568 IX86_BUILTIN_PSUBUSB128,
17569 IX86_BUILTIN_PSUBUSW128,
17571 IX86_BUILTIN_PAND128,
17572 IX86_BUILTIN_PANDN128,
17573 IX86_BUILTIN_POR128,
17574 IX86_BUILTIN_PXOR128,
17576 IX86_BUILTIN_PAVGB128,
17577 IX86_BUILTIN_PAVGW128,
17579 IX86_BUILTIN_PCMPEQB128,
17580 IX86_BUILTIN_PCMPEQW128,
17581 IX86_BUILTIN_PCMPEQD128,
17582 IX86_BUILTIN_PCMPGTB128,
17583 IX86_BUILTIN_PCMPGTW128,
17584 IX86_BUILTIN_PCMPGTD128,
17586 IX86_BUILTIN_PMADDWD128,
17588 IX86_BUILTIN_PMAXSW128,
17589 IX86_BUILTIN_PMAXUB128,
17590 IX86_BUILTIN_PMINSW128,
17591 IX86_BUILTIN_PMINUB128,
17593 IX86_BUILTIN_PMULUDQ,
17594 IX86_BUILTIN_PMULUDQ128,
17595 IX86_BUILTIN_PMULHUW128,
17596 IX86_BUILTIN_PMULHW128,
17597 IX86_BUILTIN_PMULLW128,
17599 IX86_BUILTIN_PSADBW128,
17600 IX86_BUILTIN_PSHUFHW,
17601 IX86_BUILTIN_PSHUFLW,
17602 IX86_BUILTIN_PSHUFD,
17604 IX86_BUILTIN_PSLLDQI128,
17605 IX86_BUILTIN_PSLLWI128,
17606 IX86_BUILTIN_PSLLDI128,
17607 IX86_BUILTIN_PSLLQI128,
17608 IX86_BUILTIN_PSRAWI128,
17609 IX86_BUILTIN_PSRADI128,
17610 IX86_BUILTIN_PSRLDQI128,
17611 IX86_BUILTIN_PSRLWI128,
17612 IX86_BUILTIN_PSRLDI128,
17613 IX86_BUILTIN_PSRLQI128,
17615 IX86_BUILTIN_PSLLDQ128,
17616 IX86_BUILTIN_PSLLW128,
17617 IX86_BUILTIN_PSLLD128,
17618 IX86_BUILTIN_PSLLQ128,
17619 IX86_BUILTIN_PSRAW128,
17620 IX86_BUILTIN_PSRAD128,
17621 IX86_BUILTIN_PSRLW128,
17622 IX86_BUILTIN_PSRLD128,
17623 IX86_BUILTIN_PSRLQ128,
17625 IX86_BUILTIN_PUNPCKHBW128,
17626 IX86_BUILTIN_PUNPCKHWD128,
17627 IX86_BUILTIN_PUNPCKHDQ128,
17628 IX86_BUILTIN_PUNPCKHQDQ128,
17629 IX86_BUILTIN_PUNPCKLBW128,
17630 IX86_BUILTIN_PUNPCKLWD128,
17631 IX86_BUILTIN_PUNPCKLDQ128,
17632 IX86_BUILTIN_PUNPCKLQDQ128,
17634 IX86_BUILTIN_CLFLUSH,
17635 IX86_BUILTIN_MFENCE,
17636 IX86_BUILTIN_LFENCE,
17639 IX86_BUILTIN_ADDSUBPS,
17640 IX86_BUILTIN_HADDPS,
17641 IX86_BUILTIN_HSUBPS,
17642 IX86_BUILTIN_MOVSHDUP,
17643 IX86_BUILTIN_MOVSLDUP,
17644 IX86_BUILTIN_ADDSUBPD,
17645 IX86_BUILTIN_HADDPD,
17646 IX86_BUILTIN_HSUBPD,
17647 IX86_BUILTIN_LDDQU,
17649 IX86_BUILTIN_MONITOR,
17650 IX86_BUILTIN_MWAIT,
17653 IX86_BUILTIN_PHADDW,
17654 IX86_BUILTIN_PHADDD,
17655 IX86_BUILTIN_PHADDSW,
17656 IX86_BUILTIN_PHSUBW,
17657 IX86_BUILTIN_PHSUBD,
17658 IX86_BUILTIN_PHSUBSW,
17659 IX86_BUILTIN_PMADDUBSW,
17660 IX86_BUILTIN_PMULHRSW,
17661 IX86_BUILTIN_PSHUFB,
17662 IX86_BUILTIN_PSIGNB,
17663 IX86_BUILTIN_PSIGNW,
17664 IX86_BUILTIN_PSIGND,
17665 IX86_BUILTIN_PALIGNR,
17666 IX86_BUILTIN_PABSB,
17667 IX86_BUILTIN_PABSW,
17668 IX86_BUILTIN_PABSD,
17670 IX86_BUILTIN_PHADDW128,
17671 IX86_BUILTIN_PHADDD128,
17672 IX86_BUILTIN_PHADDSW128,
17673 IX86_BUILTIN_PHSUBW128,
17674 IX86_BUILTIN_PHSUBD128,
17675 IX86_BUILTIN_PHSUBSW128,
17676 IX86_BUILTIN_PMADDUBSW128,
17677 IX86_BUILTIN_PMULHRSW128,
17678 IX86_BUILTIN_PSHUFB128,
17679 IX86_BUILTIN_PSIGNB128,
17680 IX86_BUILTIN_PSIGNW128,
17681 IX86_BUILTIN_PSIGND128,
17682 IX86_BUILTIN_PALIGNR128,
17683 IX86_BUILTIN_PABSB128,
17684 IX86_BUILTIN_PABSW128,
17685 IX86_BUILTIN_PABSD128,
17687 /* AMDFAM10 - SSE4A New Instructions. */
17688 IX86_BUILTIN_MOVNTSD,
17689 IX86_BUILTIN_MOVNTSS,
17690 IX86_BUILTIN_EXTRQI,
17691 IX86_BUILTIN_EXTRQ,
17692 IX86_BUILTIN_INSERTQI,
17693 IX86_BUILTIN_INSERTQ,
17696 IX86_BUILTIN_BLENDPD,
17697 IX86_BUILTIN_BLENDPS,
17698 IX86_BUILTIN_BLENDVPD,
17699 IX86_BUILTIN_BLENDVPS,
17700 IX86_BUILTIN_PBLENDVB128,
17701 IX86_BUILTIN_PBLENDW128,
17706 IX86_BUILTIN_INSERTPS128,
17708 IX86_BUILTIN_MOVNTDQA,
17709 IX86_BUILTIN_MPSADBW128,
17710 IX86_BUILTIN_PACKUSDW128,
17711 IX86_BUILTIN_PCMPEQQ,
17712 IX86_BUILTIN_PHMINPOSUW128,
17714 IX86_BUILTIN_PMAXSB128,
17715 IX86_BUILTIN_PMAXSD128,
17716 IX86_BUILTIN_PMAXUD128,
17717 IX86_BUILTIN_PMAXUW128,
17719 IX86_BUILTIN_PMINSB128,
17720 IX86_BUILTIN_PMINSD128,
17721 IX86_BUILTIN_PMINUD128,
17722 IX86_BUILTIN_PMINUW128,
17724 IX86_BUILTIN_PMOVSXBW128,
17725 IX86_BUILTIN_PMOVSXBD128,
17726 IX86_BUILTIN_PMOVSXBQ128,
17727 IX86_BUILTIN_PMOVSXWD128,
17728 IX86_BUILTIN_PMOVSXWQ128,
17729 IX86_BUILTIN_PMOVSXDQ128,
17731 IX86_BUILTIN_PMOVZXBW128,
17732 IX86_BUILTIN_PMOVZXBD128,
17733 IX86_BUILTIN_PMOVZXBQ128,
17734 IX86_BUILTIN_PMOVZXWD128,
17735 IX86_BUILTIN_PMOVZXWQ128,
17736 IX86_BUILTIN_PMOVZXDQ128,
17738 IX86_BUILTIN_PMULDQ128,
17739 IX86_BUILTIN_PMULLD128,
17741 IX86_BUILTIN_ROUNDPD,
17742 IX86_BUILTIN_ROUNDPS,
17743 IX86_BUILTIN_ROUNDSD,
17744 IX86_BUILTIN_ROUNDSS,
17746 IX86_BUILTIN_PTESTZ,
17747 IX86_BUILTIN_PTESTC,
17748 IX86_BUILTIN_PTESTNZC,
17750 IX86_BUILTIN_VEC_INIT_V2SI,
17751 IX86_BUILTIN_VEC_INIT_V4HI,
17752 IX86_BUILTIN_VEC_INIT_V8QI,
17753 IX86_BUILTIN_VEC_EXT_V2DF,
17754 IX86_BUILTIN_VEC_EXT_V2DI,
17755 IX86_BUILTIN_VEC_EXT_V4SF,
17756 IX86_BUILTIN_VEC_EXT_V4SI,
17757 IX86_BUILTIN_VEC_EXT_V8HI,
17758 IX86_BUILTIN_VEC_EXT_V2SI,
17759 IX86_BUILTIN_VEC_EXT_V4HI,
17760 IX86_BUILTIN_VEC_EXT_V16QI,
17761 IX86_BUILTIN_VEC_SET_V2DI,
17762 IX86_BUILTIN_VEC_SET_V4SF,
17763 IX86_BUILTIN_VEC_SET_V4SI,
17764 IX86_BUILTIN_VEC_SET_V8HI,
17765 IX86_BUILTIN_VEC_SET_V4HI,
17766 IX86_BUILTIN_VEC_SET_V16QI,
17768 IX86_BUILTIN_VEC_PACK_SFIX,
17771 IX86_BUILTIN_CRC32QI,
17772 IX86_BUILTIN_CRC32HI,
17773 IX86_BUILTIN_CRC32SI,
17774 IX86_BUILTIN_CRC32DI,
17776 IX86_BUILTIN_PCMPESTRI128,
17777 IX86_BUILTIN_PCMPESTRM128,
17778 IX86_BUILTIN_PCMPESTRA128,
17779 IX86_BUILTIN_PCMPESTRC128,
17780 IX86_BUILTIN_PCMPESTRO128,
17781 IX86_BUILTIN_PCMPESTRS128,
17782 IX86_BUILTIN_PCMPESTRZ128,
17783 IX86_BUILTIN_PCMPISTRI128,
17784 IX86_BUILTIN_PCMPISTRM128,
17785 IX86_BUILTIN_PCMPISTRA128,
17786 IX86_BUILTIN_PCMPISTRC128,
17787 IX86_BUILTIN_PCMPISTRO128,
17788 IX86_BUILTIN_PCMPISTRS128,
17789 IX86_BUILTIN_PCMPISTRZ128,
17791 IX86_BUILTIN_PCMPGTQ,
17793 /* AES instructions */
17794 IX86_BUILTIN_AESENC128,
17795 IX86_BUILTIN_AESENCLAST128,
17796 IX86_BUILTIN_AESDEC128,
17797 IX86_BUILTIN_AESDECLAST128,
17798 IX86_BUILTIN_AESIMC128,
17799 IX86_BUILTIN_AESKEYGENASSIST128,
17801 /* PCLMUL instruction */
17802 IX86_BUILTIN_PCLMULQDQ128,
17804 /* TFmode support builtins. */
17806 IX86_BUILTIN_FABSQ,
17807 IX86_BUILTIN_COPYSIGNQ,
17809 /* SSE5 instructions */
17810 IX86_BUILTIN_FMADDSS,
17811 IX86_BUILTIN_FMADDSD,
17812 IX86_BUILTIN_FMADDPS,
17813 IX86_BUILTIN_FMADDPD,
17814 IX86_BUILTIN_FMSUBSS,
17815 IX86_BUILTIN_FMSUBSD,
17816 IX86_BUILTIN_FMSUBPS,
17817 IX86_BUILTIN_FMSUBPD,
17818 IX86_BUILTIN_FNMADDSS,
17819 IX86_BUILTIN_FNMADDSD,
17820 IX86_BUILTIN_FNMADDPS,
17821 IX86_BUILTIN_FNMADDPD,
17822 IX86_BUILTIN_FNMSUBSS,
17823 IX86_BUILTIN_FNMSUBSD,
17824 IX86_BUILTIN_FNMSUBPS,
17825 IX86_BUILTIN_FNMSUBPD,
17826 IX86_BUILTIN_PCMOV_V2DI,
17827 IX86_BUILTIN_PCMOV_V4SI,
17828 IX86_BUILTIN_PCMOV_V8HI,
17829 IX86_BUILTIN_PCMOV_V16QI,
17830 IX86_BUILTIN_PCMOV_V4SF,
17831 IX86_BUILTIN_PCMOV_V2DF,
17832 IX86_BUILTIN_PPERM,
17833 IX86_BUILTIN_PERMPS,
17834 IX86_BUILTIN_PERMPD,
17835 IX86_BUILTIN_PMACSSWW,
17836 IX86_BUILTIN_PMACSWW,
17837 IX86_BUILTIN_PMACSSWD,
17838 IX86_BUILTIN_PMACSWD,
17839 IX86_BUILTIN_PMACSSDD,
17840 IX86_BUILTIN_PMACSDD,
17841 IX86_BUILTIN_PMACSSDQL,
17842 IX86_BUILTIN_PMACSSDQH,
17843 IX86_BUILTIN_PMACSDQL,
17844 IX86_BUILTIN_PMACSDQH,
17845 IX86_BUILTIN_PMADCSSWD,
17846 IX86_BUILTIN_PMADCSWD,
17847 IX86_BUILTIN_PHADDBW,
17848 IX86_BUILTIN_PHADDBD,
17849 IX86_BUILTIN_PHADDBQ,
17850 IX86_BUILTIN_PHADDWD,
17851 IX86_BUILTIN_PHADDWQ,
17852 IX86_BUILTIN_PHADDDQ,
17853 IX86_BUILTIN_PHADDUBW,
17854 IX86_BUILTIN_PHADDUBD,
17855 IX86_BUILTIN_PHADDUBQ,
17856 IX86_BUILTIN_PHADDUWD,
17857 IX86_BUILTIN_PHADDUWQ,
17858 IX86_BUILTIN_PHADDUDQ,
17859 IX86_BUILTIN_PHSUBBW,
17860 IX86_BUILTIN_PHSUBWD,
17861 IX86_BUILTIN_PHSUBDQ,
17862 IX86_BUILTIN_PROTB,
17863 IX86_BUILTIN_PROTW,
17864 IX86_BUILTIN_PROTD,
17865 IX86_BUILTIN_PROTQ,
17866 IX86_BUILTIN_PROTB_IMM,
17867 IX86_BUILTIN_PROTW_IMM,
17868 IX86_BUILTIN_PROTD_IMM,
17869 IX86_BUILTIN_PROTQ_IMM,
17870 IX86_BUILTIN_PSHLB,
17871 IX86_BUILTIN_PSHLW,
17872 IX86_BUILTIN_PSHLD,
17873 IX86_BUILTIN_PSHLQ,
17874 IX86_BUILTIN_PSHAB,
17875 IX86_BUILTIN_PSHAW,
17876 IX86_BUILTIN_PSHAD,
17877 IX86_BUILTIN_PSHAQ,
17878 IX86_BUILTIN_FRCZSS,
17879 IX86_BUILTIN_FRCZSD,
17880 IX86_BUILTIN_FRCZPS,
17881 IX86_BUILTIN_FRCZPD,
17882 IX86_BUILTIN_CVTPH2PS,
17883 IX86_BUILTIN_CVTPS2PH,
17885 IX86_BUILTIN_COMEQSS,
17886 IX86_BUILTIN_COMNESS,
17887 IX86_BUILTIN_COMLTSS,
17888 IX86_BUILTIN_COMLESS,
17889 IX86_BUILTIN_COMGTSS,
17890 IX86_BUILTIN_COMGESS,
17891 IX86_BUILTIN_COMUEQSS,
17892 IX86_BUILTIN_COMUNESS,
17893 IX86_BUILTIN_COMULTSS,
17894 IX86_BUILTIN_COMULESS,
17895 IX86_BUILTIN_COMUGTSS,
17896 IX86_BUILTIN_COMUGESS,
17897 IX86_BUILTIN_COMORDSS,
17898 IX86_BUILTIN_COMUNORDSS,
17899 IX86_BUILTIN_COMFALSESS,
17900 IX86_BUILTIN_COMTRUESS,
17902 IX86_BUILTIN_COMEQSD,
17903 IX86_BUILTIN_COMNESD,
17904 IX86_BUILTIN_COMLTSD,
17905 IX86_BUILTIN_COMLESD,
17906 IX86_BUILTIN_COMGTSD,
17907 IX86_BUILTIN_COMGESD,
17908 IX86_BUILTIN_COMUEQSD,
17909 IX86_BUILTIN_COMUNESD,
17910 IX86_BUILTIN_COMULTSD,
17911 IX86_BUILTIN_COMULESD,
17912 IX86_BUILTIN_COMUGTSD,
17913 IX86_BUILTIN_COMUGESD,
17914 IX86_BUILTIN_COMORDSD,
17915 IX86_BUILTIN_COMUNORDSD,
17916 IX86_BUILTIN_COMFALSESD,
17917 IX86_BUILTIN_COMTRUESD,
17919 IX86_BUILTIN_COMEQPS,
17920 IX86_BUILTIN_COMNEPS,
17921 IX86_BUILTIN_COMLTPS,
17922 IX86_BUILTIN_COMLEPS,
17923 IX86_BUILTIN_COMGTPS,
17924 IX86_BUILTIN_COMGEPS,
17925 IX86_BUILTIN_COMUEQPS,
17926 IX86_BUILTIN_COMUNEPS,
17927 IX86_BUILTIN_COMULTPS,
17928 IX86_BUILTIN_COMULEPS,
17929 IX86_BUILTIN_COMUGTPS,
17930 IX86_BUILTIN_COMUGEPS,
17931 IX86_BUILTIN_COMORDPS,
17932 IX86_BUILTIN_COMUNORDPS,
17933 IX86_BUILTIN_COMFALSEPS,
17934 IX86_BUILTIN_COMTRUEPS,
17936 IX86_BUILTIN_COMEQPD,
17937 IX86_BUILTIN_COMNEPD,
17938 IX86_BUILTIN_COMLTPD,
17939 IX86_BUILTIN_COMLEPD,
17940 IX86_BUILTIN_COMGTPD,
17941 IX86_BUILTIN_COMGEPD,
17942 IX86_BUILTIN_COMUEQPD,
17943 IX86_BUILTIN_COMUNEPD,
17944 IX86_BUILTIN_COMULTPD,
17945 IX86_BUILTIN_COMULEPD,
17946 IX86_BUILTIN_COMUGTPD,
17947 IX86_BUILTIN_COMUGEPD,
17948 IX86_BUILTIN_COMORDPD,
17949 IX86_BUILTIN_COMUNORDPD,
17950 IX86_BUILTIN_COMFALSEPD,
17951 IX86_BUILTIN_COMTRUEPD,
17953 IX86_BUILTIN_PCOMEQUB,
17954 IX86_BUILTIN_PCOMNEUB,
17955 IX86_BUILTIN_PCOMLTUB,
17956 IX86_BUILTIN_PCOMLEUB,
17957 IX86_BUILTIN_PCOMGTUB,
17958 IX86_BUILTIN_PCOMGEUB,
17959 IX86_BUILTIN_PCOMFALSEUB,
17960 IX86_BUILTIN_PCOMTRUEUB,
17961 IX86_BUILTIN_PCOMEQUW,
17962 IX86_BUILTIN_PCOMNEUW,
17963 IX86_BUILTIN_PCOMLTUW,
17964 IX86_BUILTIN_PCOMLEUW,
17965 IX86_BUILTIN_PCOMGTUW,
17966 IX86_BUILTIN_PCOMGEUW,
17967 IX86_BUILTIN_PCOMFALSEUW,
17968 IX86_BUILTIN_PCOMTRUEUW,
17969 IX86_BUILTIN_PCOMEQUD,
17970 IX86_BUILTIN_PCOMNEUD,
17971 IX86_BUILTIN_PCOMLTUD,
17972 IX86_BUILTIN_PCOMLEUD,
17973 IX86_BUILTIN_PCOMGTUD,
17974 IX86_BUILTIN_PCOMGEUD,
17975 IX86_BUILTIN_PCOMFALSEUD,
17976 IX86_BUILTIN_PCOMTRUEUD,
17977 IX86_BUILTIN_PCOMEQUQ,
17978 IX86_BUILTIN_PCOMNEUQ,
17979 IX86_BUILTIN_PCOMLTUQ,
17980 IX86_BUILTIN_PCOMLEUQ,
17981 IX86_BUILTIN_PCOMGTUQ,
17982 IX86_BUILTIN_PCOMGEUQ,
17983 IX86_BUILTIN_PCOMFALSEUQ,
17984 IX86_BUILTIN_PCOMTRUEUQ,
17986 IX86_BUILTIN_PCOMEQB,
17987 IX86_BUILTIN_PCOMNEB,
17988 IX86_BUILTIN_PCOMLTB,
17989 IX86_BUILTIN_PCOMLEB,
17990 IX86_BUILTIN_PCOMGTB,
17991 IX86_BUILTIN_PCOMGEB,
17992 IX86_BUILTIN_PCOMFALSEB,
17993 IX86_BUILTIN_PCOMTRUEB,
17994 IX86_BUILTIN_PCOMEQW,
17995 IX86_BUILTIN_PCOMNEW,
17996 IX86_BUILTIN_PCOMLTW,
17997 IX86_BUILTIN_PCOMLEW,
17998 IX86_BUILTIN_PCOMGTW,
17999 IX86_BUILTIN_PCOMGEW,
18000 IX86_BUILTIN_PCOMFALSEW,
18001 IX86_BUILTIN_PCOMTRUEW,
18002 IX86_BUILTIN_PCOMEQD,
18003 IX86_BUILTIN_PCOMNED,
18004 IX86_BUILTIN_PCOMLTD,
18005 IX86_BUILTIN_PCOMLED,
18006 IX86_BUILTIN_PCOMGTD,
18007 IX86_BUILTIN_PCOMGED,
18008 IX86_BUILTIN_PCOMFALSED,
18009 IX86_BUILTIN_PCOMTRUED,
18010 IX86_BUILTIN_PCOMEQQ,
18011 IX86_BUILTIN_PCOMNEQ,
18012 IX86_BUILTIN_PCOMLTQ,
18013 IX86_BUILTIN_PCOMLEQ,
18014 IX86_BUILTIN_PCOMGTQ,
18015 IX86_BUILTIN_PCOMGEQ,
18016 IX86_BUILTIN_PCOMFALSEQ,
18017 IX86_BUILTIN_PCOMTRUEQ,
18022 /* Table for the ix86 builtin decls. */
18023 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
18025 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
18026 * if the target_flags include one of MASK. Stores the function decl
18027 * in the ix86_builtins array.
18028 * Returns the function decl or NULL_TREE, if the builtin was not added. */
18031 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
18033 tree decl = NULL_TREE;
18035 if (mask & ix86_isa_flags
18036 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
18038 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
18040 ix86_builtins[(int) code] = decl;
18046 /* Like def_builtin, but also marks the function decl "const". */
18049 def_builtin_const (int mask, const char *name, tree type,
18050 enum ix86_builtins code)
18052 tree decl = def_builtin (mask, name, type, code);
18054 TREE_READONLY (decl) = 1;
18058 /* Bits for builtin_description.flag. */
18060 /* Set when we don't support the comparison natively, and should
18061 swap_comparison in order to support it. */
18062 #define BUILTIN_DESC_SWAP_OPERANDS 1
18064 struct builtin_description
18066 const unsigned int mask;
18067 const enum insn_code icode;
18068 const char *const name;
18069 const enum ix86_builtins code;
18070 const enum rtx_code comparison;
18074 static const struct builtin_description bdesc_comi[] =
18076 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
18077 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
18078 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
18079 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
18080 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
18081 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
18082 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
18083 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
18084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
18085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
18086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
18087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
18088 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
18089 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
18090 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
18091 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
18092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
18093 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
18094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
18095 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
18096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
18097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
18098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
18099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
18102 static const struct builtin_description bdesc_pcmpestr[] =
18105 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
18106 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
18107 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
18108 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
18109 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
18110 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
18111 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
18114 static const struct builtin_description bdesc_pcmpistr[] =
18117 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
18118 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
18119 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
18120 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
18121 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
18122 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
18123 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
18126 /* Special builtin types */
18127 enum ix86_special_builtin_type
18129 SPECIAL_FTYPE_UNKNOWN,
18131 V16QI_FTYPE_PCCHAR,
18132 V4SF_FTYPE_PCFLOAT,
18133 V2DF_FTYPE_PCDOUBLE,
18134 V4SF_FTYPE_V4SF_PCV2SF,
18135 V2DF_FTYPE_V2DF_PCDOUBLE,
18137 VOID_FTYPE_PV2SF_V4SF,
18138 VOID_FTYPE_PV2DI_V2DI,
18139 VOID_FTYPE_PCHAR_V16QI,
18140 VOID_FTYPE_PFLOAT_V4SF,
18141 VOID_FTYPE_PDOUBLE_V2DF,
18143 VOID_FTYPE_PINT_INT
18146 /* Builtin types */
18147 enum ix86_builtin_type
18150 FLOAT128_FTYPE_FLOAT128,
18152 FLOAT128_FTYPE_FLOAT128_FLOAT128,
18153 INT_FTYPE_V2DI_V2DI_PTEST,
18171 V4SF_FTYPE_V4SF_VEC_MERGE,
18179 V2DF_FTYPE_V2DF_VEC_MERGE,
18189 V16QI_FTYPE_V16QI_V16QI,
18190 V16QI_FTYPE_V8HI_V8HI,
18191 V8QI_FTYPE_V8QI_V8QI,
18192 V8QI_FTYPE_V4HI_V4HI,
18193 V8HI_FTYPE_V8HI_V8HI,
18194 V8HI_FTYPE_V8HI_V8HI_COUNT,
18195 V8HI_FTYPE_V16QI_V16QI,
18196 V8HI_FTYPE_V4SI_V4SI,
18197 V8HI_FTYPE_V8HI_SI_COUNT,
18198 V4SI_FTYPE_V4SI_V4SI,
18199 V4SI_FTYPE_V4SI_V4SI_COUNT,
18200 V4SI_FTYPE_V8HI_V8HI,
18201 V4SI_FTYPE_V4SF_V4SF,
18202 V4SI_FTYPE_V2DF_V2DF,
18203 V4SI_FTYPE_V4SI_SI_COUNT,
18204 V4HI_FTYPE_V4HI_V4HI,
18205 V4HI_FTYPE_V4HI_V4HI_COUNT,
18206 V4HI_FTYPE_V8QI_V8QI,
18207 V4HI_FTYPE_V2SI_V2SI,
18208 V4HI_FTYPE_V4HI_SI_COUNT,
18209 V4SF_FTYPE_V4SF_V4SF,
18210 V4SF_FTYPE_V4SF_V4SF_SWAP,
18211 V4SF_FTYPE_V4SF_V2SI,
18212 V4SF_FTYPE_V4SF_V2DF,
18213 V4SF_FTYPE_V4SF_DI,
18214 V4SF_FTYPE_V4SF_SI,
18215 V2DI_FTYPE_V2DI_V2DI,
18216 V2DI_FTYPE_V2DI_V2DI_COUNT,
18217 V2DI_FTYPE_V16QI_V16QI,
18218 V2DI_FTYPE_V4SI_V4SI,
18219 V2DI_FTYPE_V2DI_V16QI,
18220 V2DI_FTYPE_V2DF_V2DF,
18221 V2DI_FTYPE_V2DI_SI_COUNT,
18222 V2SI_FTYPE_V2SI_V2SI,
18223 V2SI_FTYPE_V2SI_V2SI_COUNT,
18224 V2SI_FTYPE_V4HI_V4HI,
18225 V2SI_FTYPE_V2SF_V2SF,
18226 V2SI_FTYPE_V2SI_SI_COUNT,
18227 V2DF_FTYPE_V2DF_V2DF,
18228 V2DF_FTYPE_V2DF_V2DF_SWAP,
18229 V2DF_FTYPE_V2DF_V4SF,
18230 V2DF_FTYPE_V2DF_DI,
18231 V2DF_FTYPE_V2DF_SI,
18232 V2SF_FTYPE_V2SF_V2SF,
18233 V1DI_FTYPE_V1DI_V1DI,
18234 V1DI_FTYPE_V1DI_V1DI_COUNT,
18235 V1DI_FTYPE_V8QI_V8QI,
18236 V1DI_FTYPE_V2SI_V2SI,
18237 V1DI_FTYPE_V1DI_SI_COUNT,
18238 UINT64_FTYPE_UINT64_UINT64,
18239 UINT_FTYPE_UINT_UINT,
18240 UINT_FTYPE_UINT_USHORT,
18241 UINT_FTYPE_UINT_UCHAR,
18242 V8HI_FTYPE_V8HI_INT,
18243 V4SI_FTYPE_V4SI_INT,
18244 V4HI_FTYPE_V4HI_INT,
18245 V4SF_FTYPE_V4SF_INT,
18246 V2DI_FTYPE_V2DI_INT,
18247 V2DI2TI_FTYPE_V2DI_INT,
18248 V2DF_FTYPE_V2DF_INT,
18249 V16QI_FTYPE_V16QI_V16QI_V16QI,
18250 V4SF_FTYPE_V4SF_V4SF_V4SF,
18251 V2DF_FTYPE_V2DF_V2DF_V2DF,
18252 V16QI_FTYPE_V16QI_V16QI_INT,
18253 V8HI_FTYPE_V8HI_V8HI_INT,
18254 V4SI_FTYPE_V4SI_V4SI_INT,
18255 V4SF_FTYPE_V4SF_V4SF_INT,
18256 V2DI_FTYPE_V2DI_V2DI_INT,
18257 V2DI2TI_FTYPE_V2DI_V2DI_INT,
18258 V1DI2DI_FTYPE_V1DI_V1DI_INT,
18259 V2DF_FTYPE_V2DF_V2DF_INT,
18260 V2DI_FTYPE_V2DI_UINT_UINT,
18261 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
18264 /* Special builtins with variable number of arguments. */
18265 static const struct builtin_description bdesc_special_args[] =
18268 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18271 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18275 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18276 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
18278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18283 /* SSE or 3DNow!A */
18284 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18285 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
18288 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18289 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18290 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18291 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
18292 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18293 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
18294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
18295 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
18296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18302 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18305 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
18308 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18309 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18312 /* Builtins with variable number of arguments. */
18313 static const struct builtin_description bdesc_args[] =
18316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18317 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18323 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18325 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18326 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18329 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18330 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18337 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18343 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18350 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18351 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
18352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
18354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
18356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
18360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18365 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18371 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18377 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18380 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18381 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18382 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18383 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18385 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18386 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18387 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18388 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18389 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18390 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18391 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18392 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18393 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18394 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18395 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18396 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18397 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18398 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18399 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18402 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18403 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18404 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18405 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18406 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18407 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18410 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
18411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18412 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18413 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18414 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18418 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18419 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18420 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18421 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18425 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18426 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18427 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18431 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18434 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18435 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18437 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18439 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18440 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18441 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18442 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18444 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
18445 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18446 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18447 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18448 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18449 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18450 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18451 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18452 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18453 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18454 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18457 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18458 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18459 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18460 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18462 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18464 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18465 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
18474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
18475 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
18477 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
18479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18483 /* SSE MMX or 3Dnow!A */
18484 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18485 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18486 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18488 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18489 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18490 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18491 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18493 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
18494 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
18496 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
18499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
18502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
18503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
18504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
18505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
18507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
18510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
18515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18517 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18518 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
18522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18524 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18525 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18526 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18527 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
18538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18555 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18559 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18561 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18562 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18568 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
18570 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18571 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18572 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18573 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18574 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18575 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18576 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18577 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18588 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18589 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
18591 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18593 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18594 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18606 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18607 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18608 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18617 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18624 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
18627 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
18628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
18632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
18633 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
18634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
18635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
18637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18638 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18639 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18640 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18641 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18642 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18643 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18646 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18647 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18648 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18649 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18650 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18651 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18653 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18654 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18655 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18656 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
18659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
18664 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
18665 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
18668 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18669 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18672 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
18673 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18675 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18676 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18677 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18678 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18679 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18680 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18683 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
18684 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
18685 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18686 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
18687 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
18688 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18690 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18691 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18692 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18693 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18694 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18695 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18696 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18697 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18698 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18699 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18700 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18701 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18702 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
18703 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
18704 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18705 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18706 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18707 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18708 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18709 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18710 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18711 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18712 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18713 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18716 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
18717 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
18720 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18721 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18722 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
18723 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
18724 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18725 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18726 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18727 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
18728 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
18729 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
18731 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18732 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18733 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18734 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18735 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18736 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18737 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18738 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18739 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18740 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18741 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18742 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18743 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18745 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18746 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18747 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18748 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18749 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18750 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18751 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18752 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18753 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18754 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18755 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18756 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18758 /* SSE4.1 and SSE5 */
18759 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
18760 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
18761 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18762 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18764 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18765 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18766 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18769 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18770 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
18771 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
18772 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
18773 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
18776 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
18777 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
18778 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
18779 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18782 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
18783 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
18785 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18786 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18787 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18788 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18791 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
18795 enum multi_arg_type {
18805 MULTI_ARG_3_PERMPS,
18806 MULTI_ARG_3_PERMPD,
18813 MULTI_ARG_2_DI_IMM,
18814 MULTI_ARG_2_SI_IMM,
18815 MULTI_ARG_2_HI_IMM,
18816 MULTI_ARG_2_QI_IMM,
18817 MULTI_ARG_2_SF_CMP,
18818 MULTI_ARG_2_DF_CMP,
18819 MULTI_ARG_2_DI_CMP,
18820 MULTI_ARG_2_SI_CMP,
18821 MULTI_ARG_2_HI_CMP,
18822 MULTI_ARG_2_QI_CMP,
18845 static const struct builtin_description bdesc_multi_arg[] =
18847 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18848 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18849 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18850 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18851 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18852 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18853 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18854 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18855 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18856 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18857 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18858 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18859 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18860 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18861 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18862 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18864 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18865 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18870 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18872 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18874 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18905 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18908 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18949 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
19000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
19003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
19004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
19005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
19007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
19008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
19011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
19012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
19013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
19015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
19016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
19017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
19018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
19019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
19020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
19021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
19023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
19024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
19025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
19026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
19027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
19028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
19029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
19031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
19032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
19033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
19034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
19035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
19036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
19037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
19039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
19040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
19041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
19042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
19043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
19044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
19045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
19047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
19048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
19049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
19050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
19051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
19052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
19053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
19055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
19056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
19057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
19058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
19059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
19060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
19061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
19062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
19064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
19065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
19066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
19067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
19068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
19069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
19070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
19071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
19073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
19074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
19075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
19076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
19077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
19078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
19079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
19080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
19083 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
19084 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
19087 ix86_init_mmx_sse_builtins (void)
19089 const struct builtin_description * d;
19092 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
19093 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19094 tree V1DI_type_node
19095 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
19096 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
19097 tree V2DI_type_node
19098 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
19099 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
19100 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
19101 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
19102 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19103 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
19104 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
19106 tree pchar_type_node = build_pointer_type (char_type_node);
19107 tree pcchar_type_node
19108 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
19109 tree pfloat_type_node = build_pointer_type (float_type_node);
19110 tree pcfloat_type_node
19111 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
19112 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
19113 tree pcv2sf_type_node
19114 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
19115 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
19116 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
19119 tree int_ftype_v4sf_v4sf
19120 = build_function_type_list (integer_type_node,
19121 V4SF_type_node, V4SF_type_node, NULL_TREE);
19122 tree v4si_ftype_v4sf_v4sf
19123 = build_function_type_list (V4SI_type_node,
19124 V4SF_type_node, V4SF_type_node, NULL_TREE);
19125 /* MMX/SSE/integer conversions. */
19126 tree int_ftype_v4sf
19127 = build_function_type_list (integer_type_node,
19128 V4SF_type_node, NULL_TREE);
19129 tree int64_ftype_v4sf
19130 = build_function_type_list (long_long_integer_type_node,
19131 V4SF_type_node, NULL_TREE);
19132 tree int_ftype_v8qi
19133 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
19134 tree v4sf_ftype_v4sf_int
19135 = build_function_type_list (V4SF_type_node,
19136 V4SF_type_node, integer_type_node, NULL_TREE);
19137 tree v4sf_ftype_v4sf_int64
19138 = build_function_type_list (V4SF_type_node,
19139 V4SF_type_node, long_long_integer_type_node,
19141 tree v4sf_ftype_v4sf_v2si
19142 = build_function_type_list (V4SF_type_node,
19143 V4SF_type_node, V2SI_type_node, NULL_TREE);
19145 /* Miscellaneous. */
19146 tree v8qi_ftype_v4hi_v4hi
19147 = build_function_type_list (V8QI_type_node,
19148 V4HI_type_node, V4HI_type_node, NULL_TREE);
19149 tree v4hi_ftype_v2si_v2si
19150 = build_function_type_list (V4HI_type_node,
19151 V2SI_type_node, V2SI_type_node, NULL_TREE);
19152 tree v4sf_ftype_v4sf_v4sf_int
19153 = build_function_type_list (V4SF_type_node,
19154 V4SF_type_node, V4SF_type_node,
19155 integer_type_node, NULL_TREE);
19156 tree v2si_ftype_v4hi_v4hi
19157 = build_function_type_list (V2SI_type_node,
19158 V4HI_type_node, V4HI_type_node, NULL_TREE);
19159 tree v4hi_ftype_v4hi_int
19160 = build_function_type_list (V4HI_type_node,
19161 V4HI_type_node, integer_type_node, NULL_TREE);
19162 tree v2si_ftype_v2si_int
19163 = build_function_type_list (V2SI_type_node,
19164 V2SI_type_node, integer_type_node, NULL_TREE);
19165 tree v1di_ftype_v1di_int
19166 = build_function_type_list (V1DI_type_node,
19167 V1DI_type_node, integer_type_node, NULL_TREE);
19169 tree void_ftype_void
19170 = build_function_type (void_type_node, void_list_node);
19171 tree void_ftype_unsigned
19172 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
19173 tree void_ftype_unsigned_unsigned
19174 = build_function_type_list (void_type_node, unsigned_type_node,
19175 unsigned_type_node, NULL_TREE);
19176 tree void_ftype_pcvoid_unsigned_unsigned
19177 = build_function_type_list (void_type_node, const_ptr_type_node,
19178 unsigned_type_node, unsigned_type_node,
19180 tree unsigned_ftype_void
19181 = build_function_type (unsigned_type_node, void_list_node);
19182 tree v2si_ftype_v4sf
19183 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
19184 /* Loads/stores. */
19185 tree void_ftype_v8qi_v8qi_pchar
19186 = build_function_type_list (void_type_node,
19187 V8QI_type_node, V8QI_type_node,
19188 pchar_type_node, NULL_TREE);
19189 tree v4sf_ftype_pcfloat
19190 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
19191 tree v4sf_ftype_v4sf_pcv2sf
19192 = build_function_type_list (V4SF_type_node,
19193 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
19194 tree void_ftype_pv2sf_v4sf
19195 = build_function_type_list (void_type_node,
19196 pv2sf_type_node, V4SF_type_node, NULL_TREE);
19197 tree void_ftype_pfloat_v4sf
19198 = build_function_type_list (void_type_node,
19199 pfloat_type_node, V4SF_type_node, NULL_TREE);
19200 tree void_ftype_pdi_di
19201 = build_function_type_list (void_type_node,
19202 pdi_type_node, long_long_unsigned_type_node,
19204 tree void_ftype_pv2di_v2di
19205 = build_function_type_list (void_type_node,
19206 pv2di_type_node, V2DI_type_node, NULL_TREE);
19207 /* Normal vector unops. */
19208 tree v4sf_ftype_v4sf
19209 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
19210 tree v16qi_ftype_v16qi
19211 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
19212 tree v8hi_ftype_v8hi
19213 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
19214 tree v4si_ftype_v4si
19215 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
19216 tree v8qi_ftype_v8qi
19217 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
19218 tree v4hi_ftype_v4hi
19219 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
19221 /* Normal vector binops. */
19222 tree v4sf_ftype_v4sf_v4sf
19223 = build_function_type_list (V4SF_type_node,
19224 V4SF_type_node, V4SF_type_node, NULL_TREE);
19225 tree v8qi_ftype_v8qi_v8qi
19226 = build_function_type_list (V8QI_type_node,
19227 V8QI_type_node, V8QI_type_node, NULL_TREE);
19228 tree v4hi_ftype_v4hi_v4hi
19229 = build_function_type_list (V4HI_type_node,
19230 V4HI_type_node, V4HI_type_node, NULL_TREE);
19231 tree v2si_ftype_v2si_v2si
19232 = build_function_type_list (V2SI_type_node,
19233 V2SI_type_node, V2SI_type_node, NULL_TREE);
19234 tree v1di_ftype_v1di_v1di
19235 = build_function_type_list (V1DI_type_node,
19236 V1DI_type_node, V1DI_type_node, NULL_TREE);
19237 tree v1di_ftype_v1di_v1di_int
19238 = build_function_type_list (V1DI_type_node,
19239 V1DI_type_node, V1DI_type_node,
19240 integer_type_node, NULL_TREE);
19241 tree v2si_ftype_v2sf
19242 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
19243 tree v2sf_ftype_v2si
19244 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
19245 tree v2si_ftype_v2si
19246 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
19247 tree v2sf_ftype_v2sf
19248 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
19249 tree v2sf_ftype_v2sf_v2sf
19250 = build_function_type_list (V2SF_type_node,
19251 V2SF_type_node, V2SF_type_node, NULL_TREE);
19252 tree v2si_ftype_v2sf_v2sf
19253 = build_function_type_list (V2SI_type_node,
19254 V2SF_type_node, V2SF_type_node, NULL_TREE);
19255 tree pint_type_node = build_pointer_type (integer_type_node);
19256 tree pdouble_type_node = build_pointer_type (double_type_node);
19257 tree pcdouble_type_node = build_pointer_type (
19258 build_type_variant (double_type_node, 1, 0));
19259 tree int_ftype_v2df_v2df
19260 = build_function_type_list (integer_type_node,
19261 V2DF_type_node, V2DF_type_node, NULL_TREE);
19263 tree void_ftype_pcvoid
19264 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
19265 tree v4sf_ftype_v4si
19266 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
19267 tree v4si_ftype_v4sf
19268 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
19269 tree v2df_ftype_v4si
19270 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
19271 tree v4si_ftype_v2df
19272 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
19273 tree v4si_ftype_v2df_v2df
19274 = build_function_type_list (V4SI_type_node,
19275 V2DF_type_node, V2DF_type_node, NULL_TREE);
19276 tree v2si_ftype_v2df
19277 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
19278 tree v4sf_ftype_v2df
19279 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
19280 tree v2df_ftype_v2si
19281 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
19282 tree v2df_ftype_v4sf
19283 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
19284 tree int_ftype_v2df
19285 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
19286 tree int64_ftype_v2df
19287 = build_function_type_list (long_long_integer_type_node,
19288 V2DF_type_node, NULL_TREE);
19289 tree v2df_ftype_v2df_int
19290 = build_function_type_list (V2DF_type_node,
19291 V2DF_type_node, integer_type_node, NULL_TREE);
19292 tree v2df_ftype_v2df_int64
19293 = build_function_type_list (V2DF_type_node,
19294 V2DF_type_node, long_long_integer_type_node,
19296 tree v4sf_ftype_v4sf_v2df
19297 = build_function_type_list (V4SF_type_node,
19298 V4SF_type_node, V2DF_type_node, NULL_TREE);
19299 tree v2df_ftype_v2df_v4sf
19300 = build_function_type_list (V2DF_type_node,
19301 V2DF_type_node, V4SF_type_node, NULL_TREE);
19302 tree v2df_ftype_v2df_v2df_int
19303 = build_function_type_list (V2DF_type_node,
19304 V2DF_type_node, V2DF_type_node,
19307 tree v2df_ftype_v2df_pcdouble
19308 = build_function_type_list (V2DF_type_node,
19309 V2DF_type_node, pcdouble_type_node, NULL_TREE);
19310 tree void_ftype_pdouble_v2df
19311 = build_function_type_list (void_type_node,
19312 pdouble_type_node, V2DF_type_node, NULL_TREE);
19313 tree void_ftype_pint_int
19314 = build_function_type_list (void_type_node,
19315 pint_type_node, integer_type_node, NULL_TREE);
19316 tree void_ftype_v16qi_v16qi_pchar
19317 = build_function_type_list (void_type_node,
19318 V16QI_type_node, V16QI_type_node,
19319 pchar_type_node, NULL_TREE);
19320 tree v2df_ftype_pcdouble
19321 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
19322 tree v2df_ftype_v2df_v2df
19323 = build_function_type_list (V2DF_type_node,
19324 V2DF_type_node, V2DF_type_node, NULL_TREE);
19325 tree v16qi_ftype_v16qi_v16qi
19326 = build_function_type_list (V16QI_type_node,
19327 V16QI_type_node, V16QI_type_node, NULL_TREE);
19328 tree v8hi_ftype_v8hi_v8hi
19329 = build_function_type_list (V8HI_type_node,
19330 V8HI_type_node, V8HI_type_node, NULL_TREE);
19331 tree v4si_ftype_v4si_v4si
19332 = build_function_type_list (V4SI_type_node,
19333 V4SI_type_node, V4SI_type_node, NULL_TREE);
19334 tree v2di_ftype_v2di_v2di
19335 = build_function_type_list (V2DI_type_node,
19336 V2DI_type_node, V2DI_type_node, NULL_TREE);
19337 tree v2di_ftype_v2df_v2df
19338 = build_function_type_list (V2DI_type_node,
19339 V2DF_type_node, V2DF_type_node, NULL_TREE);
19340 tree v2df_ftype_v2df
19341 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
19342 tree v2di_ftype_v2di_int
19343 = build_function_type_list (V2DI_type_node,
19344 V2DI_type_node, integer_type_node, NULL_TREE);
19345 tree v2di_ftype_v2di_v2di_int
19346 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19347 V2DI_type_node, integer_type_node, NULL_TREE);
19348 tree v4si_ftype_v4si_int
19349 = build_function_type_list (V4SI_type_node,
19350 V4SI_type_node, integer_type_node, NULL_TREE);
19351 tree v8hi_ftype_v8hi_int
19352 = build_function_type_list (V8HI_type_node,
19353 V8HI_type_node, integer_type_node, NULL_TREE);
19354 tree v4si_ftype_v8hi_v8hi
19355 = build_function_type_list (V4SI_type_node,
19356 V8HI_type_node, V8HI_type_node, NULL_TREE);
19357 tree v1di_ftype_v8qi_v8qi
19358 = build_function_type_list (V1DI_type_node,
19359 V8QI_type_node, V8QI_type_node, NULL_TREE);
19360 tree v1di_ftype_v2si_v2si
19361 = build_function_type_list (V1DI_type_node,
19362 V2SI_type_node, V2SI_type_node, NULL_TREE);
19363 tree v2di_ftype_v16qi_v16qi
19364 = build_function_type_list (V2DI_type_node,
19365 V16QI_type_node, V16QI_type_node, NULL_TREE);
19366 tree v2di_ftype_v4si_v4si
19367 = build_function_type_list (V2DI_type_node,
19368 V4SI_type_node, V4SI_type_node, NULL_TREE);
19369 tree int_ftype_v16qi
19370 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
19371 tree v16qi_ftype_pcchar
19372 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
19373 tree void_ftype_pchar_v16qi
19374 = build_function_type_list (void_type_node,
19375 pchar_type_node, V16QI_type_node, NULL_TREE);
19377 tree v2di_ftype_v2di_unsigned_unsigned
19378 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19379 unsigned_type_node, unsigned_type_node,
19381 tree v2di_ftype_v2di_v2di_unsigned_unsigned
19382 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
19383 unsigned_type_node, unsigned_type_node,
19385 tree v2di_ftype_v2di_v16qi
19386 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
19388 tree v2df_ftype_v2df_v2df_v2df
19389 = build_function_type_list (V2DF_type_node,
19390 V2DF_type_node, V2DF_type_node,
19391 V2DF_type_node, NULL_TREE);
19392 tree v4sf_ftype_v4sf_v4sf_v4sf
19393 = build_function_type_list (V4SF_type_node,
19394 V4SF_type_node, V4SF_type_node,
19395 V4SF_type_node, NULL_TREE);
19396 tree v8hi_ftype_v16qi
19397 = build_function_type_list (V8HI_type_node, V16QI_type_node,
19399 tree v4si_ftype_v16qi
19400 = build_function_type_list (V4SI_type_node, V16QI_type_node,
19402 tree v2di_ftype_v16qi
19403 = build_function_type_list (V2DI_type_node, V16QI_type_node,
19405 tree v4si_ftype_v8hi
19406 = build_function_type_list (V4SI_type_node, V8HI_type_node,
19408 tree v2di_ftype_v8hi
19409 = build_function_type_list (V2DI_type_node, V8HI_type_node,
19411 tree v2di_ftype_v4si
19412 = build_function_type_list (V2DI_type_node, V4SI_type_node,
19414 tree v2di_ftype_pv2di
19415 = build_function_type_list (V2DI_type_node, pv2di_type_node,
19417 tree v16qi_ftype_v16qi_v16qi_int
19418 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19419 V16QI_type_node, integer_type_node,
19421 tree v16qi_ftype_v16qi_v16qi_v16qi
19422 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19423 V16QI_type_node, V16QI_type_node,
19425 tree v8hi_ftype_v8hi_v8hi_int
19426 = build_function_type_list (V8HI_type_node, V8HI_type_node,
19427 V8HI_type_node, integer_type_node,
19429 tree v4si_ftype_v4si_v4si_int
19430 = build_function_type_list (V4SI_type_node, V4SI_type_node,
19431 V4SI_type_node, integer_type_node,
19433 tree int_ftype_v2di_v2di
19434 = build_function_type_list (integer_type_node,
19435 V2DI_type_node, V2DI_type_node,
19437 tree int_ftype_v16qi_int_v16qi_int_int
19438 = build_function_type_list (integer_type_node,
19445 tree v16qi_ftype_v16qi_int_v16qi_int_int
19446 = build_function_type_list (V16QI_type_node,
19453 tree int_ftype_v16qi_v16qi_int
19454 = build_function_type_list (integer_type_node,
19460 /* SSE5 instructions */
19461 tree v2di_ftype_v2di_v2di_v2di
19462 = build_function_type_list (V2DI_type_node,
19468 tree v4si_ftype_v4si_v4si_v4si
19469 = build_function_type_list (V4SI_type_node,
19475 tree v4si_ftype_v4si_v4si_v2di
19476 = build_function_type_list (V4SI_type_node,
19482 tree v8hi_ftype_v8hi_v8hi_v8hi
19483 = build_function_type_list (V8HI_type_node,
19489 tree v8hi_ftype_v8hi_v8hi_v4si
19490 = build_function_type_list (V8HI_type_node,
19496 tree v2df_ftype_v2df_v2df_v16qi
19497 = build_function_type_list (V2DF_type_node,
19503 tree v4sf_ftype_v4sf_v4sf_v16qi
19504 = build_function_type_list (V4SF_type_node,
19510 tree v2di_ftype_v2di_si
19511 = build_function_type_list (V2DI_type_node,
19516 tree v4si_ftype_v4si_si
19517 = build_function_type_list (V4SI_type_node,
19522 tree v8hi_ftype_v8hi_si
19523 = build_function_type_list (V8HI_type_node,
19528 tree v16qi_ftype_v16qi_si
19529 = build_function_type_list (V16QI_type_node,
19533 tree v4sf_ftype_v4hi
19534 = build_function_type_list (V4SF_type_node,
19538 tree v4hi_ftype_v4sf
19539 = build_function_type_list (V4HI_type_node,
19543 tree v2di_ftype_v2di
19544 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
19546 tree v16qi_ftype_v8hi_v8hi
19547 = build_function_type_list (V16QI_type_node,
19548 V8HI_type_node, V8HI_type_node,
19550 tree v8hi_ftype_v4si_v4si
19551 = build_function_type_list (V8HI_type_node,
19552 V4SI_type_node, V4SI_type_node,
19554 tree v8hi_ftype_v16qi_v16qi
19555 = build_function_type_list (V8HI_type_node,
19556 V16QI_type_node, V16QI_type_node,
19558 tree v4hi_ftype_v8qi_v8qi
19559 = build_function_type_list (V4HI_type_node,
19560 V8QI_type_node, V8QI_type_node,
19562 tree unsigned_ftype_unsigned_uchar
19563 = build_function_type_list (unsigned_type_node,
19564 unsigned_type_node,
19565 unsigned_char_type_node,
19567 tree unsigned_ftype_unsigned_ushort
19568 = build_function_type_list (unsigned_type_node,
19569 unsigned_type_node,
19570 short_unsigned_type_node,
19572 tree unsigned_ftype_unsigned_unsigned
19573 = build_function_type_list (unsigned_type_node,
19574 unsigned_type_node,
19575 unsigned_type_node,
19577 tree uint64_ftype_uint64_uint64
19578 = build_function_type_list (long_long_unsigned_type_node,
19579 long_long_unsigned_type_node,
19580 long_long_unsigned_type_node,
19582 tree float_ftype_float
19583 = build_function_type_list (float_type_node,
19589 /* Add all special builtins with variable number of operands. */
19590 for (i = 0, d = bdesc_special_args;
19591 i < ARRAY_SIZE (bdesc_special_args);
19599 switch ((enum ix86_special_builtin_type) d->flag)
19601 case VOID_FTYPE_VOID:
19602 type = void_ftype_void;
19604 case V16QI_FTYPE_PCCHAR:
19605 type = v16qi_ftype_pcchar;
19607 case V4SF_FTYPE_PCFLOAT:
19608 type = v4sf_ftype_pcfloat;
19610 case V2DI_FTYPE_PV2DI:
19611 type = v2di_ftype_pv2di;
19613 case V2DF_FTYPE_PCDOUBLE:
19614 type = v2df_ftype_pcdouble;
19616 case V4SF_FTYPE_V4SF_PCV2SF:
19617 type = v4sf_ftype_v4sf_pcv2sf;
19619 case V2DF_FTYPE_V2DF_PCDOUBLE:
19620 type = v2df_ftype_v2df_pcdouble;
19622 case VOID_FTYPE_PV2SF_V4SF:
19623 type = void_ftype_pv2sf_v4sf;
19625 case VOID_FTYPE_PV2DI_V2DI:
19626 type = void_ftype_pv2di_v2di;
19628 case VOID_FTYPE_PCHAR_V16QI:
19629 type = void_ftype_pchar_v16qi;
19631 case VOID_FTYPE_PFLOAT_V4SF:
19632 type = void_ftype_pfloat_v4sf;
19634 case VOID_FTYPE_PDOUBLE_V2DF:
19635 type = void_ftype_pdouble_v2df;
19637 case VOID_FTYPE_PDI_DI:
19638 type = void_ftype_pdi_di;
19640 case VOID_FTYPE_PINT_INT:
19641 type = void_ftype_pint_int;
19644 gcc_unreachable ();
19647 def_builtin (d->mask, d->name, type, d->code);
19650 /* Add all builtins with variable number of operands. */
19651 for (i = 0, d = bdesc_args;
19652 i < ARRAY_SIZE (bdesc_args);
19660 switch ((enum ix86_builtin_type) d->flag)
19662 case FLOAT_FTYPE_FLOAT:
19663 type = float_ftype_float;
19665 case INT_FTYPE_V2DI_V2DI_PTEST:
19666 type = int_ftype_v2di_v2di;
19668 case INT64_FTYPE_V4SF:
19669 type = int64_ftype_v4sf;
19671 case INT64_FTYPE_V2DF:
19672 type = int64_ftype_v2df;
19674 case INT_FTYPE_V16QI:
19675 type = int_ftype_v16qi;
19677 case INT_FTYPE_V8QI:
19678 type = int_ftype_v8qi;
19680 case INT_FTYPE_V4SF:
19681 type = int_ftype_v4sf;
19683 case INT_FTYPE_V2DF:
19684 type = int_ftype_v2df;
19686 case V16QI_FTYPE_V16QI:
19687 type = v16qi_ftype_v16qi;
19689 case V8HI_FTYPE_V8HI:
19690 type = v8hi_ftype_v8hi;
19692 case V8HI_FTYPE_V16QI:
19693 type = v8hi_ftype_v16qi;
19695 case V8QI_FTYPE_V8QI:
19696 type = v8qi_ftype_v8qi;
19698 case V4SI_FTYPE_V4SI:
19699 type = v4si_ftype_v4si;
19701 case V4SI_FTYPE_V16QI:
19702 type = v4si_ftype_v16qi;
19704 case V4SI_FTYPE_V8HI:
19705 type = v4si_ftype_v8hi;
19707 case V4SI_FTYPE_V4SF:
19708 type = v4si_ftype_v4sf;
19710 case V4SI_FTYPE_V2DF:
19711 type = v4si_ftype_v2df;
19713 case V4HI_FTYPE_V4HI:
19714 type = v4hi_ftype_v4hi;
19716 case V4SF_FTYPE_V4SF:
19717 case V4SF_FTYPE_V4SF_VEC_MERGE:
19718 type = v4sf_ftype_v4sf;
19720 case V4SF_FTYPE_V4SI:
19721 type = v4sf_ftype_v4si;
19723 case V4SF_FTYPE_V2DF:
19724 type = v4sf_ftype_v2df;
19726 case V2DI_FTYPE_V2DI:
19727 type = v2di_ftype_v2di;
19729 case V2DI_FTYPE_V16QI:
19730 type = v2di_ftype_v16qi;
19732 case V2DI_FTYPE_V8HI:
19733 type = v2di_ftype_v8hi;
19735 case V2DI_FTYPE_V4SI:
19736 type = v2di_ftype_v4si;
19738 case V2SI_FTYPE_V2SI:
19739 type = v2si_ftype_v2si;
19741 case V2SI_FTYPE_V4SF:
19742 type = v2si_ftype_v4sf;
19744 case V2SI_FTYPE_V2DF:
19745 type = v2si_ftype_v2df;
19747 case V2SI_FTYPE_V2SF:
19748 type = v2si_ftype_v2sf;
19750 case V2DF_FTYPE_V4SF:
19751 type = v2df_ftype_v4sf;
19753 case V2DF_FTYPE_V2DF:
19754 case V2DF_FTYPE_V2DF_VEC_MERGE:
19755 type = v2df_ftype_v2df;
19757 case V2DF_FTYPE_V2SI:
19758 type = v2df_ftype_v2si;
19760 case V2DF_FTYPE_V4SI:
19761 type = v2df_ftype_v4si;
19763 case V2SF_FTYPE_V2SF:
19764 type = v2sf_ftype_v2sf;
19766 case V2SF_FTYPE_V2SI:
19767 type = v2sf_ftype_v2si;
19769 case V16QI_FTYPE_V16QI_V16QI:
19770 type = v16qi_ftype_v16qi_v16qi;
19772 case V16QI_FTYPE_V8HI_V8HI:
19773 type = v16qi_ftype_v8hi_v8hi;
19775 case V8QI_FTYPE_V8QI_V8QI:
19776 type = v8qi_ftype_v8qi_v8qi;
19778 case V8QI_FTYPE_V4HI_V4HI:
19779 type = v8qi_ftype_v4hi_v4hi;
19781 case V8HI_FTYPE_V8HI_V8HI:
19782 case V8HI_FTYPE_V8HI_V8HI_COUNT:
19783 type = v8hi_ftype_v8hi_v8hi;
19785 case V8HI_FTYPE_V16QI_V16QI:
19786 type = v8hi_ftype_v16qi_v16qi;
19788 case V8HI_FTYPE_V4SI_V4SI:
19789 type = v8hi_ftype_v4si_v4si;
19791 case V8HI_FTYPE_V8HI_SI_COUNT:
19792 type = v8hi_ftype_v8hi_int;
19794 case V4SI_FTYPE_V4SI_V4SI:
19795 case V4SI_FTYPE_V4SI_V4SI_COUNT:
19796 type = v4si_ftype_v4si_v4si;
19798 case V4SI_FTYPE_V8HI_V8HI:
19799 type = v4si_ftype_v8hi_v8hi;
19801 case V4SI_FTYPE_V4SF_V4SF:
19802 type = v4si_ftype_v4sf_v4sf;
19804 case V4SI_FTYPE_V2DF_V2DF:
19805 type = v4si_ftype_v2df_v2df;
19807 case V4SI_FTYPE_V4SI_SI_COUNT:
19808 type = v4si_ftype_v4si_int;
19810 case V4HI_FTYPE_V4HI_V4HI:
19811 case V4HI_FTYPE_V4HI_V4HI_COUNT:
19812 type = v4hi_ftype_v4hi_v4hi;
19814 case V4HI_FTYPE_V8QI_V8QI:
19815 type = v4hi_ftype_v8qi_v8qi;
19817 case V4HI_FTYPE_V2SI_V2SI:
19818 type = v4hi_ftype_v2si_v2si;
19820 case V4HI_FTYPE_V4HI_SI_COUNT:
19821 type = v4hi_ftype_v4hi_int;
19823 case V4SF_FTYPE_V4SF_V4SF:
19824 case V4SF_FTYPE_V4SF_V4SF_SWAP:
19825 type = v4sf_ftype_v4sf_v4sf;
19827 case V4SF_FTYPE_V4SF_V2SI:
19828 type = v4sf_ftype_v4sf_v2si;
19830 case V4SF_FTYPE_V4SF_V2DF:
19831 type = v4sf_ftype_v4sf_v2df;
19833 case V4SF_FTYPE_V4SF_DI:
19834 type = v4sf_ftype_v4sf_int64;
19836 case V4SF_FTYPE_V4SF_SI:
19837 type = v4sf_ftype_v4sf_int;
19839 case V2DI_FTYPE_V2DI_V2DI:
19840 case V2DI_FTYPE_V2DI_V2DI_COUNT:
19841 type = v2di_ftype_v2di_v2di;
19843 case V2DI_FTYPE_V16QI_V16QI:
19844 type = v2di_ftype_v16qi_v16qi;
19846 case V2DI_FTYPE_V4SI_V4SI:
19847 type = v2di_ftype_v4si_v4si;
19849 case V2DI_FTYPE_V2DI_V16QI:
19850 type = v2di_ftype_v2di_v16qi;
19852 case V2DI_FTYPE_V2DF_V2DF:
19853 type = v2di_ftype_v2df_v2df;
19855 case V2DI_FTYPE_V2DI_SI_COUNT:
19856 type = v2di_ftype_v2di_int;
19858 case V2SI_FTYPE_V2SI_V2SI:
19859 case V2SI_FTYPE_V2SI_V2SI_COUNT:
19860 type = v2si_ftype_v2si_v2si;
19862 case V2SI_FTYPE_V4HI_V4HI:
19863 type = v2si_ftype_v4hi_v4hi;
19865 case V2SI_FTYPE_V2SF_V2SF:
19866 type = v2si_ftype_v2sf_v2sf;
19868 case V2SI_FTYPE_V2SI_SI_COUNT:
19869 type = v2si_ftype_v2si_int;
19871 case V2DF_FTYPE_V2DF_V2DF:
19872 case V2DF_FTYPE_V2DF_V2DF_SWAP:
19873 type = v2df_ftype_v2df_v2df;
19875 case V2DF_FTYPE_V2DF_V4SF:
19876 type = v2df_ftype_v2df_v4sf;
19878 case V2DF_FTYPE_V2DF_DI:
19879 type = v2df_ftype_v2df_int64;
19881 case V2DF_FTYPE_V2DF_SI:
19882 type = v2df_ftype_v2df_int;
19884 case V2SF_FTYPE_V2SF_V2SF:
19885 type = v2sf_ftype_v2sf_v2sf;
19887 case V1DI_FTYPE_V1DI_V1DI:
19888 case V1DI_FTYPE_V1DI_V1DI_COUNT:
19889 type = v1di_ftype_v1di_v1di;
19891 case V1DI_FTYPE_V8QI_V8QI:
19892 type = v1di_ftype_v8qi_v8qi;
19894 case V1DI_FTYPE_V2SI_V2SI:
19895 type = v1di_ftype_v2si_v2si;
19897 case V1DI_FTYPE_V1DI_SI_COUNT:
19898 type = v1di_ftype_v1di_int;
19900 case UINT64_FTYPE_UINT64_UINT64:
19901 type = uint64_ftype_uint64_uint64;
19903 case UINT_FTYPE_UINT_UINT:
19904 type = unsigned_ftype_unsigned_unsigned;
19906 case UINT_FTYPE_UINT_USHORT:
19907 type = unsigned_ftype_unsigned_ushort;
19909 case UINT_FTYPE_UINT_UCHAR:
19910 type = unsigned_ftype_unsigned_uchar;
19912 case V8HI_FTYPE_V8HI_INT:
19913 type = v8hi_ftype_v8hi_int;
19915 case V4SI_FTYPE_V4SI_INT:
19916 type = v4si_ftype_v4si_int;
19918 case V4HI_FTYPE_V4HI_INT:
19919 type = v4hi_ftype_v4hi_int;
19921 case V4SF_FTYPE_V4SF_INT:
19922 type = v4sf_ftype_v4sf_int;
19924 case V2DI_FTYPE_V2DI_INT:
19925 case V2DI2TI_FTYPE_V2DI_INT:
19926 type = v2di_ftype_v2di_int;
19928 case V2DF_FTYPE_V2DF_INT:
19929 type = v2df_ftype_v2df_int;
19931 case V16QI_FTYPE_V16QI_V16QI_V16QI:
19932 type = v16qi_ftype_v16qi_v16qi_v16qi;
19934 case V4SF_FTYPE_V4SF_V4SF_V4SF:
19935 type = v4sf_ftype_v4sf_v4sf_v4sf;
19937 case V2DF_FTYPE_V2DF_V2DF_V2DF:
19938 type = v2df_ftype_v2df_v2df_v2df;
19940 case V16QI_FTYPE_V16QI_V16QI_INT:
19941 type = v16qi_ftype_v16qi_v16qi_int;
19943 case V8HI_FTYPE_V8HI_V8HI_INT:
19944 type = v8hi_ftype_v8hi_v8hi_int;
19946 case V4SI_FTYPE_V4SI_V4SI_INT:
19947 type = v4si_ftype_v4si_v4si_int;
19949 case V4SF_FTYPE_V4SF_V4SF_INT:
19950 type = v4sf_ftype_v4sf_v4sf_int;
19952 case V2DI_FTYPE_V2DI_V2DI_INT:
19953 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
19954 type = v2di_ftype_v2di_v2di_int;
19956 case V2DF_FTYPE_V2DF_V2DF_INT:
19957 type = v2df_ftype_v2df_v2df_int;
19959 case V2DI_FTYPE_V2DI_UINT_UINT:
19960 type = v2di_ftype_v2di_unsigned_unsigned;
19962 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
19963 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
19965 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
19966 type = v1di_ftype_v1di_v1di_int;
19969 gcc_unreachable ();
19972 def_builtin_const (d->mask, d->name, type, d->code);
19975 /* pcmpestr[im] insns. */
19976 for (i = 0, d = bdesc_pcmpestr;
19977 i < ARRAY_SIZE (bdesc_pcmpestr);
19980 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19981 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19983 ftype = int_ftype_v16qi_int_v16qi_int_int;
19984 def_builtin_const (d->mask, d->name, ftype, d->code);
19987 /* pcmpistr[im] insns. */
19988 for (i = 0, d = bdesc_pcmpistr;
19989 i < ARRAY_SIZE (bdesc_pcmpistr);
19992 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19993 ftype = v16qi_ftype_v16qi_v16qi_int;
19995 ftype = int_ftype_v16qi_v16qi_int;
19996 def_builtin_const (d->mask, d->name, ftype, d->code);
19999 /* comi/ucomi insns. */
20000 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
20001 if (d->mask == OPTION_MASK_ISA_SSE2)
20002 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
20004 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
20007 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
20008 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
20010 /* SSE or 3DNow!A */
20011 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
20014 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
20016 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
20017 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
20020 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
20021 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
20026 /* Define AES built-in functions only if AES is enabled. */
20027 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
20028 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
20029 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
20030 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
20031 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
20032 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
20038 /* Define PCLMUL built-in function only if PCLMUL is enabled. */
20039 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
20042 /* Access to the vec_init patterns. */
20043 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
20044 integer_type_node, NULL_TREE);
20045 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
20047 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
20048 short_integer_type_node,
20049 short_integer_type_node,
20050 short_integer_type_node, NULL_TREE);
20051 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
20053 ftype = build_function_type_list (V8QI_type_node, char_type_node,
20054 char_type_node, char_type_node,
20055 char_type_node, char_type_node,
20056 char_type_node, char_type_node,
20057 char_type_node, NULL_TREE);
20058 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
20060 /* Access to the vec_extract patterns. */
20061 ftype = build_function_type_list (double_type_node, V2DF_type_node,
20062 integer_type_node, NULL_TREE);
20063 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
20065 ftype = build_function_type_list (long_long_integer_type_node,
20066 V2DI_type_node, integer_type_node,
20068 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
20070 ftype = build_function_type_list (float_type_node, V4SF_type_node,
20071 integer_type_node, NULL_TREE);
20072 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
20074 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
20075 integer_type_node, NULL_TREE);
20076 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
20078 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
20079 integer_type_node, NULL_TREE);
20080 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
20082 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
20083 integer_type_node, NULL_TREE);
20084 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
20086 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
20087 integer_type_node, NULL_TREE);
20088 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
20090 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
20091 integer_type_node, NULL_TREE);
20092 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
20094 /* Access to the vec_set patterns. */
20095 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
20097 integer_type_node, NULL_TREE);
20098 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
20100 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
20102 integer_type_node, NULL_TREE);
20103 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
20105 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
20107 integer_type_node, NULL_TREE);
20108 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
20110 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
20112 integer_type_node, NULL_TREE);
20113 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
20115 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
20117 integer_type_node, NULL_TREE);
20118 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
20120 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
20122 integer_type_node, NULL_TREE);
20123 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
20125 /* Add SSE5 multi-arg argument instructions */
20126 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
20128 tree mtype = NULL_TREE;
20133 switch ((enum multi_arg_type)d->flag)
20135 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
20136 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
20137 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
20138 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
20139 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
20140 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
20141 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
20142 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
20143 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
20144 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
20145 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
20146 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
20147 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
20148 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
20149 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
20150 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
20151 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
20152 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
20153 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
20154 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
20155 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
20156 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
20157 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
20158 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
20159 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
20160 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
20161 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
20162 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
20163 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
20164 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
20165 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
20166 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
20167 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
20168 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
20169 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
20170 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
20171 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
20172 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
20173 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
20174 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
20175 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
20176 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
20177 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
20178 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
20179 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
20180 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
20181 case MULTI_ARG_UNKNOWN:
20183 gcc_unreachable ();
20187 def_builtin_const (d->mask, d->name, mtype, d->code);
20192 ix86_init_builtins (void)
20194 tree float128_type_node = make_node (REAL_TYPE);
20197 /* The __float80 type. */
20198 if (TYPE_MODE (long_double_type_node) == XFmode)
20199 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
20203 /* The __float80 type. */
20204 tree float80_type_node = make_node (REAL_TYPE);
20206 TYPE_PRECISION (float80_type_node) = 80;
20207 layout_type (float80_type_node);
20208 (*lang_hooks.types.register_builtin_type) (float80_type_node,
20212 /* The __float128 type. */
20213 TYPE_PRECISION (float128_type_node) = 128;
20214 layout_type (float128_type_node);
20215 (*lang_hooks.types.register_builtin_type) (float128_type_node,
20218 /* TFmode support builtins. */
20219 ftype = build_function_type (float128_type_node, void_list_node);
20220 decl = add_builtin_function ("__builtin_infq", ftype,
20221 IX86_BUILTIN_INFQ, BUILT_IN_MD,
20223 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
20225 if (HOST_BITS_PER_WIDE_INT >= 64)
20227 /* Those builtins need TImode to compile. */
20228 ftype = build_function_type_list (float128_type_node,
20229 float128_type_node,
20231 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
20233 ftype = build_function_type_list (float128_type_node,
20234 float128_type_node,
20235 float128_type_node,
20237 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
20241 ix86_init_mmx_sse_builtins ();
20244 /* Errors in the source file can cause expand_expr to return const0_rtx
20245 where we expect a vector. To avoid crashing, use one of the vector
20246 clear instructions. */
20248 safe_vector_operand (rtx x, enum machine_mode mode)
20250 if (x == const0_rtx)
20251 x = CONST0_RTX (mode);
20255 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
20258 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
20261 tree arg0 = CALL_EXPR_ARG (exp, 0);
20262 tree arg1 = CALL_EXPR_ARG (exp, 1);
20263 rtx op0 = expand_normal (arg0);
20264 rtx op1 = expand_normal (arg1);
20265 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20266 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20267 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20269 if (VECTOR_MODE_P (mode0))
20270 op0 = safe_vector_operand (op0, mode0);
20271 if (VECTOR_MODE_P (mode1))
20272 op1 = safe_vector_operand (op1, mode1);
20274 if (optimize || !target
20275 || GET_MODE (target) != tmode
20276 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20277 target = gen_reg_rtx (tmode);
20279 if (GET_MODE (op1) == SImode && mode1 == TImode)
20281 rtx x = gen_reg_rtx (V4SImode);
20282 emit_insn (gen_sse2_loadd (x, op1));
20283 op1 = gen_lowpart (TImode, x);
20286 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20287 op0 = copy_to_mode_reg (mode0, op0);
20288 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20289 op1 = copy_to_mode_reg (mode1, op1);
20291 pat = GEN_FCN (icode) (target, op0, op1);
20300 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
20303 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
20304 enum multi_arg_type m_type,
20305 enum insn_code sub_code)
20310 bool comparison_p = false;
20312 bool last_arg_constant = false;
20313 int num_memory = 0;
20316 enum machine_mode mode;
20319 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20323 case MULTI_ARG_3_SF:
20324 case MULTI_ARG_3_DF:
20325 case MULTI_ARG_3_DI:
20326 case MULTI_ARG_3_SI:
20327 case MULTI_ARG_3_SI_DI:
20328 case MULTI_ARG_3_HI:
20329 case MULTI_ARG_3_HI_SI:
20330 case MULTI_ARG_3_QI:
20331 case MULTI_ARG_3_PERMPS:
20332 case MULTI_ARG_3_PERMPD:
20336 case MULTI_ARG_2_SF:
20337 case MULTI_ARG_2_DF:
20338 case MULTI_ARG_2_DI:
20339 case MULTI_ARG_2_SI:
20340 case MULTI_ARG_2_HI:
20341 case MULTI_ARG_2_QI:
20345 case MULTI_ARG_2_DI_IMM:
20346 case MULTI_ARG_2_SI_IMM:
20347 case MULTI_ARG_2_HI_IMM:
20348 case MULTI_ARG_2_QI_IMM:
20350 last_arg_constant = true;
20353 case MULTI_ARG_1_SF:
20354 case MULTI_ARG_1_DF:
20355 case MULTI_ARG_1_DI:
20356 case MULTI_ARG_1_SI:
20357 case MULTI_ARG_1_HI:
20358 case MULTI_ARG_1_QI:
20359 case MULTI_ARG_1_SI_DI:
20360 case MULTI_ARG_1_HI_DI:
20361 case MULTI_ARG_1_HI_SI:
20362 case MULTI_ARG_1_QI_DI:
20363 case MULTI_ARG_1_QI_SI:
20364 case MULTI_ARG_1_QI_HI:
20365 case MULTI_ARG_1_PH2PS:
20366 case MULTI_ARG_1_PS2PH:
20370 case MULTI_ARG_2_SF_CMP:
20371 case MULTI_ARG_2_DF_CMP:
20372 case MULTI_ARG_2_DI_CMP:
20373 case MULTI_ARG_2_SI_CMP:
20374 case MULTI_ARG_2_HI_CMP:
20375 case MULTI_ARG_2_QI_CMP:
20377 comparison_p = true;
20380 case MULTI_ARG_2_SF_TF:
20381 case MULTI_ARG_2_DF_TF:
20382 case MULTI_ARG_2_DI_TF:
20383 case MULTI_ARG_2_SI_TF:
20384 case MULTI_ARG_2_HI_TF:
20385 case MULTI_ARG_2_QI_TF:
20390 case MULTI_ARG_UNKNOWN:
20392 gcc_unreachable ();
20395 if (optimize || !target
20396 || GET_MODE (target) != tmode
20397 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20398 target = gen_reg_rtx (tmode);
20400 gcc_assert (nargs <= 4);
20402 for (i = 0; i < nargs; i++)
20404 tree arg = CALL_EXPR_ARG (exp, i);
20405 rtx op = expand_normal (arg);
20406 int adjust = (comparison_p) ? 1 : 0;
20407 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
20409 if (last_arg_constant && i == nargs-1)
20411 if (GET_CODE (op) != CONST_INT)
20413 error ("last argument must be an immediate");
20414 return gen_reg_rtx (tmode);
20419 if (VECTOR_MODE_P (mode))
20420 op = safe_vector_operand (op, mode);
20422 /* If we aren't optimizing, only allow one memory operand to be
20424 if (memory_operand (op, mode))
20427 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
20430 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
20432 op = force_reg (mode, op);
20436 args[i].mode = mode;
20442 pat = GEN_FCN (icode) (target, args[0].op);
20447 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
20448 GEN_INT ((int)sub_code));
20449 else if (! comparison_p)
20450 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
20453 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
20457 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
20462 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
20466 gcc_unreachable ();
20476 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
20477 insns with vec_merge. */
20480 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
20484 tree arg0 = CALL_EXPR_ARG (exp, 0);
20485 rtx op1, op0 = expand_normal (arg0);
20486 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20487 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20489 if (optimize || !target
20490 || GET_MODE (target) != tmode
20491 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20492 target = gen_reg_rtx (tmode);
20494 if (VECTOR_MODE_P (mode0))
20495 op0 = safe_vector_operand (op0, mode0);
20497 if ((optimize && !register_operand (op0, mode0))
20498 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20499 op0 = copy_to_mode_reg (mode0, op0);
20502 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20503 op1 = copy_to_mode_reg (mode0, op1);
20505 pat = GEN_FCN (icode) (target, op0, op1);
20512 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20515 ix86_expand_sse_compare (const struct builtin_description *d,
20516 tree exp, rtx target, bool swap)
20519 tree arg0 = CALL_EXPR_ARG (exp, 0);
20520 tree arg1 = CALL_EXPR_ARG (exp, 1);
20521 rtx op0 = expand_normal (arg0);
20522 rtx op1 = expand_normal (arg1);
20524 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20525 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20526 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20527 enum rtx_code comparison = d->comparison;
20529 if (VECTOR_MODE_P (mode0))
20530 op0 = safe_vector_operand (op0, mode0);
20531 if (VECTOR_MODE_P (mode1))
20532 op1 = safe_vector_operand (op1, mode1);
20534 /* Swap operands if we have a comparison that isn't available in
20538 rtx tmp = gen_reg_rtx (mode1);
20539 emit_move_insn (tmp, op1);
20544 if (optimize || !target
20545 || GET_MODE (target) != tmode
20546 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20547 target = gen_reg_rtx (tmode);
20549 if ((optimize && !register_operand (op0, mode0))
20550 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20551 op0 = copy_to_mode_reg (mode0, op0);
20552 if ((optimize && !register_operand (op1, mode1))
20553 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20554 op1 = copy_to_mode_reg (mode1, op1);
20556 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20557 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20564 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20567 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20571 tree arg0 = CALL_EXPR_ARG (exp, 0);
20572 tree arg1 = CALL_EXPR_ARG (exp, 1);
20573 rtx op0 = expand_normal (arg0);
20574 rtx op1 = expand_normal (arg1);
20575 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20576 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20577 enum rtx_code comparison = d->comparison;
20579 if (VECTOR_MODE_P (mode0))
20580 op0 = safe_vector_operand (op0, mode0);
20581 if (VECTOR_MODE_P (mode1))
20582 op1 = safe_vector_operand (op1, mode1);
20584 /* Swap operands if we have a comparison that isn't available in
20586 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20593 target = gen_reg_rtx (SImode);
20594 emit_move_insn (target, const0_rtx);
20595 target = gen_rtx_SUBREG (QImode, target, 0);
20597 if ((optimize && !register_operand (op0, mode0))
20598 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20599 op0 = copy_to_mode_reg (mode0, op0);
20600 if ((optimize && !register_operand (op1, mode1))
20601 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20602 op1 = copy_to_mode_reg (mode1, op1);
20604 pat = GEN_FCN (d->icode) (op0, op1);
20608 emit_insn (gen_rtx_SET (VOIDmode,
20609 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20610 gen_rtx_fmt_ee (comparison, QImode,
20614 return SUBREG_REG (target);
20617 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20620 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20624 tree arg0 = CALL_EXPR_ARG (exp, 0);
20625 tree arg1 = CALL_EXPR_ARG (exp, 1);
20626 rtx op0 = expand_normal (arg0);
20627 rtx op1 = expand_normal (arg1);
20628 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20629 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20630 enum rtx_code comparison = d->comparison;
20632 if (VECTOR_MODE_P (mode0))
20633 op0 = safe_vector_operand (op0, mode0);
20634 if (VECTOR_MODE_P (mode1))
20635 op1 = safe_vector_operand (op1, mode1);
20637 target = gen_reg_rtx (SImode);
20638 emit_move_insn (target, const0_rtx);
20639 target = gen_rtx_SUBREG (QImode, target, 0);
20641 if ((optimize && !register_operand (op0, mode0))
20642 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20643 op0 = copy_to_mode_reg (mode0, op0);
20644 if ((optimize && !register_operand (op1, mode1))
20645 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20646 op1 = copy_to_mode_reg (mode1, op1);
20648 pat = GEN_FCN (d->icode) (op0, op1);
20652 emit_insn (gen_rtx_SET (VOIDmode,
20653 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20654 gen_rtx_fmt_ee (comparison, QImode,
20658 return SUBREG_REG (target);
20661 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20664 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20665 tree exp, rtx target)
20668 tree arg0 = CALL_EXPR_ARG (exp, 0);
20669 tree arg1 = CALL_EXPR_ARG (exp, 1);
20670 tree arg2 = CALL_EXPR_ARG (exp, 2);
20671 tree arg3 = CALL_EXPR_ARG (exp, 3);
20672 tree arg4 = CALL_EXPR_ARG (exp, 4);
20673 rtx scratch0, scratch1;
20674 rtx op0 = expand_normal (arg0);
20675 rtx op1 = expand_normal (arg1);
20676 rtx op2 = expand_normal (arg2);
20677 rtx op3 = expand_normal (arg3);
20678 rtx op4 = expand_normal (arg4);
20679 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20681 tmode0 = insn_data[d->icode].operand[0].mode;
20682 tmode1 = insn_data[d->icode].operand[1].mode;
20683 modev2 = insn_data[d->icode].operand[2].mode;
20684 modei3 = insn_data[d->icode].operand[3].mode;
20685 modev4 = insn_data[d->icode].operand[4].mode;
20686 modei5 = insn_data[d->icode].operand[5].mode;
20687 modeimm = insn_data[d->icode].operand[6].mode;
20689 if (VECTOR_MODE_P (modev2))
20690 op0 = safe_vector_operand (op0, modev2);
20691 if (VECTOR_MODE_P (modev4))
20692 op2 = safe_vector_operand (op2, modev4);
20694 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20695 op0 = copy_to_mode_reg (modev2, op0);
20696 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20697 op1 = copy_to_mode_reg (modei3, op1);
20698 if ((optimize && !register_operand (op2, modev4))
20699 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20700 op2 = copy_to_mode_reg (modev4, op2);
20701 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20702 op3 = copy_to_mode_reg (modei5, op3);
20704 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20706 error ("the fifth argument must be a 8-bit immediate");
20710 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20712 if (optimize || !target
20713 || GET_MODE (target) != tmode0
20714 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20715 target = gen_reg_rtx (tmode0);
20717 scratch1 = gen_reg_rtx (tmode1);
20719 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20721 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20723 if (optimize || !target
20724 || GET_MODE (target) != tmode1
20725 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20726 target = gen_reg_rtx (tmode1);
20728 scratch0 = gen_reg_rtx (tmode0);
20730 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20734 gcc_assert (d->flag);
20736 scratch0 = gen_reg_rtx (tmode0);
20737 scratch1 = gen_reg_rtx (tmode1);
20739 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20749 target = gen_reg_rtx (SImode);
20750 emit_move_insn (target, const0_rtx);
20751 target = gen_rtx_SUBREG (QImode, target, 0);
20754 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20755 gen_rtx_fmt_ee (EQ, QImode,
20756 gen_rtx_REG ((enum machine_mode) d->flag,
20759 return SUBREG_REG (target);
20766 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20769 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20770 tree exp, rtx target)
20773 tree arg0 = CALL_EXPR_ARG (exp, 0);
20774 tree arg1 = CALL_EXPR_ARG (exp, 1);
20775 tree arg2 = CALL_EXPR_ARG (exp, 2);
20776 rtx scratch0, scratch1;
20777 rtx op0 = expand_normal (arg0);
20778 rtx op1 = expand_normal (arg1);
20779 rtx op2 = expand_normal (arg2);
20780 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20782 tmode0 = insn_data[d->icode].operand[0].mode;
20783 tmode1 = insn_data[d->icode].operand[1].mode;
20784 modev2 = insn_data[d->icode].operand[2].mode;
20785 modev3 = insn_data[d->icode].operand[3].mode;
20786 modeimm = insn_data[d->icode].operand[4].mode;
20788 if (VECTOR_MODE_P (modev2))
20789 op0 = safe_vector_operand (op0, modev2);
20790 if (VECTOR_MODE_P (modev3))
20791 op1 = safe_vector_operand (op1, modev3);
20793 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20794 op0 = copy_to_mode_reg (modev2, op0);
20795 if ((optimize && !register_operand (op1, modev3))
20796 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20797 op1 = copy_to_mode_reg (modev3, op1);
20799 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20801 error ("the third argument must be a 8-bit immediate");
20805 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20807 if (optimize || !target
20808 || GET_MODE (target) != tmode0
20809 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20810 target = gen_reg_rtx (tmode0);
20812 scratch1 = gen_reg_rtx (tmode1);
20814 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20816 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20818 if (optimize || !target
20819 || GET_MODE (target) != tmode1
20820 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20821 target = gen_reg_rtx (tmode1);
20823 scratch0 = gen_reg_rtx (tmode0);
20825 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20829 gcc_assert (d->flag);
20831 scratch0 = gen_reg_rtx (tmode0);
20832 scratch1 = gen_reg_rtx (tmode1);
20834 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20844 target = gen_reg_rtx (SImode);
20845 emit_move_insn (target, const0_rtx);
20846 target = gen_rtx_SUBREG (QImode, target, 0);
20849 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20850 gen_rtx_fmt_ee (EQ, QImode,
20851 gen_rtx_REG ((enum machine_mode) d->flag,
20854 return SUBREG_REG (target);
20860 /* Subroutine of ix86_expand_builtin to take care of insns with
20861 variable number of operands. */
20864 ix86_expand_args_builtin (const struct builtin_description *d,
20865 tree exp, rtx target)
20867 rtx pat, real_target;
20868 unsigned int i, nargs;
20869 unsigned int nargs_constant = 0;
20870 int num_memory = 0;
20874 enum machine_mode mode;
20876 bool last_arg_count = false;
20877 enum insn_code icode = d->icode;
20878 const struct insn_data *insn_p = &insn_data[icode];
20879 enum machine_mode tmode = insn_p->operand[0].mode;
20880 enum machine_mode rmode = VOIDmode;
20882 enum rtx_code comparison = d->comparison;
20884 switch ((enum ix86_builtin_type) d->flag)
20886 case INT_FTYPE_V2DI_V2DI_PTEST:
20887 return ix86_expand_sse_ptest (d, exp, target);
20888 case FLOAT128_FTYPE_FLOAT128:
20889 case FLOAT_FTYPE_FLOAT:
20890 case INT64_FTYPE_V4SF:
20891 case INT64_FTYPE_V2DF:
20892 case INT_FTYPE_V16QI:
20893 case INT_FTYPE_V8QI:
20894 case INT_FTYPE_V4SF:
20895 case INT_FTYPE_V2DF:
20896 case V16QI_FTYPE_V16QI:
20897 case V8HI_FTYPE_V8HI:
20898 case V8HI_FTYPE_V16QI:
20899 case V8QI_FTYPE_V8QI:
20900 case V4SI_FTYPE_V4SI:
20901 case V4SI_FTYPE_V16QI:
20902 case V4SI_FTYPE_V4SF:
20903 case V4SI_FTYPE_V8HI:
20904 case V4SI_FTYPE_V2DF:
20905 case V4HI_FTYPE_V4HI:
20906 case V4SF_FTYPE_V4SF:
20907 case V4SF_FTYPE_V4SI:
20908 case V4SF_FTYPE_V2DF:
20909 case V2DI_FTYPE_V2DI:
20910 case V2DI_FTYPE_V16QI:
20911 case V2DI_FTYPE_V8HI:
20912 case V2DI_FTYPE_V4SI:
20913 case V2DF_FTYPE_V2DF:
20914 case V2DF_FTYPE_V4SI:
20915 case V2DF_FTYPE_V4SF:
20916 case V2DF_FTYPE_V2SI:
20917 case V2SI_FTYPE_V2SI:
20918 case V2SI_FTYPE_V4SF:
20919 case V2SI_FTYPE_V2SF:
20920 case V2SI_FTYPE_V2DF:
20921 case V2SF_FTYPE_V2SF:
20922 case V2SF_FTYPE_V2SI:
20925 case V4SF_FTYPE_V4SF_VEC_MERGE:
20926 case V2DF_FTYPE_V2DF_VEC_MERGE:
20927 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
20928 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
20929 case V16QI_FTYPE_V16QI_V16QI:
20930 case V16QI_FTYPE_V8HI_V8HI:
20931 case V8QI_FTYPE_V8QI_V8QI:
20932 case V8QI_FTYPE_V4HI_V4HI:
20933 case V8HI_FTYPE_V8HI_V8HI:
20934 case V8HI_FTYPE_V16QI_V16QI:
20935 case V8HI_FTYPE_V4SI_V4SI:
20936 case V4SI_FTYPE_V4SI_V4SI:
20937 case V4SI_FTYPE_V8HI_V8HI:
20938 case V4SI_FTYPE_V4SF_V4SF:
20939 case V4SI_FTYPE_V2DF_V2DF:
20940 case V4HI_FTYPE_V4HI_V4HI:
20941 case V4HI_FTYPE_V8QI_V8QI:
20942 case V4HI_FTYPE_V2SI_V2SI:
20943 case V4SF_FTYPE_V4SF_V4SF:
20944 case V4SF_FTYPE_V4SF_V2SI:
20945 case V4SF_FTYPE_V4SF_V2DF:
20946 case V4SF_FTYPE_V4SF_DI:
20947 case V4SF_FTYPE_V4SF_SI:
20948 case V2DI_FTYPE_V2DI_V2DI:
20949 case V2DI_FTYPE_V16QI_V16QI:
20950 case V2DI_FTYPE_V4SI_V4SI:
20951 case V2DI_FTYPE_V2DI_V16QI:
20952 case V2DI_FTYPE_V2DF_V2DF:
20953 case V2SI_FTYPE_V2SI_V2SI:
20954 case V2SI_FTYPE_V4HI_V4HI:
20955 case V2SI_FTYPE_V2SF_V2SF:
20956 case V2DF_FTYPE_V2DF_V2DF:
20957 case V2DF_FTYPE_V2DF_V4SF:
20958 case V2DF_FTYPE_V2DF_DI:
20959 case V2DF_FTYPE_V2DF_SI:
20960 case V2SF_FTYPE_V2SF_V2SF:
20961 case V1DI_FTYPE_V1DI_V1DI:
20962 case V1DI_FTYPE_V8QI_V8QI:
20963 case V1DI_FTYPE_V2SI_V2SI:
20964 if (comparison == UNKNOWN)
20965 return ix86_expand_binop_builtin (icode, exp, target);
20968 case V4SF_FTYPE_V4SF_V4SF_SWAP:
20969 case V2DF_FTYPE_V2DF_V2DF_SWAP:
20970 gcc_assert (comparison != UNKNOWN);
20974 case V8HI_FTYPE_V8HI_V8HI_COUNT:
20975 case V8HI_FTYPE_V8HI_SI_COUNT:
20976 case V4SI_FTYPE_V4SI_V4SI_COUNT:
20977 case V4SI_FTYPE_V4SI_SI_COUNT:
20978 case V4HI_FTYPE_V4HI_V4HI_COUNT:
20979 case V4HI_FTYPE_V4HI_SI_COUNT:
20980 case V2DI_FTYPE_V2DI_V2DI_COUNT:
20981 case V2DI_FTYPE_V2DI_SI_COUNT:
20982 case V2SI_FTYPE_V2SI_V2SI_COUNT:
20983 case V2SI_FTYPE_V2SI_SI_COUNT:
20984 case V1DI_FTYPE_V1DI_V1DI_COUNT:
20985 case V1DI_FTYPE_V1DI_SI_COUNT:
20987 last_arg_count = true;
20989 case UINT64_FTYPE_UINT64_UINT64:
20990 case UINT_FTYPE_UINT_UINT:
20991 case UINT_FTYPE_UINT_USHORT:
20992 case UINT_FTYPE_UINT_UCHAR:
20995 case V2DI2TI_FTYPE_V2DI_INT:
20998 nargs_constant = 1;
21000 case V8HI_FTYPE_V8HI_INT:
21001 case V4SI_FTYPE_V4SI_INT:
21002 case V4HI_FTYPE_V4HI_INT:
21003 case V4SF_FTYPE_V4SF_INT:
21004 case V2DI_FTYPE_V2DI_INT:
21005 case V2DF_FTYPE_V2DF_INT:
21007 nargs_constant = 1;
21009 case V16QI_FTYPE_V16QI_V16QI_V16QI:
21010 case V4SF_FTYPE_V4SF_V4SF_V4SF:
21011 case V2DF_FTYPE_V2DF_V2DF_V2DF:
21014 case V16QI_FTYPE_V16QI_V16QI_INT:
21015 case V8HI_FTYPE_V8HI_V8HI_INT:
21016 case V4SI_FTYPE_V4SI_V4SI_INT:
21017 case V4SF_FTYPE_V4SF_V4SF_INT:
21018 case V2DI_FTYPE_V2DI_V2DI_INT:
21019 case V2DF_FTYPE_V2DF_V2DF_INT:
21021 nargs_constant = 1;
21023 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
21026 nargs_constant = 1;
21028 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
21031 nargs_constant = 1;
21033 case V2DI_FTYPE_V2DI_UINT_UINT:
21035 nargs_constant = 2;
21037 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
21039 nargs_constant = 2;
21042 gcc_unreachable ();
21045 gcc_assert (nargs <= ARRAY_SIZE (args));
21047 if (comparison != UNKNOWN)
21049 gcc_assert (nargs == 2);
21050 return ix86_expand_sse_compare (d, exp, target, swap);
21053 if (rmode == VOIDmode || rmode == tmode)
21057 || GET_MODE (target) != tmode
21058 || ! (*insn_p->operand[0].predicate) (target, tmode))
21059 target = gen_reg_rtx (tmode);
21060 real_target = target;
21064 target = gen_reg_rtx (rmode);
21065 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
21068 for (i = 0; i < nargs; i++)
21070 tree arg = CALL_EXPR_ARG (exp, i);
21071 rtx op = expand_normal (arg);
21072 enum machine_mode mode = insn_p->operand[i + 1].mode;
21073 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
21075 if (last_arg_count && (i + 1) == nargs)
21077 /* SIMD shift insns take either an 8-bit immediate or
21078 register as count. But builtin functions take int as
21079 count. If count doesn't match, we put it in register. */
21082 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
21083 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
21084 op = copy_to_reg (op);
21087 else if ((nargs - i) <= nargs_constant)
21092 case CODE_FOR_sse4_1_roundpd:
21093 case CODE_FOR_sse4_1_roundps:
21094 case CODE_FOR_sse4_1_roundsd:
21095 case CODE_FOR_sse4_1_roundss:
21096 case CODE_FOR_sse4_1_blendps:
21097 error ("the last argument must be a 4-bit immediate");
21100 case CODE_FOR_sse4_1_blendpd:
21101 error ("the last argument must be a 2-bit immediate");
21105 switch (nargs_constant)
21108 if ((nargs - i) == nargs_constant)
21110 error ("the next to last argument must be an 8-bit immediate");
21114 error ("the last argument must be an 8-bit immediate");
21117 gcc_unreachable ();
21124 if (VECTOR_MODE_P (mode))
21125 op = safe_vector_operand (op, mode);
21127 /* If we aren't optimizing, only allow one memory operand to
21129 if (memory_operand (op, mode))
21132 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
21134 if (optimize || !match || num_memory > 1)
21135 op = copy_to_mode_reg (mode, op);
21139 op = copy_to_reg (op);
21140 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
21145 args[i].mode = mode;
21151 pat = GEN_FCN (icode) (real_target, args[0].op);
21154 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
21157 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21161 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21162 args[2].op, args[3].op);
21165 gcc_unreachable ();
21175 /* Subroutine of ix86_expand_builtin to take care of special insns
21176 with variable number of operands. */
21179 ix86_expand_special_args_builtin (const struct builtin_description *d,
21180 tree exp, rtx target)
21184 unsigned int i, nargs, arg_adjust, memory;
21188 enum machine_mode mode;
21190 enum insn_code icode = d->icode;
21191 bool last_arg_constant = false;
21192 const struct insn_data *insn_p = &insn_data[icode];
21193 enum machine_mode tmode = insn_p->operand[0].mode;
21194 enum { load, store } class;
21196 switch ((enum ix86_special_builtin_type) d->flag)
21198 case VOID_FTYPE_VOID:
21199 emit_insn (GEN_FCN (icode) (target));
21201 case V2DI_FTYPE_PV2DI:
21202 case V16QI_FTYPE_PCCHAR:
21203 case V4SF_FTYPE_PCFLOAT:
21204 case V2DF_FTYPE_PCDOUBLE:
21209 case VOID_FTYPE_PV2SF_V4SF:
21210 case VOID_FTYPE_PV2DI_V2DI:
21211 case VOID_FTYPE_PCHAR_V16QI:
21212 case VOID_FTYPE_PFLOAT_V4SF:
21213 case VOID_FTYPE_PDOUBLE_V2DF:
21214 case VOID_FTYPE_PDI_DI:
21215 case VOID_FTYPE_PINT_INT:
21218 /* Reserve memory operand for target. */
21219 memory = ARRAY_SIZE (args);
21221 case V4SF_FTYPE_V4SF_PCV2SF:
21222 case V2DF_FTYPE_V2DF_PCDOUBLE:
21228 gcc_unreachable ();
21231 gcc_assert (nargs <= ARRAY_SIZE (args));
21233 if (class == store)
21235 arg = CALL_EXPR_ARG (exp, 0);
21236 op = expand_normal (arg);
21237 gcc_assert (target == 0);
21238 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
21246 || GET_MODE (target) != tmode
21247 || ! (*insn_p->operand[0].predicate) (target, tmode))
21248 target = gen_reg_rtx (tmode);
21251 for (i = 0; i < nargs; i++)
21253 enum machine_mode mode = insn_p->operand[i + 1].mode;
21256 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
21257 op = expand_normal (arg);
21258 match = (*insn_p->operand[i + 1].predicate) (op, mode);
21260 if (last_arg_constant && (i + 1) == nargs)
21266 error ("the last argument must be an 8-bit immediate");
21274 /* This must be the memory operand. */
21275 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
21276 gcc_assert (GET_MODE (op) == mode
21277 || GET_MODE (op) == VOIDmode);
21281 /* This must be register. */
21282 if (VECTOR_MODE_P (mode))
21283 op = safe_vector_operand (op, mode);
21285 gcc_assert (GET_MODE (op) == mode
21286 || GET_MODE (op) == VOIDmode);
21287 op = copy_to_mode_reg (mode, op);
21292 args[i].mode = mode;
21298 pat = GEN_FCN (icode) (target, args[0].op);
21301 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
21304 gcc_unreachable ();
21310 return class == store ? 0 : target;
21313 /* Return the integer constant in ARG. Constrain it to be in the range
21314 of the subparts of VEC_TYPE; issue an error if not. */
21317 get_element_number (tree vec_type, tree arg)
21319 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
21321 if (!host_integerp (arg, 1)
21322 || (elt = tree_low_cst (arg, 1), elt > max))
21324 error ("selector must be an integer constant in the range 0..%wi", max);
21331 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21332 ix86_expand_vector_init. We DO have language-level syntax for this, in
21333 the form of (type){ init-list }. Except that since we can't place emms
21334 instructions from inside the compiler, we can't allow the use of MMX
21335 registers unless the user explicitly asks for it. So we do *not* define
21336 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
21337 we have builtins invoked by mmintrin.h that gives us license to emit
21338 these sorts of instructions. */
21341 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
21343 enum machine_mode tmode = TYPE_MODE (type);
21344 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
21345 int i, n_elt = GET_MODE_NUNITS (tmode);
21346 rtvec v = rtvec_alloc (n_elt);
21348 gcc_assert (VECTOR_MODE_P (tmode));
21349 gcc_assert (call_expr_nargs (exp) == n_elt);
21351 for (i = 0; i < n_elt; ++i)
21353 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
21354 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
21357 if (!target || !register_operand (target, tmode))
21358 target = gen_reg_rtx (tmode);
21360 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
21364 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21365 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
21366 had a language-level syntax for referencing vector elements. */
21369 ix86_expand_vec_ext_builtin (tree exp, rtx target)
21371 enum machine_mode tmode, mode0;
21376 arg0 = CALL_EXPR_ARG (exp, 0);
21377 arg1 = CALL_EXPR_ARG (exp, 1);
21379 op0 = expand_normal (arg0);
21380 elt = get_element_number (TREE_TYPE (arg0), arg1);
21382 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21383 mode0 = TYPE_MODE (TREE_TYPE (arg0));
21384 gcc_assert (VECTOR_MODE_P (mode0));
21386 op0 = force_reg (mode0, op0);
21388 if (optimize || !target || !register_operand (target, tmode))
21389 target = gen_reg_rtx (tmode);
21391 ix86_expand_vector_extract (true, target, op0, elt);
21396 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21397 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
21398 a language-level syntax for referencing vector elements. */
21401 ix86_expand_vec_set_builtin (tree exp)
21403 enum machine_mode tmode, mode1;
21404 tree arg0, arg1, arg2;
21406 rtx op0, op1, target;
21408 arg0 = CALL_EXPR_ARG (exp, 0);
21409 arg1 = CALL_EXPR_ARG (exp, 1);
21410 arg2 = CALL_EXPR_ARG (exp, 2);
21412 tmode = TYPE_MODE (TREE_TYPE (arg0));
21413 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21414 gcc_assert (VECTOR_MODE_P (tmode));
21416 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
21417 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
21418 elt = get_element_number (TREE_TYPE (arg0), arg2);
21420 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
21421 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
21423 op0 = force_reg (tmode, op0);
21424 op1 = force_reg (mode1, op1);
21426 /* OP0 is the source of these builtin functions and shouldn't be
21427 modified. Create a copy, use it and return it as target. */
21428 target = gen_reg_rtx (tmode);
21429 emit_move_insn (target, op0);
21430 ix86_expand_vector_set (true, target, op1, elt);
21435 /* Expand an expression EXP that calls a built-in function,
21436 with result going to TARGET if that's convenient
21437 (and in mode MODE if that's convenient).
21438 SUBTARGET may be used as the target for computing one of EXP's operands.
21439 IGNORE is nonzero if the value is to be ignored. */
21442 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
21443 enum machine_mode mode ATTRIBUTE_UNUSED,
21444 int ignore ATTRIBUTE_UNUSED)
21446 const struct builtin_description *d;
21448 enum insn_code icode;
21449 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
21450 tree arg0, arg1, arg2;
21451 rtx op0, op1, op2, pat;
21452 enum machine_mode mode0, mode1, mode2;
21453 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
21457 case IX86_BUILTIN_MASKMOVQ:
21458 case IX86_BUILTIN_MASKMOVDQU:
21459 icode = (fcode == IX86_BUILTIN_MASKMOVQ
21460 ? CODE_FOR_mmx_maskmovq
21461 : CODE_FOR_sse2_maskmovdqu);
21462 /* Note the arg order is different from the operand order. */
21463 arg1 = CALL_EXPR_ARG (exp, 0);
21464 arg2 = CALL_EXPR_ARG (exp, 1);
21465 arg0 = CALL_EXPR_ARG (exp, 2);
21466 op0 = expand_normal (arg0);
21467 op1 = expand_normal (arg1);
21468 op2 = expand_normal (arg2);
21469 mode0 = insn_data[icode].operand[0].mode;
21470 mode1 = insn_data[icode].operand[1].mode;
21471 mode2 = insn_data[icode].operand[2].mode;
21473 op0 = force_reg (Pmode, op0);
21474 op0 = gen_rtx_MEM (mode1, op0);
21476 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
21477 op0 = copy_to_mode_reg (mode0, op0);
21478 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
21479 op1 = copy_to_mode_reg (mode1, op1);
21480 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
21481 op2 = copy_to_mode_reg (mode2, op2);
21482 pat = GEN_FCN (icode) (op0, op1, op2);
21488 case IX86_BUILTIN_LDMXCSR:
21489 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
21490 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21491 emit_move_insn (target, op0);
21492 emit_insn (gen_sse_ldmxcsr (target));
21495 case IX86_BUILTIN_STMXCSR:
21496 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21497 emit_insn (gen_sse_stmxcsr (target));
21498 return copy_to_mode_reg (SImode, target);
21500 case IX86_BUILTIN_CLFLUSH:
21501 arg0 = CALL_EXPR_ARG (exp, 0);
21502 op0 = expand_normal (arg0);
21503 icode = CODE_FOR_sse2_clflush;
21504 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
21505 op0 = copy_to_mode_reg (Pmode, op0);
21507 emit_insn (gen_sse2_clflush (op0));
21510 case IX86_BUILTIN_MONITOR:
21511 arg0 = CALL_EXPR_ARG (exp, 0);
21512 arg1 = CALL_EXPR_ARG (exp, 1);
21513 arg2 = CALL_EXPR_ARG (exp, 2);
21514 op0 = expand_normal (arg0);
21515 op1 = expand_normal (arg1);
21516 op2 = expand_normal (arg2);
21518 op0 = copy_to_mode_reg (Pmode, op0);
21520 op1 = copy_to_mode_reg (SImode, op1);
21522 op2 = copy_to_mode_reg (SImode, op2);
21523 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
21526 case IX86_BUILTIN_MWAIT:
21527 arg0 = CALL_EXPR_ARG (exp, 0);
21528 arg1 = CALL_EXPR_ARG (exp, 1);
21529 op0 = expand_normal (arg0);
21530 op1 = expand_normal (arg1);
21532 op0 = copy_to_mode_reg (SImode, op0);
21534 op1 = copy_to_mode_reg (SImode, op1);
21535 emit_insn (gen_sse3_mwait (op0, op1));
21538 case IX86_BUILTIN_VEC_INIT_V2SI:
21539 case IX86_BUILTIN_VEC_INIT_V4HI:
21540 case IX86_BUILTIN_VEC_INIT_V8QI:
21541 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21543 case IX86_BUILTIN_VEC_EXT_V2DF:
21544 case IX86_BUILTIN_VEC_EXT_V2DI:
21545 case IX86_BUILTIN_VEC_EXT_V4SF:
21546 case IX86_BUILTIN_VEC_EXT_V4SI:
21547 case IX86_BUILTIN_VEC_EXT_V8HI:
21548 case IX86_BUILTIN_VEC_EXT_V2SI:
21549 case IX86_BUILTIN_VEC_EXT_V4HI:
21550 case IX86_BUILTIN_VEC_EXT_V16QI:
21551 return ix86_expand_vec_ext_builtin (exp, target);
21553 case IX86_BUILTIN_VEC_SET_V2DI:
21554 case IX86_BUILTIN_VEC_SET_V4SF:
21555 case IX86_BUILTIN_VEC_SET_V4SI:
21556 case IX86_BUILTIN_VEC_SET_V8HI:
21557 case IX86_BUILTIN_VEC_SET_V4HI:
21558 case IX86_BUILTIN_VEC_SET_V16QI:
21559 return ix86_expand_vec_set_builtin (exp);
21561 case IX86_BUILTIN_INFQ:
21563 REAL_VALUE_TYPE inf;
21567 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21569 tmp = validize_mem (force_const_mem (mode, tmp));
21572 target = gen_reg_rtx (mode);
21574 emit_move_insn (target, tmp);
21582 for (i = 0, d = bdesc_special_args;
21583 i < ARRAY_SIZE (bdesc_special_args);
21585 if (d->code == fcode)
21586 return ix86_expand_special_args_builtin (d, exp, target);
21588 for (i = 0, d = bdesc_args;
21589 i < ARRAY_SIZE (bdesc_args);
21591 if (d->code == fcode)
21592 return ix86_expand_args_builtin (d, exp, target);
21594 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21595 if (d->code == fcode)
21596 return ix86_expand_sse_comi (d, exp, target);
21598 for (i = 0, d = bdesc_pcmpestr;
21599 i < ARRAY_SIZE (bdesc_pcmpestr);
21601 if (d->code == fcode)
21602 return ix86_expand_sse_pcmpestr (d, exp, target);
21604 for (i = 0, d = bdesc_pcmpistr;
21605 i < ARRAY_SIZE (bdesc_pcmpistr);
21607 if (d->code == fcode)
21608 return ix86_expand_sse_pcmpistr (d, exp, target);
21610 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21611 if (d->code == fcode)
21612 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21613 (enum multi_arg_type)d->flag,
21616 gcc_unreachable ();
21619 /* Returns a function decl for a vectorized version of the builtin function
21620 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21621 if it is not available. */
21624 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21627 enum machine_mode in_mode, out_mode;
21630 if (TREE_CODE (type_out) != VECTOR_TYPE
21631 || TREE_CODE (type_in) != VECTOR_TYPE)
21634 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21635 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21636 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21637 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21641 case BUILT_IN_SQRT:
21642 if (out_mode == DFmode && out_n == 2
21643 && in_mode == DFmode && in_n == 2)
21644 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21647 case BUILT_IN_SQRTF:
21648 if (out_mode == SFmode && out_n == 4
21649 && in_mode == SFmode && in_n == 4)
21650 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21653 case BUILT_IN_LRINT:
21654 if (out_mode == SImode && out_n == 4
21655 && in_mode == DFmode && in_n == 2)
21656 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21659 case BUILT_IN_LRINTF:
21660 if (out_mode == SImode && out_n == 4
21661 && in_mode == SFmode && in_n == 4)
21662 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21669 /* Dispatch to a handler for a vectorization library. */
21670 if (ix86_veclib_handler)
21671 return (*ix86_veclib_handler)(fn, type_out, type_in);
21676 /* Handler for an SVML-style interface to
21677 a library with vectorized intrinsics. */
21680 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
21683 tree fntype, new_fndecl, args;
21686 enum machine_mode el_mode, in_mode;
21689 /* The SVML is suitable for unsafe math only. */
21690 if (!flag_unsafe_math_optimizations)
21693 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21694 n = TYPE_VECTOR_SUBPARTS (type_out);
21695 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21696 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21697 if (el_mode != in_mode
21705 case BUILT_IN_LOG10:
21707 case BUILT_IN_TANH:
21709 case BUILT_IN_ATAN:
21710 case BUILT_IN_ATAN2:
21711 case BUILT_IN_ATANH:
21712 case BUILT_IN_CBRT:
21713 case BUILT_IN_SINH:
21715 case BUILT_IN_ASINH:
21716 case BUILT_IN_ASIN:
21717 case BUILT_IN_COSH:
21719 case BUILT_IN_ACOSH:
21720 case BUILT_IN_ACOS:
21721 if (el_mode != DFmode || n != 2)
21725 case BUILT_IN_EXPF:
21726 case BUILT_IN_LOGF:
21727 case BUILT_IN_LOG10F:
21728 case BUILT_IN_POWF:
21729 case BUILT_IN_TANHF:
21730 case BUILT_IN_TANF:
21731 case BUILT_IN_ATANF:
21732 case BUILT_IN_ATAN2F:
21733 case BUILT_IN_ATANHF:
21734 case BUILT_IN_CBRTF:
21735 case BUILT_IN_SINHF:
21736 case BUILT_IN_SINF:
21737 case BUILT_IN_ASINHF:
21738 case BUILT_IN_ASINF:
21739 case BUILT_IN_COSHF:
21740 case BUILT_IN_COSF:
21741 case BUILT_IN_ACOSHF:
21742 case BUILT_IN_ACOSF:
21743 if (el_mode != SFmode || n != 4)
21751 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21753 if (fn == BUILT_IN_LOGF)
21754 strcpy (name, "vmlsLn4");
21755 else if (fn == BUILT_IN_LOG)
21756 strcpy (name, "vmldLn2");
21759 sprintf (name, "vmls%s", bname+10);
21760 name[strlen (name)-1] = '4';
21763 sprintf (name, "vmld%s2", bname+10);
21765 /* Convert to uppercase. */
21769 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21770 args = TREE_CHAIN (args))
21774 fntype = build_function_type_list (type_out, type_in, NULL);
21776 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21778 /* Build a function declaration for the vectorized function. */
21779 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21780 TREE_PUBLIC (new_fndecl) = 1;
21781 DECL_EXTERNAL (new_fndecl) = 1;
21782 DECL_IS_NOVOPS (new_fndecl) = 1;
21783 TREE_READONLY (new_fndecl) = 1;
21788 /* Handler for an ACML-style interface to
21789 a library with vectorized intrinsics. */
21792 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21794 char name[20] = "__vr.._";
21795 tree fntype, new_fndecl, args;
21798 enum machine_mode el_mode, in_mode;
21801 /* The ACML is 64bits only and suitable for unsafe math only as
21802 it does not correctly support parts of IEEE with the required
21803 precision such as denormals. */
21805 || !flag_unsafe_math_optimizations)
21808 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21809 n = TYPE_VECTOR_SUBPARTS (type_out);
21810 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21811 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21812 if (el_mode != in_mode
21822 case BUILT_IN_LOG2:
21823 case BUILT_IN_LOG10:
21826 if (el_mode != DFmode
21831 case BUILT_IN_SINF:
21832 case BUILT_IN_COSF:
21833 case BUILT_IN_EXPF:
21834 case BUILT_IN_POWF:
21835 case BUILT_IN_LOGF:
21836 case BUILT_IN_LOG2F:
21837 case BUILT_IN_LOG10F:
21840 if (el_mode != SFmode
21849 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21850 sprintf (name + 7, "%s", bname+10);
21853 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21854 args = TREE_CHAIN (args))
21858 fntype = build_function_type_list (type_out, type_in, NULL);
21860 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21862 /* Build a function declaration for the vectorized function. */
21863 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21864 TREE_PUBLIC (new_fndecl) = 1;
21865 DECL_EXTERNAL (new_fndecl) = 1;
21866 DECL_IS_NOVOPS (new_fndecl) = 1;
21867 TREE_READONLY (new_fndecl) = 1;
21873 /* Returns a decl of a function that implements conversion of the
21874 input vector of type TYPE, or NULL_TREE if it is not available. */
21877 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21879 if (TREE_CODE (type) != VECTOR_TYPE)
21885 switch (TYPE_MODE (type))
21888 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21893 case FIX_TRUNC_EXPR:
21894 switch (TYPE_MODE (type))
21897 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21907 /* Returns a code for a target-specific builtin that implements
21908 reciprocal of the function, or NULL_TREE if not available. */
21911 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21912 bool sqrt ATTRIBUTE_UNUSED)
21914 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21915 && flag_finite_math_only && !flag_trapping_math
21916 && flag_unsafe_math_optimizations))
21920 /* Machine dependent builtins. */
21923 /* Vectorized version of sqrt to rsqrt conversion. */
21924 case IX86_BUILTIN_SQRTPS_NR:
21925 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21931 /* Normal builtins. */
21934 /* Sqrt to rsqrt conversion. */
21935 case BUILT_IN_SQRTF:
21936 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21943 /* Store OPERAND to the memory after reload is completed. This means
21944 that we can't easily use assign_stack_local. */
21946 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21950 gcc_assert (reload_completed);
21951 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
21953 result = gen_rtx_MEM (mode,
21954 gen_rtx_PLUS (Pmode,
21956 GEN_INT (-RED_ZONE_SIZE)));
21957 emit_move_insn (result, operand);
21959 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
21965 operand = gen_lowpart (DImode, operand);
21969 gen_rtx_SET (VOIDmode,
21970 gen_rtx_MEM (DImode,
21971 gen_rtx_PRE_DEC (DImode,
21972 stack_pointer_rtx)),
21976 gcc_unreachable ();
21978 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21987 split_di (&operand, 1, operands, operands + 1);
21989 gen_rtx_SET (VOIDmode,
21990 gen_rtx_MEM (SImode,
21991 gen_rtx_PRE_DEC (Pmode,
21992 stack_pointer_rtx)),
21995 gen_rtx_SET (VOIDmode,
21996 gen_rtx_MEM (SImode,
21997 gen_rtx_PRE_DEC (Pmode,
21998 stack_pointer_rtx)),
22003 /* Store HImodes as SImodes. */
22004 operand = gen_lowpart (SImode, operand);
22008 gen_rtx_SET (VOIDmode,
22009 gen_rtx_MEM (GET_MODE (operand),
22010 gen_rtx_PRE_DEC (SImode,
22011 stack_pointer_rtx)),
22015 gcc_unreachable ();
22017 result = gen_rtx_MEM (mode, stack_pointer_rtx);
22022 /* Free operand from the memory. */
22024 ix86_free_from_memory (enum machine_mode mode)
22026 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
22030 if (mode == DImode || TARGET_64BIT)
22034 /* Use LEA to deallocate stack space. In peephole2 it will be converted
22035 to pop or add instruction if registers are available. */
22036 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22037 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
22042 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
22043 QImode must go into class Q_REGS.
22044 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
22045 movdf to do mem-to-mem moves through integer regs. */
22047 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
22049 enum machine_mode mode = GET_MODE (x);
22051 /* We're only allowed to return a subclass of CLASS. Many of the
22052 following checks fail for NO_REGS, so eliminate that early. */
22053 if (regclass == NO_REGS)
22056 /* All classes can load zeros. */
22057 if (x == CONST0_RTX (mode))
22060 /* Force constants into memory if we are loading a (nonzero) constant into
22061 an MMX or SSE register. This is because there are no MMX/SSE instructions
22062 to load from a constant. */
22064 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
22067 /* Prefer SSE regs only, if we can use them for math. */
22068 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
22069 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
22071 /* Floating-point constants need more complex checks. */
22072 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
22074 /* General regs can load everything. */
22075 if (reg_class_subset_p (regclass, GENERAL_REGS))
22078 /* Floats can load 0 and 1 plus some others. Note that we eliminated
22079 zero above. We only want to wind up preferring 80387 registers if
22080 we plan on doing computation with them. */
22082 && standard_80387_constant_p (x))
22084 /* Limit class to non-sse. */
22085 if (regclass == FLOAT_SSE_REGS)
22087 if (regclass == FP_TOP_SSE_REGS)
22089 if (regclass == FP_SECOND_SSE_REGS)
22090 return FP_SECOND_REG;
22091 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
22098 /* Generally when we see PLUS here, it's the function invariant
22099 (plus soft-fp const_int). Which can only be computed into general
22101 if (GET_CODE (x) == PLUS)
22102 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
22104 /* QImode constants are easy to load, but non-constant QImode data
22105 must go into Q_REGS. */
22106 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
22108 if (reg_class_subset_p (regclass, Q_REGS))
22110 if (reg_class_subset_p (Q_REGS, regclass))
22118 /* Discourage putting floating-point values in SSE registers unless
22119 SSE math is being used, and likewise for the 387 registers. */
22121 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
22123 enum machine_mode mode = GET_MODE (x);
22125 /* Restrict the output reload class to the register bank that we are doing
22126 math on. If we would like not to return a subset of CLASS, reject this
22127 alternative: if reload cannot do this, it will still use its choice. */
22128 mode = GET_MODE (x);
22129 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22130 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
22132 if (X87_FLOAT_MODE_P (mode))
22134 if (regclass == FP_TOP_SSE_REGS)
22136 else if (regclass == FP_SECOND_SSE_REGS)
22137 return FP_SECOND_REG;
22139 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
22145 static enum reg_class
22146 ix86_secondary_reload (bool in_p, rtx x, enum reg_class class,
22147 enum machine_mode mode,
22148 secondary_reload_info *sri ATTRIBUTE_UNUSED)
22150 /* QImode spills from non-QI registers require
22151 intermediate register on 32bit targets. */
22152 if (!in_p && mode == QImode && !TARGET_64BIT
22153 && (class == GENERAL_REGS
22154 || class == LEGACY_REGS
22155 || class == INDEX_REGS))
22164 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
22165 regno = true_regnum (x);
22167 /* Return Q_REGS if the operand is in memory. */
22175 /* If we are copying between general and FP registers, we need a memory
22176 location. The same is true for SSE and MMX registers.
22178 To optimize register_move_cost performance, allow inline variant.
22180 The macro can't work reliably when one of the CLASSES is class containing
22181 registers from multiple units (SSE, MMX, integer). We avoid this by never
22182 combining those units in single alternative in the machine description.
22183 Ensure that this constraint holds to avoid unexpected surprises.
22185 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
22186 enforce these sanity checks. */
22189 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22190 enum machine_mode mode, int strict)
22192 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
22193 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
22194 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
22195 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
22196 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
22197 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
22199 gcc_assert (!strict);
22203 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
22206 /* ??? This is a lie. We do have moves between mmx/general, and for
22207 mmx/sse2. But by saying we need secondary memory we discourage the
22208 register allocator from using the mmx registers unless needed. */
22209 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
22212 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22214 /* SSE1 doesn't have any direct moves from other classes. */
22218 /* If the target says that inter-unit moves are more expensive
22219 than moving through memory, then don't generate them. */
22220 if (!TARGET_INTER_UNIT_MOVES)
22223 /* Between SSE and general, we have moves no larger than word size. */
22224 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22232 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22233 enum machine_mode mode, int strict)
22235 return inline_secondary_memory_needed (class1, class2, mode, strict);
22238 /* Return true if the registers in CLASS cannot represent the change from
22239 modes FROM to TO. */
22242 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
22243 enum reg_class regclass)
22248 /* x87 registers can't do subreg at all, as all values are reformatted
22249 to extended precision. */
22250 if (MAYBE_FLOAT_CLASS_P (regclass))
22253 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
22255 /* Vector registers do not support QI or HImode loads. If we don't
22256 disallow a change to these modes, reload will assume it's ok to
22257 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
22258 the vec_dupv4hi pattern. */
22259 if (GET_MODE_SIZE (from) < 4)
22262 /* Vector registers do not support subreg with nonzero offsets, which
22263 are otherwise valid for integer registers. Since we can't see
22264 whether we have a nonzero offset from here, prohibit all
22265 nonparadoxical subregs changing size. */
22266 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
22273 /* Return the cost of moving data of mode M between a
22274 register and memory. A value of 2 is the default; this cost is
22275 relative to those in `REGISTER_MOVE_COST'.
22277 This function is used extensively by register_move_cost that is used to
22278 build tables at startup. Make it inline in this case.
22279 When IN is 2, return maximum of in and out move cost.
22281 If moving between registers and memory is more expensive than
22282 between two registers, you should define this macro to express the
22285 Model also increased moving costs of QImode registers in non
22289 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
22293 if (FLOAT_CLASS_P (regclass))
22311 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
22312 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
22314 if (SSE_CLASS_P (regclass))
22317 switch (GET_MODE_SIZE (mode))
22332 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
22333 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
22335 if (MMX_CLASS_P (regclass))
22338 switch (GET_MODE_SIZE (mode))
22350 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
22351 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
22353 switch (GET_MODE_SIZE (mode))
22356 if (Q_CLASS_P (regclass) || TARGET_64BIT)
22359 return ix86_cost->int_store[0];
22360 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
22361 cost = ix86_cost->movzbl_load;
22363 cost = ix86_cost->int_load[0];
22365 return MAX (cost, ix86_cost->int_store[0]);
22371 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
22373 return ix86_cost->movzbl_load;
22375 return ix86_cost->int_store[0] + 4;
22380 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
22381 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
22383 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
22384 if (mode == TFmode)
22387 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
22389 cost = ix86_cost->int_load[2];
22391 cost = ix86_cost->int_store[2];
22392 return (cost * (((int) GET_MODE_SIZE (mode)
22393 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
22398 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
22400 return inline_memory_move_cost (mode, regclass, in);
22404 /* Return the cost of moving data from a register in class CLASS1 to
22405 one in class CLASS2.
22407 It is not required that the cost always equal 2 when FROM is the same as TO;
22408 on some machines it is expensive to move between registers if they are not
22409 general registers. */
22412 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
22413 enum reg_class class2)
22415 /* In case we require secondary memory, compute cost of the store followed
22416 by load. In order to avoid bad register allocation choices, we need
22417 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
22419 if (inline_secondary_memory_needed (class1, class2, mode, 0))
22423 cost += inline_memory_move_cost (mode, class1, 2);
22424 cost += inline_memory_move_cost (mode, class2, 2);
22426 /* In case of copying from general_purpose_register we may emit multiple
22427 stores followed by single load causing memory size mismatch stall.
22428 Count this as arbitrarily high cost of 20. */
22429 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
22432 /* In the case of FP/MMX moves, the registers actually overlap, and we
22433 have to switch modes in order to treat them differently. */
22434 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
22435 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22441 /* Moves between SSE/MMX and integer unit are expensive. */
22442 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22443 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22445 /* ??? By keeping returned value relatively high, we limit the number
22446 of moves between integer and MMX/SSE registers for all targets.
22447 Additionally, high value prevents problem with x86_modes_tieable_p(),
22448 where integer modes in MMX/SSE registers are not tieable
22449 because of missing QImode and HImode moves to, from or between
22450 MMX/SSE registers. */
22451 return MAX (8, ix86_cost->mmxsse_to_integer);
22453 if (MAYBE_FLOAT_CLASS_P (class1))
22454 return ix86_cost->fp_move;
22455 if (MAYBE_SSE_CLASS_P (class1))
22456 return ix86_cost->sse_move;
22457 if (MAYBE_MMX_CLASS_P (class1))
22458 return ix86_cost->mmx_move;
22462 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
22465 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
22467 /* Flags and only flags can only hold CCmode values. */
22468 if (CC_REGNO_P (regno))
22469 return GET_MODE_CLASS (mode) == MODE_CC;
22470 if (GET_MODE_CLASS (mode) == MODE_CC
22471 || GET_MODE_CLASS (mode) == MODE_RANDOM
22472 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22474 if (FP_REGNO_P (regno))
22475 return VALID_FP_MODE_P (mode);
22476 if (SSE_REGNO_P (regno))
22478 /* We implement the move patterns for all vector modes into and
22479 out of SSE registers, even when no operation instructions
22481 return (VALID_SSE_REG_MODE (mode)
22482 || VALID_SSE2_REG_MODE (mode)
22483 || VALID_MMX_REG_MODE (mode)
22484 || VALID_MMX_REG_MODE_3DNOW (mode));
22486 if (MMX_REGNO_P (regno))
22488 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22489 so if the register is available at all, then we can move data of
22490 the given mode into or out of it. */
22491 return (VALID_MMX_REG_MODE (mode)
22492 || VALID_MMX_REG_MODE_3DNOW (mode));
22495 if (mode == QImode)
22497 /* Take care for QImode values - they can be in non-QI regs,
22498 but then they do cause partial register stalls. */
22499 if (regno < 4 || TARGET_64BIT)
22501 if (!TARGET_PARTIAL_REG_STALL)
22503 return reload_in_progress || reload_completed;
22505 /* We handle both integer and floats in the general purpose registers. */
22506 else if (VALID_INT_MODE_P (mode))
22508 else if (VALID_FP_MODE_P (mode))
22510 else if (VALID_DFP_MODE_P (mode))
22512 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22513 on to use that value in smaller contexts, this can easily force a
22514 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22515 supporting DImode, allow it. */
22516 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22522 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22523 tieable integer mode. */
22526 ix86_tieable_integer_mode_p (enum machine_mode mode)
22535 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22538 return TARGET_64BIT;
22545 /* Return true if MODE1 is accessible in a register that can hold MODE2
22546 without copying. That is, all register classes that can hold MODE2
22547 can also hold MODE1. */
22550 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22552 if (mode1 == mode2)
22555 if (ix86_tieable_integer_mode_p (mode1)
22556 && ix86_tieable_integer_mode_p (mode2))
22559 /* MODE2 being XFmode implies fp stack or general regs, which means we
22560 can tie any smaller floating point modes to it. Note that we do not
22561 tie this with TFmode. */
22562 if (mode2 == XFmode)
22563 return mode1 == SFmode || mode1 == DFmode;
22565 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22566 that we can tie it with SFmode. */
22567 if (mode2 == DFmode)
22568 return mode1 == SFmode;
22570 /* If MODE2 is only appropriate for an SSE register, then tie with
22571 any other mode acceptable to SSE registers. */
22572 if (GET_MODE_SIZE (mode2) == 16
22573 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22574 return (GET_MODE_SIZE (mode1) == 16
22575 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22577 /* If MODE2 is appropriate for an MMX register, then tie
22578 with any other mode acceptable to MMX registers. */
22579 if (GET_MODE_SIZE (mode2) == 8
22580 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22581 return (GET_MODE_SIZE (mode1) == 8
22582 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22587 /* Compute a (partial) cost for rtx X. Return true if the complete
22588 cost has been computed, and false if subexpressions should be
22589 scanned. In either case, *TOTAL contains the cost result. */
22592 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22594 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22595 enum machine_mode mode = GET_MODE (x);
22603 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22605 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22607 else if (flag_pic && SYMBOLIC_CONST (x)
22609 || (!GET_CODE (x) != LABEL_REF
22610 && (GET_CODE (x) != SYMBOL_REF
22611 || !SYMBOL_REF_LOCAL_P (x)))))
22618 if (mode == VOIDmode)
22621 switch (standard_80387_constant_p (x))
22626 default: /* Other constants */
22631 /* Start with (MEM (SYMBOL_REF)), since that's where
22632 it'll probably end up. Add a penalty for size. */
22633 *total = (COSTS_N_INSNS (1)
22634 + (flag_pic != 0 && !TARGET_64BIT)
22635 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22641 /* The zero extensions is often completely free on x86_64, so make
22642 it as cheap as possible. */
22643 if (TARGET_64BIT && mode == DImode
22644 && GET_MODE (XEXP (x, 0)) == SImode)
22646 else if (TARGET_ZERO_EXTEND_WITH_AND)
22647 *total = ix86_cost->add;
22649 *total = ix86_cost->movzx;
22653 *total = ix86_cost->movsx;
22657 if (CONST_INT_P (XEXP (x, 1))
22658 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22660 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22663 *total = ix86_cost->add;
22666 if ((value == 2 || value == 3)
22667 && ix86_cost->lea <= ix86_cost->shift_const)
22669 *total = ix86_cost->lea;
22679 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22681 if (CONST_INT_P (XEXP (x, 1)))
22683 if (INTVAL (XEXP (x, 1)) > 32)
22684 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22686 *total = ix86_cost->shift_const * 2;
22690 if (GET_CODE (XEXP (x, 1)) == AND)
22691 *total = ix86_cost->shift_var * 2;
22693 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22698 if (CONST_INT_P (XEXP (x, 1)))
22699 *total = ix86_cost->shift_const;
22701 *total = ix86_cost->shift_var;
22706 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22708 /* ??? SSE scalar cost should be used here. */
22709 *total = ix86_cost->fmul;
22712 else if (X87_FLOAT_MODE_P (mode))
22714 *total = ix86_cost->fmul;
22717 else if (FLOAT_MODE_P (mode))
22719 /* ??? SSE vector cost should be used here. */
22720 *total = ix86_cost->fmul;
22725 rtx op0 = XEXP (x, 0);
22726 rtx op1 = XEXP (x, 1);
22728 if (CONST_INT_P (XEXP (x, 1)))
22730 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22731 for (nbits = 0; value != 0; value &= value - 1)
22735 /* This is arbitrary. */
22738 /* Compute costs correctly for widening multiplication. */
22739 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22740 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22741 == GET_MODE_SIZE (mode))
22743 int is_mulwiden = 0;
22744 enum machine_mode inner_mode = GET_MODE (op0);
22746 if (GET_CODE (op0) == GET_CODE (op1))
22747 is_mulwiden = 1, op1 = XEXP (op1, 0);
22748 else if (CONST_INT_P (op1))
22750 if (GET_CODE (op0) == SIGN_EXTEND)
22751 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22754 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22758 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22761 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22762 + nbits * ix86_cost->mult_bit
22763 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22772 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22773 /* ??? SSE cost should be used here. */
22774 *total = ix86_cost->fdiv;
22775 else if (X87_FLOAT_MODE_P (mode))
22776 *total = ix86_cost->fdiv;
22777 else if (FLOAT_MODE_P (mode))
22778 /* ??? SSE vector cost should be used here. */
22779 *total = ix86_cost->fdiv;
22781 *total = ix86_cost->divide[MODE_INDEX (mode)];
22785 if (GET_MODE_CLASS (mode) == MODE_INT
22786 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22788 if (GET_CODE (XEXP (x, 0)) == PLUS
22789 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22790 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22791 && CONSTANT_P (XEXP (x, 1)))
22793 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22794 if (val == 2 || val == 4 || val == 8)
22796 *total = ix86_cost->lea;
22797 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22798 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22800 *total += rtx_cost (XEXP (x, 1), outer_code);
22804 else if (GET_CODE (XEXP (x, 0)) == MULT
22805 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22807 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22808 if (val == 2 || val == 4 || val == 8)
22810 *total = ix86_cost->lea;
22811 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22812 *total += rtx_cost (XEXP (x, 1), outer_code);
22816 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22818 *total = ix86_cost->lea;
22819 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22820 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22821 *total += rtx_cost (XEXP (x, 1), outer_code);
22828 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22830 /* ??? SSE cost should be used here. */
22831 *total = ix86_cost->fadd;
22834 else if (X87_FLOAT_MODE_P (mode))
22836 *total = ix86_cost->fadd;
22839 else if (FLOAT_MODE_P (mode))
22841 /* ??? SSE vector cost should be used here. */
22842 *total = ix86_cost->fadd;
22850 if (!TARGET_64BIT && mode == DImode)
22852 *total = (ix86_cost->add * 2
22853 + (rtx_cost (XEXP (x, 0), outer_code)
22854 << (GET_MODE (XEXP (x, 0)) != DImode))
22855 + (rtx_cost (XEXP (x, 1), outer_code)
22856 << (GET_MODE (XEXP (x, 1)) != DImode)));
22862 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22864 /* ??? SSE cost should be used here. */
22865 *total = ix86_cost->fchs;
22868 else if (X87_FLOAT_MODE_P (mode))
22870 *total = ix86_cost->fchs;
22873 else if (FLOAT_MODE_P (mode))
22875 /* ??? SSE vector cost should be used here. */
22876 *total = ix86_cost->fchs;
22882 if (!TARGET_64BIT && mode == DImode)
22883 *total = ix86_cost->add * 2;
22885 *total = ix86_cost->add;
22889 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22890 && XEXP (XEXP (x, 0), 1) == const1_rtx
22891 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22892 && XEXP (x, 1) == const0_rtx)
22894 /* This kind of construct is implemented using test[bwl].
22895 Treat it as if we had an AND. */
22896 *total = (ix86_cost->add
22897 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22898 + rtx_cost (const1_rtx, outer_code));
22904 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22909 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22910 /* ??? SSE cost should be used here. */
22911 *total = ix86_cost->fabs;
22912 else if (X87_FLOAT_MODE_P (mode))
22913 *total = ix86_cost->fabs;
22914 else if (FLOAT_MODE_P (mode))
22915 /* ??? SSE vector cost should be used here. */
22916 *total = ix86_cost->fabs;
22920 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22921 /* ??? SSE cost should be used here. */
22922 *total = ix86_cost->fsqrt;
22923 else if (X87_FLOAT_MODE_P (mode))
22924 *total = ix86_cost->fsqrt;
22925 else if (FLOAT_MODE_P (mode))
22926 /* ??? SSE vector cost should be used here. */
22927 *total = ix86_cost->fsqrt;
22931 if (XINT (x, 1) == UNSPEC_TP)
22942 static int current_machopic_label_num;
22944 /* Given a symbol name and its associated stub, write out the
22945 definition of the stub. */
22948 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22950 unsigned int length;
22951 char *binder_name, *symbol_name, lazy_ptr_name[32];
22952 int label = ++current_machopic_label_num;
22954 /* For 64-bit we shouldn't get here. */
22955 gcc_assert (!TARGET_64BIT);
22957 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22958 symb = (*targetm.strip_name_encoding) (symb);
22960 length = strlen (stub);
22961 binder_name = XALLOCAVEC (char, length + 32);
22962 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22964 length = strlen (symb);
22965 symbol_name = XALLOCAVEC (char, length + 32);
22966 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22968 sprintf (lazy_ptr_name, "L%d$lz", label);
22971 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22973 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22975 fprintf (file, "%s:\n", stub);
22976 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22980 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22981 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22982 fprintf (file, "\tjmp\t*%%edx\n");
22985 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22987 fprintf (file, "%s:\n", binder_name);
22991 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22992 fprintf (file, "\tpushl\t%%eax\n");
22995 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22997 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
22999 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
23000 fprintf (file, "%s:\n", lazy_ptr_name);
23001 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23002 fprintf (file, "\t.long %s\n", binder_name);
23006 darwin_x86_file_end (void)
23008 darwin_file_end ();
23011 #endif /* TARGET_MACHO */
23013 /* Order the registers for register allocator. */
23016 x86_order_regs_for_local_alloc (void)
23021 /* First allocate the local general purpose registers. */
23022 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23023 if (GENERAL_REGNO_P (i) && call_used_regs[i])
23024 reg_alloc_order [pos++] = i;
23026 /* Global general purpose registers. */
23027 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23028 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
23029 reg_alloc_order [pos++] = i;
23031 /* x87 registers come first in case we are doing FP math
23033 if (!TARGET_SSE_MATH)
23034 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23035 reg_alloc_order [pos++] = i;
23037 /* SSE registers. */
23038 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
23039 reg_alloc_order [pos++] = i;
23040 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
23041 reg_alloc_order [pos++] = i;
23043 /* x87 registers. */
23044 if (TARGET_SSE_MATH)
23045 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23046 reg_alloc_order [pos++] = i;
23048 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
23049 reg_alloc_order [pos++] = i;
23051 /* Initialize the rest of array as we do not allocate some registers
23053 while (pos < FIRST_PSEUDO_REGISTER)
23054 reg_alloc_order [pos++] = 0;
23057 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
23058 struct attribute_spec.handler. */
23060 ix86_handle_struct_attribute (tree *node, tree name,
23061 tree args ATTRIBUTE_UNUSED,
23062 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
23065 if (DECL_P (*node))
23067 if (TREE_CODE (*node) == TYPE_DECL)
23068 type = &TREE_TYPE (*node);
23073 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
23074 || TREE_CODE (*type) == UNION_TYPE)))
23076 warning (OPT_Wattributes, "%qs attribute ignored",
23077 IDENTIFIER_POINTER (name));
23078 *no_add_attrs = true;
23081 else if ((is_attribute_p ("ms_struct", name)
23082 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
23083 || ((is_attribute_p ("gcc_struct", name)
23084 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
23086 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
23087 IDENTIFIER_POINTER (name));
23088 *no_add_attrs = true;
23095 ix86_ms_bitfield_layout_p (const_tree record_type)
23097 return (TARGET_MS_BITFIELD_LAYOUT &&
23098 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
23099 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
23102 /* Returns an expression indicating where the this parameter is
23103 located on entry to the FUNCTION. */
23106 x86_this_parameter (tree function)
23108 tree type = TREE_TYPE (function);
23109 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
23114 const int *parm_regs;
23116 if (ix86_function_type_abi (type) == MS_ABI)
23117 parm_regs = x86_64_ms_abi_int_parameter_registers;
23119 parm_regs = x86_64_int_parameter_registers;
23120 return gen_rtx_REG (DImode, parm_regs[aggr]);
23123 nregs = ix86_function_regparm (type, function);
23125 if (nregs > 0 && !stdarg_p (type))
23129 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
23130 regno = aggr ? DX_REG : CX_REG;
23138 return gen_rtx_MEM (SImode,
23139 plus_constant (stack_pointer_rtx, 4));
23142 return gen_rtx_REG (SImode, regno);
23145 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
23148 /* Determine whether x86_output_mi_thunk can succeed. */
23151 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
23152 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
23153 HOST_WIDE_INT vcall_offset, const_tree function)
23155 /* 64-bit can handle anything. */
23159 /* For 32-bit, everything's fine if we have one free register. */
23160 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23163 /* Need a free register for vcall_offset. */
23167 /* Need a free register for GOT references. */
23168 if (flag_pic && !(*targetm.binds_local_p) (function))
23171 /* Otherwise ok. */
23175 /* Output the assembler code for a thunk function. THUNK_DECL is the
23176 declaration for the thunk function itself, FUNCTION is the decl for
23177 the target function. DELTA is an immediate constant offset to be
23178 added to THIS. If VCALL_OFFSET is nonzero, the word at
23179 *(*this + vcall_offset) should be added to THIS. */
23182 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
23183 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
23184 HOST_WIDE_INT vcall_offset, tree function)
23187 rtx this_param = x86_this_parameter (function);
23190 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23191 pull it in now and let DELTA benefit. */
23192 if (REG_P (this_param))
23193 this_reg = this_param;
23194 else if (vcall_offset)
23196 /* Put the this parameter into %eax. */
23197 xops[0] = this_param;
23198 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
23199 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
23202 this_reg = NULL_RTX;
23204 /* Adjust the this parameter by a fixed constant. */
23207 xops[0] = GEN_INT (delta);
23208 xops[1] = this_reg ? this_reg : this_param;
23211 if (!x86_64_general_operand (xops[0], DImode))
23213 tmp = gen_rtx_REG (DImode, R10_REG);
23215 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
23217 xops[1] = this_param;
23219 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23222 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23225 /* Adjust the this parameter by a value stored in the vtable. */
23229 tmp = gen_rtx_REG (DImode, R10_REG);
23232 int tmp_regno = CX_REG;
23233 if (lookup_attribute ("fastcall",
23234 TYPE_ATTRIBUTES (TREE_TYPE (function))))
23235 tmp_regno = AX_REG;
23236 tmp = gen_rtx_REG (SImode, tmp_regno);
23239 xops[0] = gen_rtx_MEM (Pmode, this_reg);
23241 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
23243 /* Adjust the this parameter. */
23244 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
23245 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
23247 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
23248 xops[0] = GEN_INT (vcall_offset);
23250 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23251 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
23253 xops[1] = this_reg;
23254 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
23257 /* If necessary, drop THIS back to its stack slot. */
23258 if (this_reg && this_reg != this_param)
23260 xops[0] = this_reg;
23261 xops[1] = this_param;
23262 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
23265 xops[0] = XEXP (DECL_RTL (function), 0);
23268 if (!flag_pic || (*targetm.binds_local_p) (function))
23269 output_asm_insn ("jmp\t%P0", xops);
23270 /* All thunks should be in the same object as their target,
23271 and thus binds_local_p should be true. */
23272 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
23273 gcc_unreachable ();
23276 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
23277 tmp = gen_rtx_CONST (Pmode, tmp);
23278 tmp = gen_rtx_MEM (QImode, tmp);
23280 output_asm_insn ("jmp\t%A0", xops);
23285 if (!flag_pic || (*targetm.binds_local_p) (function))
23286 output_asm_insn ("jmp\t%P0", xops);
23291 rtx sym_ref = XEXP (DECL_RTL (function), 0);
23292 tmp = (gen_rtx_SYMBOL_REF
23294 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
23295 tmp = gen_rtx_MEM (QImode, tmp);
23297 output_asm_insn ("jmp\t%0", xops);
23300 #endif /* TARGET_MACHO */
23302 tmp = gen_rtx_REG (SImode, CX_REG);
23303 output_set_got (tmp, NULL_RTX);
23306 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
23307 output_asm_insn ("jmp\t{*}%1", xops);
23313 x86_file_start (void)
23315 default_file_start ();
23317 darwin_file_start ();
23319 if (X86_FILE_START_VERSION_DIRECTIVE)
23320 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23321 if (X86_FILE_START_FLTUSED)
23322 fputs ("\t.global\t__fltused\n", asm_out_file);
23323 if (ix86_asm_dialect == ASM_INTEL)
23324 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
23328 x86_field_alignment (tree field, int computed)
23330 enum machine_mode mode;
23331 tree type = TREE_TYPE (field);
23333 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
23335 mode = TYPE_MODE (strip_array_types (type));
23336 if (mode == DFmode || mode == DCmode
23337 || GET_MODE_CLASS (mode) == MODE_INT
23338 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
23339 return MIN (32, computed);
23343 /* Output assembler code to FILE to increment profiler label # LABELNO
23344 for profiling a function entry. */
23346 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
23350 #ifndef NO_PROFILE_COUNTERS
23351 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
23354 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
23355 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
23357 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23361 #ifndef NO_PROFILE_COUNTERS
23362 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
23363 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
23365 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
23369 #ifndef NO_PROFILE_COUNTERS
23370 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
23371 PROFILE_COUNT_REGISTER);
23373 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23377 /* We don't have exact information about the insn sizes, but we may assume
23378 quite safely that we are informed about all 1 byte insns and memory
23379 address sizes. This is enough to eliminate unnecessary padding in
23383 min_insn_size (rtx insn)
23387 if (!INSN_P (insn) || !active_insn_p (insn))
23390 /* Discard alignments we've emit and jump instructions. */
23391 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23392 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23395 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
23396 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
23399 /* Important case - calls are always 5 bytes.
23400 It is common to have many calls in the row. */
23402 && symbolic_reference_mentioned_p (PATTERN (insn))
23403 && !SIBLING_CALL_P (insn))
23405 if (get_attr_length (insn) <= 1)
23408 /* For normal instructions we may rely on the sizes of addresses
23409 and the presence of symbol to require 4 bytes of encoding.
23410 This is not the case for jumps where references are PC relative. */
23411 if (!JUMP_P (insn))
23413 l = get_attr_length_address (insn);
23414 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23423 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
23427 ix86_avoid_jump_misspredicts (void)
23429 rtx insn, start = get_insns ();
23430 int nbytes = 0, njumps = 0;
23433 /* Look for all minimal intervals of instructions containing 4 jumps.
23434 The intervals are bounded by START and INSN. NBYTES is the total
23435 size of instructions in the interval including INSN and not including
23436 START. When the NBYTES is smaller than 16 bytes, it is possible
23437 that the end of START and INSN ends up in the same 16byte page.
23439 The smallest offset in the page INSN can start is the case where START
23440 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
23441 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
23443 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23446 nbytes += min_insn_size (insn);
23448 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
23449 INSN_UID (insn), min_insn_size (insn));
23451 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23452 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
23460 start = NEXT_INSN (start);
23461 if ((JUMP_P (start)
23462 && GET_CODE (PATTERN (start)) != ADDR_VEC
23463 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
23465 njumps--, isjump = 1;
23468 nbytes -= min_insn_size (start);
23470 gcc_assert (njumps >= 0);
23472 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23473 INSN_UID (start), INSN_UID (insn), nbytes);
23475 if (njumps == 3 && isjump && nbytes < 16)
23477 int padsize = 15 - nbytes + min_insn_size (insn);
23480 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23481 INSN_UID (insn), padsize);
23482 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23487 /* AMD Athlon works faster
23488 when RET is not destination of conditional jump or directly preceded
23489 by other jump instruction. We avoid the penalty by inserting NOP just
23490 before the RET instructions in such cases. */
23492 ix86_pad_returns (void)
23497 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23499 basic_block bb = e->src;
23500 rtx ret = BB_END (bb);
23502 bool replace = false;
23504 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23505 || !maybe_hot_bb_p (bb))
23507 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23508 if (active_insn_p (prev) || LABEL_P (prev))
23510 if (prev && LABEL_P (prev))
23515 FOR_EACH_EDGE (e, ei, bb->preds)
23516 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23517 && !(e->flags & EDGE_FALLTHRU))
23522 prev = prev_active_insn (ret);
23524 && ((JUMP_P (prev) && any_condjump_p (prev))
23527 /* Empty functions get branch mispredict even when the jump destination
23528 is not visible to us. */
23529 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23534 emit_insn_before (gen_return_internal_long (), ret);
23540 /* Implement machine specific optimizations. We implement padding of returns
23541 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23545 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23546 ix86_pad_returns ();
23547 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23548 ix86_avoid_jump_misspredicts ();
23551 /* Return nonzero when QImode register that must be represented via REX prefix
23554 x86_extended_QIreg_mentioned_p (rtx insn)
23557 extract_insn_cached (insn);
23558 for (i = 0; i < recog_data.n_operands; i++)
23559 if (REG_P (recog_data.operand[i])
23560 && REGNO (recog_data.operand[i]) >= 4)
23565 /* Return nonzero when P points to register encoded via REX prefix.
23566 Called via for_each_rtx. */
23568 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23570 unsigned int regno;
23573 regno = REGNO (*p);
23574 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23577 /* Return true when INSN mentions register that must be encoded using REX
23580 x86_extended_reg_mentioned_p (rtx insn)
23582 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23585 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23586 optabs would emit if we didn't have TFmode patterns. */
23589 x86_emit_floatuns (rtx operands[2])
23591 rtx neglab, donelab, i0, i1, f0, in, out;
23592 enum machine_mode mode, inmode;
23594 inmode = GET_MODE (operands[1]);
23595 gcc_assert (inmode == SImode || inmode == DImode);
23598 in = force_reg (inmode, operands[1]);
23599 mode = GET_MODE (out);
23600 neglab = gen_label_rtx ();
23601 donelab = gen_label_rtx ();
23602 f0 = gen_reg_rtx (mode);
23604 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23606 expand_float (out, in, 0);
23608 emit_jump_insn (gen_jump (donelab));
23611 emit_label (neglab);
23613 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23615 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23617 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23619 expand_float (f0, i0, 0);
23621 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23623 emit_label (donelab);
23626 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23627 with all elements equal to VAR. Return true if successful. */
23630 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23631 rtx target, rtx val)
23633 enum machine_mode smode, wsmode, wvmode;
23648 val = force_reg (GET_MODE_INNER (mode), val);
23649 x = gen_rtx_VEC_DUPLICATE (mode, val);
23650 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23656 if (TARGET_SSE || TARGET_3DNOW_A)
23658 val = gen_lowpart (SImode, val);
23659 x = gen_rtx_TRUNCATE (HImode, val);
23660 x = gen_rtx_VEC_DUPLICATE (mode, x);
23661 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23683 /* Extend HImode to SImode using a paradoxical SUBREG. */
23684 tmp1 = gen_reg_rtx (SImode);
23685 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23686 /* Insert the SImode value as low element of V4SImode vector. */
23687 tmp2 = gen_reg_rtx (V4SImode);
23688 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23689 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23690 CONST0_RTX (V4SImode),
23692 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23693 /* Cast the V4SImode vector back to a V8HImode vector. */
23694 tmp1 = gen_reg_rtx (V8HImode);
23695 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23696 /* Duplicate the low short through the whole low SImode word. */
23697 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23698 /* Cast the V8HImode vector back to a V4SImode vector. */
23699 tmp2 = gen_reg_rtx (V4SImode);
23700 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23701 /* Replicate the low element of the V4SImode vector. */
23702 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23703 /* Cast the V2SImode back to V8HImode, and store in target. */
23704 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23715 /* Extend QImode to SImode using a paradoxical SUBREG. */
23716 tmp1 = gen_reg_rtx (SImode);
23717 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23718 /* Insert the SImode value as low element of V4SImode vector. */
23719 tmp2 = gen_reg_rtx (V4SImode);
23720 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23721 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23722 CONST0_RTX (V4SImode),
23724 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23725 /* Cast the V4SImode vector back to a V16QImode vector. */
23726 tmp1 = gen_reg_rtx (V16QImode);
23727 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23728 /* Duplicate the low byte through the whole low SImode word. */
23729 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23730 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23731 /* Cast the V16QImode vector back to a V4SImode vector. */
23732 tmp2 = gen_reg_rtx (V4SImode);
23733 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23734 /* Replicate the low element of the V4SImode vector. */
23735 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23736 /* Cast the V2SImode back to V16QImode, and store in target. */
23737 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23745 /* Replicate the value once into the next wider mode and recurse. */
23746 val = convert_modes (wsmode, smode, val, true);
23747 x = expand_simple_binop (wsmode, ASHIFT, val,
23748 GEN_INT (GET_MODE_BITSIZE (smode)),
23749 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23750 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23752 x = gen_reg_rtx (wvmode);
23753 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23754 gcc_unreachable ();
23755 emit_move_insn (target, gen_lowpart (mode, x));
23763 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23764 whose ONE_VAR element is VAR, and other elements are zero. Return true
23768 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23769 rtx target, rtx var, int one_var)
23771 enum machine_mode vsimode;
23774 bool use_vector_set = false;
23779 use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
23784 use_vector_set = TARGET_SSE4_1;
23787 use_vector_set = TARGET_SSE2;
23790 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
23796 if (use_vector_set)
23798 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
23799 var = force_reg (GET_MODE_INNER (mode), var);
23800 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23816 var = force_reg (GET_MODE_INNER (mode), var);
23817 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23818 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23823 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23824 new_target = gen_reg_rtx (mode);
23826 new_target = target;
23827 var = force_reg (GET_MODE_INNER (mode), var);
23828 x = gen_rtx_VEC_DUPLICATE (mode, var);
23829 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23830 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23833 /* We need to shuffle the value to the correct position, so
23834 create a new pseudo to store the intermediate result. */
23836 /* With SSE2, we can use the integer shuffle insns. */
23837 if (mode != V4SFmode && TARGET_SSE2)
23839 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23841 GEN_INT (one_var == 1 ? 0 : 1),
23842 GEN_INT (one_var == 2 ? 0 : 1),
23843 GEN_INT (one_var == 3 ? 0 : 1)));
23844 if (target != new_target)
23845 emit_move_insn (target, new_target);
23849 /* Otherwise convert the intermediate result to V4SFmode and
23850 use the SSE1 shuffle instructions. */
23851 if (mode != V4SFmode)
23853 tmp = gen_reg_rtx (V4SFmode);
23854 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23859 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23861 GEN_INT (one_var == 1 ? 0 : 1),
23862 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23863 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23865 if (mode != V4SFmode)
23866 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23867 else if (tmp != target)
23868 emit_move_insn (target, tmp);
23870 else if (target != new_target)
23871 emit_move_insn (target, new_target);
23876 vsimode = V4SImode;
23882 vsimode = V2SImode;
23888 /* Zero extend the variable element to SImode and recurse. */
23889 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23891 x = gen_reg_rtx (vsimode);
23892 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23894 gcc_unreachable ();
23896 emit_move_insn (target, gen_lowpart (mode, x));
23904 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23905 consisting of the values in VALS. It is known that all elements
23906 except ONE_VAR are constants. Return true if successful. */
23909 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23910 rtx target, rtx vals, int one_var)
23912 rtx var = XVECEXP (vals, 0, one_var);
23913 enum machine_mode wmode;
23916 const_vec = copy_rtx (vals);
23917 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23918 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23926 /* For the two element vectors, it's just as easy to use
23927 the general case. */
23945 /* There's no way to set one QImode entry easily. Combine
23946 the variable value with its adjacent constant value, and
23947 promote to an HImode set. */
23948 x = XVECEXP (vals, 0, one_var ^ 1);
23951 var = convert_modes (HImode, QImode, var, true);
23952 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23953 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23954 x = GEN_INT (INTVAL (x) & 0xff);
23958 var = convert_modes (HImode, QImode, var, true);
23959 x = gen_int_mode (INTVAL (x) << 8, HImode);
23961 if (x != const0_rtx)
23962 var = expand_simple_binop (HImode, IOR, var, x, var,
23963 1, OPTAB_LIB_WIDEN);
23965 x = gen_reg_rtx (wmode);
23966 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23967 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23969 emit_move_insn (target, gen_lowpart (mode, x));
23976 emit_move_insn (target, const_vec);
23977 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23981 /* A subroutine of ix86_expand_vector_init_general. Use vector
23982 concatenate to handle the most general case: all values variable,
23983 and none identical. */
23986 ix86_expand_vector_init_concat (enum machine_mode mode,
23987 rtx target, rtx *ops, int n)
23989 enum machine_mode cmode, hmode = VOIDmode;
23990 rtx first[4], second[2];
24018 gcc_unreachable ();
24021 if (!register_operand (ops[1], cmode))
24022 ops[1] = force_reg (cmode, ops[1]);
24023 if (!register_operand (ops[0], cmode))
24024 ops[0] = force_reg (cmode, ops[0]);
24025 emit_insn (gen_rtx_SET (VOIDmode, target,
24026 gen_rtx_VEC_CONCAT (mode, ops[0],
24040 gcc_unreachable ();
24045 /* FIXME: We process inputs backward to help RA. PR 36222. */
24048 for (; i > 0; i -= 2, j--)
24050 first[j] = gen_reg_rtx (cmode);
24051 v = gen_rtvec (2, ops[i - 1], ops[i]);
24052 ix86_expand_vector_init (false, first[j],
24053 gen_rtx_PARALLEL (cmode, v));
24059 gcc_assert (hmode != VOIDmode);
24060 for (i = j = 0; i < n; i += 2, j++)
24062 second[j] = gen_reg_rtx (hmode);
24063 ix86_expand_vector_init_concat (hmode, second [j],
24067 ix86_expand_vector_init_concat (mode, target, second, n);
24070 ix86_expand_vector_init_concat (mode, target, first, n);
24074 gcc_unreachable ();
24078 /* A subroutine of ix86_expand_vector_init_general. Use vector
24079 interleave to handle the most general case: all values variable,
24080 and none identical. */
24083 ix86_expand_vector_init_interleave (enum machine_mode mode,
24084 rtx target, rtx *ops, int n)
24086 enum machine_mode first_imode, second_imode, third_imode;
24089 rtx (*gen_load_even) (rtx, rtx, rtx);
24090 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
24091 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
24096 gen_load_even = gen_vec_setv8hi;
24097 gen_interleave_first_low = gen_vec_interleave_lowv4si;
24098 gen_interleave_second_low = gen_vec_interleave_lowv2di;
24099 first_imode = V4SImode;
24100 second_imode = V2DImode;
24101 third_imode = VOIDmode;
24104 gen_load_even = gen_vec_setv16qi;
24105 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
24106 gen_interleave_second_low = gen_vec_interleave_lowv4si;
24107 first_imode = V8HImode;
24108 second_imode = V4SImode;
24109 third_imode = V2DImode;
24112 gcc_unreachable ();
24115 for (i = 0; i < n; i++)
24117 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
24118 op0 = gen_reg_rtx (SImode);
24119 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
24121 /* Insert the SImode value as low element of V4SImode vector. */
24122 op1 = gen_reg_rtx (V4SImode);
24123 op0 = gen_rtx_VEC_MERGE (V4SImode,
24124 gen_rtx_VEC_DUPLICATE (V4SImode,
24126 CONST0_RTX (V4SImode),
24128 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
24130 /* Cast the V4SImode vector back to a vector in orignal mode. */
24131 op0 = gen_reg_rtx (mode);
24132 emit_move_insn (op0, gen_lowpart (mode, op1));
24134 /* Load even elements into the second positon. */
24135 emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
24138 /* Cast vector to FIRST_IMODE vector. */
24139 ops[i] = gen_reg_rtx (first_imode);
24140 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
24143 /* Interleave low FIRST_IMODE vectors. */
24144 for (i = j = 0; i < n; i += 2, j++)
24146 op0 = gen_reg_rtx (first_imode);
24147 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
24149 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
24150 ops[j] = gen_reg_rtx (second_imode);
24151 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
24154 /* Interleave low SECOND_IMODE vectors. */
24155 switch (second_imode)
24158 for (i = j = 0; i < n / 2; i += 2, j++)
24160 op0 = gen_reg_rtx (second_imode);
24161 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
24164 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
24166 ops[j] = gen_reg_rtx (third_imode);
24167 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
24169 second_imode = V2DImode;
24170 gen_interleave_second_low = gen_vec_interleave_lowv2di;
24174 op0 = gen_reg_rtx (second_imode);
24175 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
24178 /* Cast the SECOND_IMODE vector back to a vector on original
24180 emit_insn (gen_rtx_SET (VOIDmode, target,
24181 gen_lowpart (mode, op0)));
24185 gcc_unreachable ();
24189 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
24190 all values variable, and none identical. */
24193 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
24194 rtx target, rtx vals)
24203 if (!mmx_ok && !TARGET_SSE)
24211 n = GET_MODE_NUNITS (mode);
24212 for (i = 0; i < n; i++)
24213 ops[i] = XVECEXP (vals, 0, i);
24214 ix86_expand_vector_init_concat (mode, target, ops, n);
24218 if (!TARGET_SSE4_1)
24226 n = GET_MODE_NUNITS (mode);
24227 for (i = 0; i < n; i++)
24228 ops[i] = XVECEXP (vals, 0, i);
24229 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
24237 gcc_unreachable ();
24241 int i, j, n_elts, n_words, n_elt_per_word;
24242 enum machine_mode inner_mode;
24243 rtx words[4], shift;
24245 inner_mode = GET_MODE_INNER (mode);
24246 n_elts = GET_MODE_NUNITS (mode);
24247 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
24248 n_elt_per_word = n_elts / n_words;
24249 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
24251 for (i = 0; i < n_words; ++i)
24253 rtx word = NULL_RTX;
24255 for (j = 0; j < n_elt_per_word; ++j)
24257 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
24258 elt = convert_modes (word_mode, inner_mode, elt, true);
24264 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
24265 word, 1, OPTAB_LIB_WIDEN);
24266 word = expand_simple_binop (word_mode, IOR, word, elt,
24267 word, 1, OPTAB_LIB_WIDEN);
24275 emit_move_insn (target, gen_lowpart (mode, words[0]));
24276 else if (n_words == 2)
24278 rtx tmp = gen_reg_rtx (mode);
24279 emit_clobber (tmp);
24280 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
24281 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
24282 emit_move_insn (target, tmp);
24284 else if (n_words == 4)
24286 rtx tmp = gen_reg_rtx (V4SImode);
24287 gcc_assert (word_mode == SImode);
24288 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
24289 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
24290 emit_move_insn (target, gen_lowpart (mode, tmp));
24293 gcc_unreachable ();
24297 /* Initialize vector TARGET via VALS. Suppress the use of MMX
24298 instructions unless MMX_OK is true. */
24301 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
24303 enum machine_mode mode = GET_MODE (target);
24304 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24305 int n_elts = GET_MODE_NUNITS (mode);
24306 int n_var = 0, one_var = -1;
24307 bool all_same = true, all_const_zero = true;
24311 for (i = 0; i < n_elts; ++i)
24313 x = XVECEXP (vals, 0, i);
24314 if (!(CONST_INT_P (x)
24315 || GET_CODE (x) == CONST_DOUBLE
24316 || GET_CODE (x) == CONST_FIXED))
24317 n_var++, one_var = i;
24318 else if (x != CONST0_RTX (inner_mode))
24319 all_const_zero = false;
24320 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
24324 /* Constants are best loaded from the constant pool. */
24327 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
24331 /* If all values are identical, broadcast the value. */
24333 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
24334 XVECEXP (vals, 0, 0)))
24337 /* Values where only one field is non-constant are best loaded from
24338 the pool and overwritten via move later. */
24342 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
24343 XVECEXP (vals, 0, one_var),
24347 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
24351 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
24355 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
24357 enum machine_mode mode = GET_MODE (target);
24358 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24359 bool use_vec_merge = false;
24368 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
24369 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
24371 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
24373 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
24374 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24380 use_vec_merge = TARGET_SSE4_1;
24388 /* For the two element vectors, we implement a VEC_CONCAT with
24389 the extraction of the other element. */
24391 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
24392 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
24395 op0 = val, op1 = tmp;
24397 op0 = tmp, op1 = val;
24399 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
24400 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24405 use_vec_merge = TARGET_SSE4_1;
24412 use_vec_merge = true;
24416 /* tmp = target = A B C D */
24417 tmp = copy_to_reg (target);
24418 /* target = A A B B */
24419 emit_insn (gen_sse_unpcklps (target, target, target));
24420 /* target = X A B B */
24421 ix86_expand_vector_set (false, target, val, 0);
24422 /* target = A X C D */
24423 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24424 GEN_INT (1), GEN_INT (0),
24425 GEN_INT (2+4), GEN_INT (3+4)));
24429 /* tmp = target = A B C D */
24430 tmp = copy_to_reg (target);
24431 /* tmp = X B C D */
24432 ix86_expand_vector_set (false, tmp, val, 0);
24433 /* target = A B X D */
24434 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24435 GEN_INT (0), GEN_INT (1),
24436 GEN_INT (0+4), GEN_INT (3+4)));
24440 /* tmp = target = A B C D */
24441 tmp = copy_to_reg (target);
24442 /* tmp = X B C D */
24443 ix86_expand_vector_set (false, tmp, val, 0);
24444 /* target = A B X D */
24445 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24446 GEN_INT (0), GEN_INT (1),
24447 GEN_INT (2+4), GEN_INT (0+4)));
24451 gcc_unreachable ();
24456 use_vec_merge = TARGET_SSE4_1;
24460 /* Element 0 handled by vec_merge below. */
24463 use_vec_merge = true;
24469 /* With SSE2, use integer shuffles to swap element 0 and ELT,
24470 store into element 0, then shuffle them back. */
24474 order[0] = GEN_INT (elt);
24475 order[1] = const1_rtx;
24476 order[2] = const2_rtx;
24477 order[3] = GEN_INT (3);
24478 order[elt] = const0_rtx;
24480 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24481 order[1], order[2], order[3]));
24483 ix86_expand_vector_set (false, target, val, 0);
24485 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24486 order[1], order[2], order[3]));
24490 /* For SSE1, we have to reuse the V4SF code. */
24491 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
24492 gen_lowpart (SFmode, val), elt);
24497 use_vec_merge = TARGET_SSE2;
24500 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24504 use_vec_merge = TARGET_SSE4_1;
24514 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
24515 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
24516 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24520 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24522 emit_move_insn (mem, target);
24524 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24525 emit_move_insn (tmp, val);
24527 emit_move_insn (target, mem);
24532 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
24534 enum machine_mode mode = GET_MODE (vec);
24535 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24536 bool use_vec_extr = false;
24549 use_vec_extr = true;
24553 use_vec_extr = TARGET_SSE4_1;
24565 tmp = gen_reg_rtx (mode);
24566 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
24567 GEN_INT (elt), GEN_INT (elt),
24568 GEN_INT (elt+4), GEN_INT (elt+4)));
24572 tmp = gen_reg_rtx (mode);
24573 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
24577 gcc_unreachable ();
24580 use_vec_extr = true;
24585 use_vec_extr = TARGET_SSE4_1;
24599 tmp = gen_reg_rtx (mode);
24600 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
24601 GEN_INT (elt), GEN_INT (elt),
24602 GEN_INT (elt), GEN_INT (elt)));
24606 tmp = gen_reg_rtx (mode);
24607 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
24611 gcc_unreachable ();
24614 use_vec_extr = true;
24619 /* For SSE1, we have to reuse the V4SF code. */
24620 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
24621 gen_lowpart (V4SFmode, vec), elt);
24627 use_vec_extr = TARGET_SSE2;
24630 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24634 use_vec_extr = TARGET_SSE4_1;
24638 /* ??? Could extract the appropriate HImode element and shift. */
24645 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
24646 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
24648 /* Let the rtl optimizers know about the zero extension performed. */
24649 if (inner_mode == QImode || inner_mode == HImode)
24651 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
24652 target = gen_lowpart (SImode, target);
24655 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24659 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24661 emit_move_insn (mem, vec);
24663 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24664 emit_move_insn (target, tmp);
24668 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
24669 pattern to reduce; DEST is the destination; IN is the input vector. */
24672 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
24674 rtx tmp1, tmp2, tmp3;
24676 tmp1 = gen_reg_rtx (V4SFmode);
24677 tmp2 = gen_reg_rtx (V4SFmode);
24678 tmp3 = gen_reg_rtx (V4SFmode);
24680 emit_insn (gen_sse_movhlps (tmp1, in, in));
24681 emit_insn (fn (tmp2, tmp1, in));
24683 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
24684 GEN_INT (1), GEN_INT (1),
24685 GEN_INT (1+4), GEN_INT (1+4)));
24686 emit_insn (fn (dest, tmp2, tmp3));
24689 /* Target hook for scalar_mode_supported_p. */
24691 ix86_scalar_mode_supported_p (enum machine_mode mode)
24693 if (DECIMAL_FLOAT_MODE_P (mode))
24695 else if (mode == TFmode)
24698 return default_scalar_mode_supported_p (mode);
24701 /* Implements target hook vector_mode_supported_p. */
24703 ix86_vector_mode_supported_p (enum machine_mode mode)
24705 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24707 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24709 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24711 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24716 /* Target hook for c_mode_for_suffix. */
24717 static enum machine_mode
24718 ix86_c_mode_for_suffix (char suffix)
24728 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24730 We do this in the new i386 backend to maintain source compatibility
24731 with the old cc0-based compiler. */
24734 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24735 tree inputs ATTRIBUTE_UNUSED,
24738 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24740 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24745 /* Implements target vector targetm.asm.encode_section_info. This
24746 is not used by netware. */
24748 static void ATTRIBUTE_UNUSED
24749 ix86_encode_section_info (tree decl, rtx rtl, int first)
24751 default_encode_section_info (decl, rtl, first);
24753 if (TREE_CODE (decl) == VAR_DECL
24754 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24755 && ix86_in_large_data_p (decl))
24756 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24759 /* Worker function for REVERSE_CONDITION. */
24762 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24764 return (mode != CCFPmode && mode != CCFPUmode
24765 ? reverse_condition (code)
24766 : reverse_condition_maybe_unordered (code));
24769 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24773 output_387_reg_move (rtx insn, rtx *operands)
24775 if (REG_P (operands[0]))
24777 if (REG_P (operands[1])
24778 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24780 if (REGNO (operands[0]) == FIRST_STACK_REG)
24781 return output_387_ffreep (operands, 0);
24782 return "fstp\t%y0";
24784 if (STACK_TOP_P (operands[0]))
24785 return "fld%z1\t%y1";
24788 else if (MEM_P (operands[0]))
24790 gcc_assert (REG_P (operands[1]));
24791 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24792 return "fstp%z0\t%y0";
24795 /* There is no non-popping store to memory for XFmode.
24796 So if we need one, follow the store with a load. */
24797 if (GET_MODE (operands[0]) == XFmode)
24798 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24800 return "fst%z0\t%y0";
24807 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24808 FP status register is set. */
24811 ix86_emit_fp_unordered_jump (rtx label)
24813 rtx reg = gen_reg_rtx (HImode);
24816 emit_insn (gen_x86_fnstsw_1 (reg));
24818 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24820 emit_insn (gen_x86_sahf_1 (reg));
24822 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24823 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24827 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24829 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24830 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24833 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24834 gen_rtx_LABEL_REF (VOIDmode, label),
24836 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24838 emit_jump_insn (temp);
24839 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24842 /* Output code to perform a log1p XFmode calculation. */
24844 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24846 rtx label1 = gen_label_rtx ();
24847 rtx label2 = gen_label_rtx ();
24849 rtx tmp = gen_reg_rtx (XFmode);
24850 rtx tmp2 = gen_reg_rtx (XFmode);
24852 emit_insn (gen_absxf2 (tmp, op1));
24853 emit_insn (gen_cmpxf (tmp,
24854 CONST_DOUBLE_FROM_REAL_VALUE (
24855 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24857 emit_jump_insn (gen_bge (label1));
24859 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24860 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24861 emit_jump (label2);
24863 emit_label (label1);
24864 emit_move_insn (tmp, CONST1_RTX (XFmode));
24865 emit_insn (gen_addxf3 (tmp, op1, tmp));
24866 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24867 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24869 emit_label (label2);
24872 /* Output code to perform a Newton-Rhapson approximation of a single precision
24873 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24875 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24877 rtx x0, x1, e0, e1, two;
24879 x0 = gen_reg_rtx (mode);
24880 e0 = gen_reg_rtx (mode);
24881 e1 = gen_reg_rtx (mode);
24882 x1 = gen_reg_rtx (mode);
24884 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24886 if (VECTOR_MODE_P (mode))
24887 two = ix86_build_const_vector (SFmode, true, two);
24889 two = force_reg (mode, two);
24891 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24893 /* x0 = rcp(b) estimate */
24894 emit_insn (gen_rtx_SET (VOIDmode, x0,
24895 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24898 emit_insn (gen_rtx_SET (VOIDmode, e0,
24899 gen_rtx_MULT (mode, x0, b)));
24901 emit_insn (gen_rtx_SET (VOIDmode, e1,
24902 gen_rtx_MINUS (mode, two, e0)));
24904 emit_insn (gen_rtx_SET (VOIDmode, x1,
24905 gen_rtx_MULT (mode, x0, e1)));
24907 emit_insn (gen_rtx_SET (VOIDmode, res,
24908 gen_rtx_MULT (mode, a, x1)));
24911 /* Output code to perform a Newton-Rhapson approximation of a
24912 single precision floating point [reciprocal] square root. */
24914 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24917 rtx x0, e0, e1, e2, e3, mthree, mhalf;
24920 x0 = gen_reg_rtx (mode);
24921 e0 = gen_reg_rtx (mode);
24922 e1 = gen_reg_rtx (mode);
24923 e2 = gen_reg_rtx (mode);
24924 e3 = gen_reg_rtx (mode);
24926 real_from_integer (&r, VOIDmode, -3, -1, 0);
24927 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24929 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24930 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24932 if (VECTOR_MODE_P (mode))
24934 mthree = ix86_build_const_vector (SFmode, true, mthree);
24935 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
24938 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24939 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
24941 /* x0 = rsqrt(a) estimate */
24942 emit_insn (gen_rtx_SET (VOIDmode, x0,
24943 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24946 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
24951 zero = gen_reg_rtx (mode);
24952 mask = gen_reg_rtx (mode);
24954 zero = force_reg (mode, CONST0_RTX(mode));
24955 emit_insn (gen_rtx_SET (VOIDmode, mask,
24956 gen_rtx_NE (mode, zero, a)));
24958 emit_insn (gen_rtx_SET (VOIDmode, x0,
24959 gen_rtx_AND (mode, x0, mask)));
24963 emit_insn (gen_rtx_SET (VOIDmode, e0,
24964 gen_rtx_MULT (mode, x0, a)));
24966 emit_insn (gen_rtx_SET (VOIDmode, e1,
24967 gen_rtx_MULT (mode, e0, x0)));
24970 mthree = force_reg (mode, mthree);
24971 emit_insn (gen_rtx_SET (VOIDmode, e2,
24972 gen_rtx_PLUS (mode, e1, mthree)));
24974 mhalf = force_reg (mode, mhalf);
24976 /* e3 = -.5 * x0 */
24977 emit_insn (gen_rtx_SET (VOIDmode, e3,
24978 gen_rtx_MULT (mode, x0, mhalf)));
24980 /* e3 = -.5 * e0 */
24981 emit_insn (gen_rtx_SET (VOIDmode, e3,
24982 gen_rtx_MULT (mode, e0, mhalf)));
24983 /* ret = e2 * e3 */
24984 emit_insn (gen_rtx_SET (VOIDmode, res,
24985 gen_rtx_MULT (mode, e2, e3)));
24988 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24990 static void ATTRIBUTE_UNUSED
24991 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24994 /* With Binutils 2.15, the "@unwind" marker must be specified on
24995 every occurrence of the ".eh_frame" section, not just the first
24998 && strcmp (name, ".eh_frame") == 0)
25000 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
25001 flags & SECTION_WRITE ? "aw" : "a");
25004 default_elf_asm_named_section (name, flags, decl);
25007 /* Return the mangling of TYPE if it is an extended fundamental type. */
25009 static const char *
25010 ix86_mangle_type (const_tree type)
25012 type = TYPE_MAIN_VARIANT (type);
25014 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
25015 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
25018 switch (TYPE_MODE (type))
25021 /* __float128 is "g". */
25024 /* "long double" or __float80 is "e". */
25031 /* For 32-bit code we can save PIC register setup by using
25032 __stack_chk_fail_local hidden function instead of calling
25033 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
25034 register, so it is better to call __stack_chk_fail directly. */
25037 ix86_stack_protect_fail (void)
25039 return TARGET_64BIT
25040 ? default_external_stack_protect_fail ()
25041 : default_hidden_stack_protect_fail ();
25044 /* Select a format to encode pointers in exception handling data. CODE
25045 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
25046 true if the symbol may be affected by dynamic relocations.
25048 ??? All x86 object file formats are capable of representing this.
25049 After all, the relocation needed is the same as for the call insn.
25050 Whether or not a particular assembler allows us to enter such, I
25051 guess we'll have to see. */
25053 asm_preferred_eh_data_format (int code, int global)
25057 int type = DW_EH_PE_sdata8;
25059 || ix86_cmodel == CM_SMALL_PIC
25060 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
25061 type = DW_EH_PE_sdata4;
25062 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
25064 if (ix86_cmodel == CM_SMALL
25065 || (ix86_cmodel == CM_MEDIUM && code))
25066 return DW_EH_PE_udata4;
25067 return DW_EH_PE_absptr;
25070 /* Expand copysign from SIGN to the positive value ABS_VALUE
25071 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
25074 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
25076 enum machine_mode mode = GET_MODE (sign);
25077 rtx sgn = gen_reg_rtx (mode);
25078 if (mask == NULL_RTX)
25080 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
25081 if (!VECTOR_MODE_P (mode))
25083 /* We need to generate a scalar mode mask in this case. */
25084 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
25085 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
25086 mask = gen_reg_rtx (mode);
25087 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
25091 mask = gen_rtx_NOT (mode, mask);
25092 emit_insn (gen_rtx_SET (VOIDmode, sgn,
25093 gen_rtx_AND (mode, mask, sign)));
25094 emit_insn (gen_rtx_SET (VOIDmode, result,
25095 gen_rtx_IOR (mode, abs_value, sgn)));
25098 /* Expand fabs (OP0) and return a new rtx that holds the result. The
25099 mask for masking out the sign-bit is stored in *SMASK, if that is
25102 ix86_expand_sse_fabs (rtx op0, rtx *smask)
25104 enum machine_mode mode = GET_MODE (op0);
25107 xa = gen_reg_rtx (mode);
25108 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
25109 if (!VECTOR_MODE_P (mode))
25111 /* We need to generate a scalar mode mask in this case. */
25112 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
25113 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
25114 mask = gen_reg_rtx (mode);
25115 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
25117 emit_insn (gen_rtx_SET (VOIDmode, xa,
25118 gen_rtx_AND (mode, op0, mask)));
25126 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
25127 swapping the operands if SWAP_OPERANDS is true. The expanded
25128 code is a forward jump to a newly created label in case the
25129 comparison is true. The generated label rtx is returned. */
25131 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
25132 bool swap_operands)
25143 label = gen_label_rtx ();
25144 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
25145 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25146 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
25147 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
25148 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25149 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
25150 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25151 JUMP_LABEL (tmp) = label;
25156 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
25157 using comparison code CODE. Operands are swapped for the comparison if
25158 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
25160 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
25161 bool swap_operands)
25163 enum machine_mode mode = GET_MODE (op0);
25164 rtx mask = gen_reg_rtx (mode);
25173 if (mode == DFmode)
25174 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
25175 gen_rtx_fmt_ee (code, mode, op0, op1)));
25177 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
25178 gen_rtx_fmt_ee (code, mode, op0, op1)));
25183 /* Generate and return a rtx of mode MODE for 2**n where n is the number
25184 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
25186 ix86_gen_TWO52 (enum machine_mode mode)
25188 REAL_VALUE_TYPE TWO52r;
25191 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
25192 TWO52 = const_double_from_real_value (TWO52r, mode);
25193 TWO52 = force_reg (mode, TWO52);
25198 /* Expand SSE sequence for computing lround from OP1 storing
25201 ix86_expand_lround (rtx op0, rtx op1)
25203 /* C code for the stuff we're doing below:
25204 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
25207 enum machine_mode mode = GET_MODE (op1);
25208 const struct real_format *fmt;
25209 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25212 /* load nextafter (0.5, 0.0) */
25213 fmt = REAL_MODE_FORMAT (mode);
25214 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25215 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25217 /* adj = copysign (0.5, op1) */
25218 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
25219 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
25221 /* adj = op1 + adj */
25222 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
25224 /* op0 = (imode)adj */
25225 expand_fix (op0, adj, 0);
25228 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
25231 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
25233 /* C code for the stuff we're doing below (for do_floor):
25235 xi -= (double)xi > op1 ? 1 : 0;
25238 enum machine_mode fmode = GET_MODE (op1);
25239 enum machine_mode imode = GET_MODE (op0);
25240 rtx ireg, freg, label, tmp;
25242 /* reg = (long)op1 */
25243 ireg = gen_reg_rtx (imode);
25244 expand_fix (ireg, op1, 0);
25246 /* freg = (double)reg */
25247 freg = gen_reg_rtx (fmode);
25248 expand_float (freg, ireg, 0);
25250 /* ireg = (freg > op1) ? ireg - 1 : ireg */
25251 label = ix86_expand_sse_compare_and_jump (UNLE,
25252 freg, op1, !do_floor);
25253 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
25254 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
25255 emit_move_insn (ireg, tmp);
25257 emit_label (label);
25258 LABEL_NUSES (label) = 1;
25260 emit_move_insn (op0, ireg);
25263 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
25264 result in OPERAND0. */
25266 ix86_expand_rint (rtx operand0, rtx operand1)
25268 /* C code for the stuff we're doing below:
25269 xa = fabs (operand1);
25270 if (!isless (xa, 2**52))
25272 xa = xa + 2**52 - 2**52;
25273 return copysign (xa, operand1);
25275 enum machine_mode mode = GET_MODE (operand0);
25276 rtx res, xa, label, TWO52, mask;
25278 res = gen_reg_rtx (mode);
25279 emit_move_insn (res, operand1);
25281 /* xa = abs (operand1) */
25282 xa = ix86_expand_sse_fabs (res, &mask);
25284 /* if (!isless (xa, TWO52)) goto label; */
25285 TWO52 = ix86_gen_TWO52 (mode);
25286 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25288 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25289 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25291 ix86_sse_copysign_to_positive (res, xa, res, mask);
25293 emit_label (label);
25294 LABEL_NUSES (label) = 1;
25296 emit_move_insn (operand0, res);
25299 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
25302 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
25304 /* C code for the stuff we expand below.
25305 double xa = fabs (x), x2;
25306 if (!isless (xa, TWO52))
25308 xa = xa + TWO52 - TWO52;
25309 x2 = copysign (xa, x);
25318 enum machine_mode mode = GET_MODE (operand0);
25319 rtx xa, TWO52, tmp, label, one, res, mask;
25321 TWO52 = ix86_gen_TWO52 (mode);
25323 /* Temporary for holding the result, initialized to the input
25324 operand to ease control flow. */
25325 res = gen_reg_rtx (mode);
25326 emit_move_insn (res, operand1);
25328 /* xa = abs (operand1) */
25329 xa = ix86_expand_sse_fabs (res, &mask);
25331 /* if (!isless (xa, TWO52)) goto label; */
25332 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25334 /* xa = xa + TWO52 - TWO52; */
25335 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25336 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25338 /* xa = copysign (xa, operand1) */
25339 ix86_sse_copysign_to_positive (xa, xa, res, mask);
25341 /* generate 1.0 or -1.0 */
25342 one = force_reg (mode,
25343 const_double_from_real_value (do_floor
25344 ? dconst1 : dconstm1, mode));
25346 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25347 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25348 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25349 gen_rtx_AND (mode, one, tmp)));
25350 /* We always need to subtract here to preserve signed zero. */
25351 tmp = expand_simple_binop (mode, MINUS,
25352 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25353 emit_move_insn (res, tmp);
25355 emit_label (label);
25356 LABEL_NUSES (label) = 1;
25358 emit_move_insn (operand0, res);
25361 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
25364 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
25366 /* C code for the stuff we expand below.
25367 double xa = fabs (x), x2;
25368 if (!isless (xa, TWO52))
25370 x2 = (double)(long)x;
25377 if (HONOR_SIGNED_ZEROS (mode))
25378 return copysign (x2, x);
25381 enum machine_mode mode = GET_MODE (operand0);
25382 rtx xa, xi, TWO52, tmp, label, one, res, mask;
25384 TWO52 = ix86_gen_TWO52 (mode);
25386 /* Temporary for holding the result, initialized to the input
25387 operand to ease control flow. */
25388 res = gen_reg_rtx (mode);
25389 emit_move_insn (res, operand1);
25391 /* xa = abs (operand1) */
25392 xa = ix86_expand_sse_fabs (res, &mask);
25394 /* if (!isless (xa, TWO52)) goto label; */
25395 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25397 /* xa = (double)(long)x */
25398 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25399 expand_fix (xi, res, 0);
25400 expand_float (xa, xi, 0);
25403 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25405 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25406 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25407 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25408 gen_rtx_AND (mode, one, tmp)));
25409 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
25410 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25411 emit_move_insn (res, tmp);
25413 if (HONOR_SIGNED_ZEROS (mode))
25414 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25416 emit_label (label);
25417 LABEL_NUSES (label) = 1;
25419 emit_move_insn (operand0, res);
25422 /* Expand SSE sequence for computing round from OPERAND1 storing
25423 into OPERAND0. Sequence that works without relying on DImode truncation
25424 via cvttsd2siq that is only available on 64bit targets. */
25426 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
25428 /* C code for the stuff we expand below.
25429 double xa = fabs (x), xa2, x2;
25430 if (!isless (xa, TWO52))
25432 Using the absolute value and copying back sign makes
25433 -0.0 -> -0.0 correct.
25434 xa2 = xa + TWO52 - TWO52;
25439 else if (dxa > 0.5)
25441 x2 = copysign (xa2, x);
25444 enum machine_mode mode = GET_MODE (operand0);
25445 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
25447 TWO52 = ix86_gen_TWO52 (mode);
25449 /* Temporary for holding the result, initialized to the input
25450 operand to ease control flow. */
25451 res = gen_reg_rtx (mode);
25452 emit_move_insn (res, operand1);
25454 /* xa = abs (operand1) */
25455 xa = ix86_expand_sse_fabs (res, &mask);
25457 /* if (!isless (xa, TWO52)) goto label; */
25458 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25460 /* xa2 = xa + TWO52 - TWO52; */
25461 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25462 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
25464 /* dxa = xa2 - xa; */
25465 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
25467 /* generate 0.5, 1.0 and -0.5 */
25468 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
25469 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
25470 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
25474 tmp = gen_reg_rtx (mode);
25475 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
25476 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
25477 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25478 gen_rtx_AND (mode, one, tmp)));
25479 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25480 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
25481 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
25482 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25483 gen_rtx_AND (mode, one, tmp)));
25484 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25486 /* res = copysign (xa2, operand1) */
25487 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
25489 emit_label (label);
25490 LABEL_NUSES (label) = 1;
25492 emit_move_insn (operand0, res);
25495 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25498 ix86_expand_trunc (rtx operand0, rtx operand1)
25500 /* C code for SSE variant we expand below.
25501 double xa = fabs (x), x2;
25502 if (!isless (xa, TWO52))
25504 x2 = (double)(long)x;
25505 if (HONOR_SIGNED_ZEROS (mode))
25506 return copysign (x2, x);
25509 enum machine_mode mode = GET_MODE (operand0);
25510 rtx xa, xi, TWO52, label, res, mask;
25512 TWO52 = ix86_gen_TWO52 (mode);
25514 /* Temporary for holding the result, initialized to the input
25515 operand to ease control flow. */
25516 res = gen_reg_rtx (mode);
25517 emit_move_insn (res, operand1);
25519 /* xa = abs (operand1) */
25520 xa = ix86_expand_sse_fabs (res, &mask);
25522 /* if (!isless (xa, TWO52)) goto label; */
25523 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25525 /* x = (double)(long)x */
25526 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25527 expand_fix (xi, res, 0);
25528 expand_float (res, xi, 0);
25530 if (HONOR_SIGNED_ZEROS (mode))
25531 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25533 emit_label (label);
25534 LABEL_NUSES (label) = 1;
25536 emit_move_insn (operand0, res);
25539 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25542 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
25544 enum machine_mode mode = GET_MODE (operand0);
25545 rtx xa, mask, TWO52, label, one, res, smask, tmp;
25547 /* C code for SSE variant we expand below.
25548 double xa = fabs (x), x2;
25549 if (!isless (xa, TWO52))
25551 xa2 = xa + TWO52 - TWO52;
25555 x2 = copysign (xa2, x);
25559 TWO52 = ix86_gen_TWO52 (mode);
25561 /* Temporary for holding the result, initialized to the input
25562 operand to ease control flow. */
25563 res = gen_reg_rtx (mode);
25564 emit_move_insn (res, operand1);
25566 /* xa = abs (operand1) */
25567 xa = ix86_expand_sse_fabs (res, &smask);
25569 /* if (!isless (xa, TWO52)) goto label; */
25570 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25572 /* res = xa + TWO52 - TWO52; */
25573 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25574 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
25575 emit_move_insn (res, tmp);
25578 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25580 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
25581 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
25582 emit_insn (gen_rtx_SET (VOIDmode, mask,
25583 gen_rtx_AND (mode, mask, one)));
25584 tmp = expand_simple_binop (mode, MINUS,
25585 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
25586 emit_move_insn (res, tmp);
25588 /* res = copysign (res, operand1) */
25589 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
25591 emit_label (label);
25592 LABEL_NUSES (label) = 1;
25594 emit_move_insn (operand0, res);
25597 /* Expand SSE sequence for computing round from OPERAND1 storing
25600 ix86_expand_round (rtx operand0, rtx operand1)
25602 /* C code for the stuff we're doing below:
25603 double xa = fabs (x);
25604 if (!isless (xa, TWO52))
25606 xa = (double)(long)(xa + nextafter (0.5, 0.0));
25607 return copysign (xa, x);
25609 enum machine_mode mode = GET_MODE (operand0);
25610 rtx res, TWO52, xa, label, xi, half, mask;
25611 const struct real_format *fmt;
25612 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25614 /* Temporary for holding the result, initialized to the input
25615 operand to ease control flow. */
25616 res = gen_reg_rtx (mode);
25617 emit_move_insn (res, operand1);
25619 TWO52 = ix86_gen_TWO52 (mode);
25620 xa = ix86_expand_sse_fabs (res, &mask);
25621 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25623 /* load nextafter (0.5, 0.0) */
25624 fmt = REAL_MODE_FORMAT (mode);
25625 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25626 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25628 /* xa = xa + 0.5 */
25629 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
25630 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
25632 /* xa = (double)(int64_t)xa */
25633 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25634 expand_fix (xi, xa, 0);
25635 expand_float (xa, xi, 0);
25637 /* res = copysign (xa, operand1) */
25638 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
25640 emit_label (label);
25641 LABEL_NUSES (label) = 1;
25643 emit_move_insn (operand0, res);
25647 /* Validate whether a SSE5 instruction is valid or not.
25648 OPERANDS is the array of operands.
25649 NUM is the number of operands.
25650 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
25651 NUM_MEMORY is the maximum number of memory operands to accept. */
25654 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
25655 bool uses_oc0, int num_memory)
25661 /* Count the number of memory arguments */
25664 for (i = 0; i < num; i++)
25666 enum machine_mode mode = GET_MODE (operands[i]);
25667 if (register_operand (operands[i], mode))
25670 else if (memory_operand (operands[i], mode))
25672 mem_mask |= (1 << i);
25678 rtx pattern = PATTERN (insn);
25680 /* allow 0 for pcmov */
25681 if (GET_CODE (pattern) != SET
25682 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
25684 || operands[i] != CONST0_RTX (mode))
25689 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
25690 a memory operation. */
25691 if (num_memory < 0)
25693 num_memory = -num_memory;
25694 if ((mem_mask & (1 << (num-1))) != 0)
25696 mem_mask &= ~(1 << (num-1));
25701 /* If there were no memory operations, allow the insn */
25705 /* Do not allow the destination register to be a memory operand. */
25706 else if (mem_mask & (1 << 0))
25709 /* If there are too many memory operations, disallow the instruction. While
25710 the hardware only allows 1 memory reference, before register allocation
25711 for some insns, we allow two memory operations sometimes in order to allow
25712 code like the following to be optimized:
25714 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
25716 or similar cases that are vectorized into using the fmaddss
25718 else if (mem_count > num_memory)
25721 /* Don't allow more than one memory operation if not optimizing. */
25722 else if (mem_count > 1 && !optimize)
25725 else if (num == 4 && mem_count == 1)
25727 /* formats (destination is the first argument), example fmaddss:
25728 xmm1, xmm1, xmm2, xmm3/mem
25729 xmm1, xmm1, xmm2/mem, xmm3
25730 xmm1, xmm2, xmm3/mem, xmm1
25731 xmm1, xmm2/mem, xmm3, xmm1 */
25733 return ((mem_mask == (1 << 1))
25734 || (mem_mask == (1 << 2))
25735 || (mem_mask == (1 << 3)));
25737 /* format, example pmacsdd:
25738 xmm1, xmm2, xmm3/mem, xmm1 */
25740 return (mem_mask == (1 << 2));
25743 else if (num == 4 && num_memory == 2)
25745 /* If there are two memory operations, we can load one of the memory ops
25746 into the destination register. This is for optimizing the
25747 multiply/add ops, which the combiner has optimized both the multiply
25748 and the add insns to have a memory operation. We have to be careful
25749 that the destination doesn't overlap with the inputs. */
25750 rtx op0 = operands[0];
25752 if (reg_mentioned_p (op0, operands[1])
25753 || reg_mentioned_p (op0, operands[2])
25754 || reg_mentioned_p (op0, operands[3]))
25757 /* formats (destination is the first argument), example fmaddss:
25758 xmm1, xmm1, xmm2, xmm3/mem
25759 xmm1, xmm1, xmm2/mem, xmm3
25760 xmm1, xmm2, xmm3/mem, xmm1
25761 xmm1, xmm2/mem, xmm3, xmm1
25763 For the oc0 case, we will load either operands[1] or operands[3] into
25764 operands[0], so any combination of 2 memory operands is ok. */
25768 /* format, example pmacsdd:
25769 xmm1, xmm2, xmm3/mem, xmm1
25771 For the integer multiply/add instructions be more restrictive and
25772 require operands[2] and operands[3] to be the memory operands. */
25774 return (mem_mask == ((1 << 2) | (1 << 3)));
25777 else if (num == 3 && num_memory == 1)
25779 /* formats, example protb:
25780 xmm1, xmm2, xmm3/mem
25781 xmm1, xmm2/mem, xmm3 */
25783 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25785 /* format, example comeq:
25786 xmm1, xmm2, xmm3/mem */
25788 return (mem_mask == (1 << 2));
25792 gcc_unreachable ();
25798 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
25799 hardware will allow by using the destination register to load one of the
25800 memory operations. Presently this is used by the multiply/add routines to
25801 allow 2 memory references. */
25804 ix86_expand_sse5_multiple_memory (rtx operands[],
25806 enum machine_mode mode)
25808 rtx op0 = operands[0];
25810 || memory_operand (op0, mode)
25811 || reg_mentioned_p (op0, operands[1])
25812 || reg_mentioned_p (op0, operands[2])
25813 || reg_mentioned_p (op0, operands[3]))
25814 gcc_unreachable ();
25816 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25817 the destination register. */
25818 if (memory_operand (operands[1], mode))
25820 emit_move_insn (op0, operands[1]);
25823 else if (memory_operand (operands[3], mode))
25825 emit_move_insn (op0, operands[3]);
25829 gcc_unreachable ();
25835 /* Table of valid machine attributes. */
25836 static const struct attribute_spec ix86_attribute_table[] =
25838 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25839 /* Stdcall attribute says callee is responsible for popping arguments
25840 if they are not variable. */
25841 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25842 /* Fastcall attribute says callee is responsible for popping arguments
25843 if they are not variable. */
25844 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25845 /* Cdecl attribute says the callee is a normal C declaration */
25846 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25847 /* Regparm attribute specifies how many integer arguments are to be
25848 passed in registers. */
25849 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25850 /* Sseregparm attribute says we are using x86_64 calling conventions
25851 for FP arguments. */
25852 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25853 /* force_align_arg_pointer says this function realigns the stack at entry. */
25854 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25855 false, true, true, ix86_handle_cconv_attribute },
25856 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25857 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25858 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25859 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25861 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25862 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25863 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25864 SUBTARGET_ATTRIBUTE_TABLE,
25866 { NULL, 0, 0, false, false, false, NULL }
25869 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25871 x86_builtin_vectorization_cost (bool runtime_test)
25873 /* If the branch of the runtime test is taken - i.e. - the vectorized
25874 version is skipped - this incurs a misprediction cost (because the
25875 vectorized version is expected to be the fall-through). So we subtract
25876 the latency of a mispredicted branch from the costs that are incured
25877 when the vectorized version is executed.
25879 TODO: The values in individual target tables have to be tuned or new
25880 fields may be needed. For eg. on K8, the default branch path is the
25881 not-taken path. If the taken path is predicted correctly, the minimum
25882 penalty of going down the taken-path is 1 cycle. If the taken-path is
25883 not predicted correctly, then the minimum penalty is 10 cycles. */
25887 return (-(ix86_cost->cond_taken_branch_cost));
25893 /* Initialize the GCC target structure. */
25894 #undef TARGET_RETURN_IN_MEMORY
25895 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
25897 #undef TARGET_ATTRIBUTE_TABLE
25898 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25899 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25900 # undef TARGET_MERGE_DECL_ATTRIBUTES
25901 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25904 #undef TARGET_COMP_TYPE_ATTRIBUTES
25905 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25907 #undef TARGET_INIT_BUILTINS
25908 #define TARGET_INIT_BUILTINS ix86_init_builtins
25909 #undef TARGET_EXPAND_BUILTIN
25910 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25912 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25913 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25914 ix86_builtin_vectorized_function
25916 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25917 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25919 #undef TARGET_BUILTIN_RECIPROCAL
25920 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25922 #undef TARGET_ASM_FUNCTION_EPILOGUE
25923 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25925 #undef TARGET_ENCODE_SECTION_INFO
25926 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25927 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25929 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25932 #undef TARGET_ASM_OPEN_PAREN
25933 #define TARGET_ASM_OPEN_PAREN ""
25934 #undef TARGET_ASM_CLOSE_PAREN
25935 #define TARGET_ASM_CLOSE_PAREN ""
25937 #undef TARGET_ASM_ALIGNED_HI_OP
25938 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25939 #undef TARGET_ASM_ALIGNED_SI_OP
25940 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25942 #undef TARGET_ASM_ALIGNED_DI_OP
25943 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25946 #undef TARGET_ASM_UNALIGNED_HI_OP
25947 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25948 #undef TARGET_ASM_UNALIGNED_SI_OP
25949 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25950 #undef TARGET_ASM_UNALIGNED_DI_OP
25951 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25953 #undef TARGET_SCHED_ADJUST_COST
25954 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25955 #undef TARGET_SCHED_ISSUE_RATE
25956 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25957 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25958 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25959 ia32_multipass_dfa_lookahead
25961 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25962 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25965 #undef TARGET_HAVE_TLS
25966 #define TARGET_HAVE_TLS true
25968 #undef TARGET_CANNOT_FORCE_CONST_MEM
25969 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25970 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25971 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25973 #undef TARGET_DELEGITIMIZE_ADDRESS
25974 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25976 #undef TARGET_MS_BITFIELD_LAYOUT_P
25977 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25980 #undef TARGET_BINDS_LOCAL_P
25981 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25983 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25984 #undef TARGET_BINDS_LOCAL_P
25985 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25988 #undef TARGET_ASM_OUTPUT_MI_THUNK
25989 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25990 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25991 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25993 #undef TARGET_ASM_FILE_START
25994 #define TARGET_ASM_FILE_START x86_file_start
25996 #undef TARGET_DEFAULT_TARGET_FLAGS
25997 #define TARGET_DEFAULT_TARGET_FLAGS \
25999 | TARGET_SUBTARGET_DEFAULT \
26000 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
26002 #undef TARGET_HANDLE_OPTION
26003 #define TARGET_HANDLE_OPTION ix86_handle_option
26005 #undef TARGET_RTX_COSTS
26006 #define TARGET_RTX_COSTS ix86_rtx_costs
26007 #undef TARGET_ADDRESS_COST
26008 #define TARGET_ADDRESS_COST ix86_address_cost
26010 #undef TARGET_FIXED_CONDITION_CODE_REGS
26011 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
26012 #undef TARGET_CC_MODES_COMPATIBLE
26013 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
26015 #undef TARGET_MACHINE_DEPENDENT_REORG
26016 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
26018 #undef TARGET_BUILD_BUILTIN_VA_LIST
26019 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
26021 #undef TARGET_EXPAND_BUILTIN_VA_START
26022 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
26024 #undef TARGET_MD_ASM_CLOBBERS
26025 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
26027 #undef TARGET_PROMOTE_PROTOTYPES
26028 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
26029 #undef TARGET_STRUCT_VALUE_RTX
26030 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
26031 #undef TARGET_SETUP_INCOMING_VARARGS
26032 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
26033 #undef TARGET_MUST_PASS_IN_STACK
26034 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
26035 #undef TARGET_PASS_BY_REFERENCE
26036 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
26037 #undef TARGET_INTERNAL_ARG_POINTER
26038 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
26039 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
26040 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
26041 #undef TARGET_STRICT_ARGUMENT_NAMING
26042 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
26044 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
26045 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
26047 #undef TARGET_SCALAR_MODE_SUPPORTED_P
26048 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
26050 #undef TARGET_VECTOR_MODE_SUPPORTED_P
26051 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
26053 #undef TARGET_C_MODE_FOR_SUFFIX
26054 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
26057 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
26058 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
26061 #ifdef SUBTARGET_INSERT_ATTRIBUTES
26062 #undef TARGET_INSERT_ATTRIBUTES
26063 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
26066 #undef TARGET_MANGLE_TYPE
26067 #define TARGET_MANGLE_TYPE ix86_mangle_type
26069 #undef TARGET_STACK_PROTECT_FAIL
26070 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
26072 #undef TARGET_FUNCTION_VALUE
26073 #define TARGET_FUNCTION_VALUE ix86_function_value
26075 #undef TARGET_SECONDARY_RELOAD
26076 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
26078 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
26079 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
26081 struct gcc_target targetm = TARGET_INITIALIZER;
26083 #include "gt-i386.h"