1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
/* Forward declarations; both are `static', so their definitions live
   later in this translation unit.  */
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default stack-limit sentinel; -1 means "no limit".  A target header may
   override this before we get here.  */
60 #ifndef CHECK_STACK_LIMIT
61 #define CHECK_STACK_LIMIT (-1)
62 #endif
64 /* Return index of given mode in mult and division cost tables.
   QI=0, HI=1, SI=2, DI=3, anything else (the "other" column) = 4.  */
65 #define MODE_INDEX(mode) \
66 ((mode) == QImode ? 0 \
67 : (mode) == HImode ? 1 \
68 : (mode) == SImode ? 2 \
69 : (mode) == DImode ? 3 \
70 : 4)
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
74 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop-algorithm entry: always fall back to a library
   call.  The -1 bound presumably means "any block size" -- matches its
   use in the cost tables below; TODO confirm against struct
   stringop_algs.  */
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.  */
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
704 time).  */
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporal accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
/* NOTE(review): the opening of this latency comment was lost in
   extraction; in upstream it is a K8-vs-AMDFAM10 MOVD latency table.
76 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3 */
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
790 time).  */
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporal accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
877 {-1, libcall}}},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
995 in SFmode, DFmode and XFmode */
996 2, /* cost of moving MMX register */
997 {6, 6}, /* cost of loading MMX registers
998 in SImode and DImode */
999 {4, 4}, /* cost of storing MMX registers
1000 in SImode and DImode */
1001 2, /* cost of moving SSE register */
1002 {6, 6, 6}, /* cost of loading SSE registers
1003 in SImode, DImode and TImode */
1004 {4, 4, 4}, /* cost of storing SSE registers
1005 in SImode, DImode and TImode */
1006 2, /* MMX or SSE register to integer */
1007 32, /* size of l1 cache. */
1008 2048, /* size of l2 cache. */
1009 128, /* size of prefetch block */
1010 8, /* number of parallel prefetches */
1011 3, /* Branch cost */
1012 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1013 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1014 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1015 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1016 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1017 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1018 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1019 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 {{libcall, {{8, loop}, {15, unrolled_loop},
1022 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1023 {libcall, {{24, loop}, {32, unrolled_loop},
1024 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1025 1, /* scalar_stmt_cost. */
1026 1, /* scalar load_cost. */
1027 1, /* scalar_store_cost. */
1028 1, /* vec_stmt_cost. */
1029 1, /* vec_to_scalar_cost. */
1030 1, /* scalar_to_vec_cost. */
1031 1, /* vec_align_load_cost. */
1032 2, /* vec_unalign_load_cost. */
1033 1, /* vec_store_cost. */
1034 3, /* cond_taken_branch_cost. */
1035 1, /* cond_not_taken_branch_cost. */
1039 struct processor_costs atom_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1042 COSTS_N_INSNS (1), /* variable shift costs */
1043 COSTS_N_INSNS (1), /* constant shift costs */
1044 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1045 COSTS_N_INSNS (4), /* HI */
1046 COSTS_N_INSNS (3), /* SI */
1047 COSTS_N_INSNS (4), /* DI */
1048 COSTS_N_INSNS (2)}, /* other */
1049 0, /* cost of multiply per each bit set */
1050 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1051 COSTS_N_INSNS (26), /* HI */
1052 COSTS_N_INSNS (42), /* SI */
1053 COSTS_N_INSNS (74), /* DI */
1054 COSTS_N_INSNS (74)}, /* other */
1055 COSTS_N_INSNS (1), /* cost of movsx */
1056 COSTS_N_INSNS (1), /* cost of movzx */
1057 8, /* "large" insn */
1058 17, /* MOVE_RATIO */
1059 2, /* cost for loading QImode using movzbl */
1060 {4, 4, 4}, /* cost of loading integer registers
1061 in QImode, HImode and SImode.
1062 Relative to reg-reg move (2). */
1063 {4, 4, 4}, /* cost of storing integer registers */
1064 4, /* cost of reg,reg fld/fst */
1065 {12, 12, 12}, /* cost of loading fp registers
1066 in SFmode, DFmode and XFmode */
1067 {6, 6, 8}, /* cost of storing fp registers
1068 in SFmode, DFmode and XFmode */
1069 2, /* cost of moving MMX register */
1070 {8, 8}, /* cost of loading MMX registers
1071 in SImode and DImode */
1072 {8, 8}, /* cost of storing MMX registers
1073 in SImode and DImode */
1074 2, /* cost of moving SSE register */
1075 {8, 8, 8}, /* cost of loading SSE registers
1076 in SImode, DImode and TImode */
1077 {8, 8, 8}, /* cost of storing SSE registers
1078 in SImode, DImode and TImode */
1079 5, /* MMX or SSE register to integer */
1080 32, /* size of l1 cache. */
1081 256, /* size of l2 cache. */
1082 64, /* size of prefetch block */
1083 6, /* number of parallel prefetches */
1084 3, /* Branch cost */
1085 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1086 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1087 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1088 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1089 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1090 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1091 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1092 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1093 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1094 {{libcall, {{8, loop}, {15, unrolled_loop},
1095 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1096 {libcall, {{24, loop}, {32, unrolled_loop},
1097 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic64 should produce code tuned for Nocona and K8. */
1113 struct processor_costs generic64_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 /* On all chips taken into consideration lea is 2 cycles and more. With
1116 this cost however our current implementation of synth_mult results in
1117 use of unnecessary temporary registers causing regression on several
1118 SPECfp benchmarks. */
1119 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1120 COSTS_N_INSNS (1), /* variable shift costs */
1121 COSTS_N_INSNS (1), /* constant shift costs */
1122 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1123 COSTS_N_INSNS (4), /* HI */
1124 COSTS_N_INSNS (3), /* SI */
1125 COSTS_N_INSNS (4), /* DI */
1126 COSTS_N_INSNS (2)}, /* other */
1127 0, /* cost of multiply per each bit set */
1128 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1129 COSTS_N_INSNS (26), /* HI */
1130 COSTS_N_INSNS (42), /* SI */
1131 COSTS_N_INSNS (74), /* DI */
1132 COSTS_N_INSNS (74)}, /* other */
1133 COSTS_N_INSNS (1), /* cost of movsx */
1134 COSTS_N_INSNS (1), /* cost of movzx */
1135 8, /* "large" insn */
1136 17, /* MOVE_RATIO */
1137 4, /* cost for loading QImode using movzbl */
1138 {4, 4, 4}, /* cost of loading integer registers
1139 in QImode, HImode and SImode.
1140 Relative to reg-reg move (2). */
1141 {4, 4, 4}, /* cost of storing integer registers */
1142 4, /* cost of reg,reg fld/fst */
1143 {12, 12, 12}, /* cost of loading fp registers
1144 in SFmode, DFmode and XFmode */
1145 {6, 6, 8}, /* cost of storing fp registers
1146 in SFmode, DFmode and XFmode */
1147 2, /* cost of moving MMX register */
1148 {8, 8}, /* cost of loading MMX registers
1149 in SImode and DImode */
1150 {8, 8}, /* cost of storing MMX registers
1151 in SImode and DImode */
1152 2, /* cost of moving SSE register */
1153 {8, 8, 8}, /* cost of loading SSE registers
1154 in SImode, DImode and TImode */
1155 {8, 8, 8}, /* cost of storing SSE registers
1156 in SImode, DImode and TImode */
1157 5, /* MMX or SSE register to integer */
1158 32, /* size of l1 cache. */
1159 512, /* size of l2 cache. */
1160 64, /* size of prefetch block */
1161 6, /* number of parallel prefetches */
1162 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1163 is increased to perhaps more appropriate value of 5. */
1164 3, /* Branch cost */
1165 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1166 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1167 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1168 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1169 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1170 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1171 {DUMMY_STRINGOP_ALGS,
1172 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1173 {DUMMY_STRINGOP_ALGS,
1174 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1175 1, /* scalar_stmt_cost. */
1176 1, /* scalar load_cost. */
1177 1, /* scalar_store_cost. */
1178 1, /* vec_stmt_cost. */
1179 1, /* vec_to_scalar_cost. */
1180 1, /* scalar_to_vec_cost. */
1181 1, /* vec_align_load_cost. */
1182 2, /* vec_unalign_load_cost. */
1183 1, /* vec_store_cost. */
1184 3, /* cond_taken_branch_cost. */
1185 1, /* cond_not_taken_branch_cost. */
1188 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1190 struct processor_costs generic32_cost = {
1191 COSTS_N_INSNS (1), /* cost of an add instruction */
1192 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1193 COSTS_N_INSNS (1), /* variable shift costs */
1194 COSTS_N_INSNS (1), /* constant shift costs */
1195 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1196 COSTS_N_INSNS (4), /* HI */
1197 COSTS_N_INSNS (3), /* SI */
1198 COSTS_N_INSNS (4), /* DI */
1199 COSTS_N_INSNS (2)}, /* other */
1200 0, /* cost of multiply per each bit set */
1201 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1202 COSTS_N_INSNS (26), /* HI */
1203 COSTS_N_INSNS (42), /* SI */
1204 COSTS_N_INSNS (74), /* DI */
1205 COSTS_N_INSNS (74)}, /* other */
1206 COSTS_N_INSNS (1), /* cost of movsx */
1207 COSTS_N_INSNS (1), /* cost of movzx */
1208 8, /* "large" insn */
1209 17, /* MOVE_RATIO */
1210 4, /* cost for loading QImode using movzbl */
1211 {4, 4, 4}, /* cost of loading integer registers
1212 in QImode, HImode and SImode.
1213 Relative to reg-reg move (2). */
1214 {4, 4, 4}, /* cost of storing integer registers */
1215 4, /* cost of reg,reg fld/fst */
1216 {12, 12, 12}, /* cost of loading fp registers
1217 in SFmode, DFmode and XFmode */
1218 {6, 6, 8}, /* cost of storing fp registers
1219 in SFmode, DFmode and XFmode */
1220 2, /* cost of moving MMX register */
1221 {8, 8}, /* cost of loading MMX registers
1222 in SImode and DImode */
1223 {8, 8}, /* cost of storing MMX registers
1224 in SImode and DImode */
1225 2, /* cost of moving SSE register */
1226 {8, 8, 8}, /* cost of loading SSE registers
1227 in SImode, DImode and TImode */
1228 {8, 8, 8}, /* cost of storing SSE registers
1229 in SImode, DImode and TImode */
1230 5, /* MMX or SSE register to integer */
1231 32, /* size of l1 cache. */
1232 256, /* size of l2 cache. */
1233 64, /* size of prefetch block */
1234 6, /* number of parallel prefetches */
1235 3, /* Branch cost */
1236 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1237 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1238 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1239 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1240 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1241 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1242 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1243 DUMMY_STRINGOP_ALGS},
1244 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1245 DUMMY_STRINGOP_ALGS},
1246 1, /* scalar_stmt_cost. */
1247 1, /* scalar load_cost. */
1248 1, /* scalar_store_cost. */
1249 1, /* vec_stmt_cost. */
1250 1, /* vec_to_scalar_cost. */
1251 1, /* scalar_to_vec_cost. */
1252 1, /* vec_align_load_cost. */
1253 2, /* vec_unalign_load_cost. */
1254 1, /* vec_store_cost. */
1255 3, /* cond_taken_branch_cost. */
1256 1, /* cond_not_taken_branch_cost. */
/* Cost table the code generator consults at any given moment.  NOTE(review):
   initialized to &pentium_cost here; presumably redirected to the table
   matching the selected -mtune during option processing — confirm in the
   option-override code elsewhere in this file.  */
1259 const struct processor_costs *ix86_cost = &pentium_cost;
1261 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; these masks select which processors a
   tuning/architecture feature applies to in the tables below.  */
/* Intel processors.  */
1262 #define m_386 (1<<PROCESSOR_I386)
1263 #define m_486 (1<<PROCESSOR_I486)
1264 #define m_PENT (1<<PROCESSOR_PENTIUM)
1265 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1266 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1267 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1268 #define m_CORE2 (1<<PROCESSOR_CORE2)
1269 #define m_ATOM (1<<PROCESSOR_ATOM)
/* AMD (and Geode) processors, plus convenience unions of related cores.  */
1271 #define m_GEODE (1<<PROCESSOR_GEODE)
1272 #define m_K6 (1<<PROCESSOR_K6)
1273 #define m_K6_GEODE (m_K6 | m_GEODE)
1274 #define m_K8 (1<<PROCESSOR_K8)
1275 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1276 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1277 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1278 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
/* The two "generic" tuning targets (32-bit and 64-bit).  */
1280 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1281 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283 /* Generic instruction choice should be common subset of supported CPUs
1284 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1285 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1287 /* Feature tests against the various tunings. */
/* One boolean per X86_TUNE_* enumerator; presumably filled in from
   initial_ix86_tune_features using the active processor's m_* bit —
   confirm against the option-override code.  */
1288 unsigned char ix86_tune_features[X86_TUNE_LAST];
1290 /* Feature tests against the various tunings used to create ix86_tune_features
1291 based on the processor mask. */
1292 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1293 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1294 negatively, so enabling for Generic64 seems like good code size
1295 tradeoff. We can't enable it for 32bit generic because it does not
1296 work well with PPro base chips. */
1297 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299 /* X86_TUNE_PUSH_MEMORY */
1300 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1301 | m_NOCONA | m_CORE2 | m_GENERIC,
1303 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1306 /* X86_TUNE_UNROLL_STRLEN */
1307 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1308 | m_CORE2 | m_GENERIC,
1310 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314 on simulation result. But after P4 was made, no performance benefit
1315 was observed with branch hints. It also increases the code size.
1316 As a result, icc never generates branch hints. */
1319 /* X86_TUNE_DOUBLE_WITH_ADD */
1322 /* X86_TUNE_USE_SAHF */
1323 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1324 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327 partial dependencies. */
1328 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1329 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332 register stalls on Generic32 compilation setting as well. However
1333 in current implementation the partial register stalls are not eliminated
1334 very well - they can be introduced via subregs synthesized by combine
1335 and can happen in caller/callee saving sequences. Because this option
1336 pays back little on PPro based chips and is in conflict with partial reg
1337 dependencies used by Athlon/P4 based chips, it is better to leave it off
1338 for generic32 for now. */
1341 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1342 m_CORE2 | m_GENERIC,
1344 /* X86_TUNE_USE_HIMODE_FIOP */
1345 m_386 | m_486 | m_K6_GEODE,
1347 /* X86_TUNE_USE_SIMODE_FIOP */
1348 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350 /* X86_TUNE_USE_MOV0 */
1353 /* X86_TUNE_USE_CLTD */
1354 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1359 /* X86_TUNE_SPLIT_LONG_MOVES */
1362 /* X86_TUNE_READ_MODIFY_WRITE */
1365 /* X86_TUNE_READ_MODIFY */
1368 /* X86_TUNE_PROMOTE_QIMODE */
1369 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1370 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372 /* X86_TUNE_FAST_PREFIX */
1373 ~(m_PENT | m_486 | m_386),
1375 /* X86_TUNE_SINGLE_STRINGOP */
1376 m_386 | m_PENT4 | m_NOCONA,
1378 /* X86_TUNE_QIMODE_MATH */
1381 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1382 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1383 might be considered for Generic32 if our scheme for avoiding partial
1384 stalls was more effective. */
1387 /* X86_TUNE_PROMOTE_QI_REGS */
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1395 | m_CORE2 | m_GENERIC,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1399 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1407 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412 | m_GENERIC | m_GEODE),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418 conflict here in between PPro/Pentium4 based chips that thread 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers to two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra microop on 64bit SSE units. Experimental results
1423 shows that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just lower part of scalar values in proper format leaving the
1435 upper part undefined. */
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO | m_PENT4 | m_NOCONA,
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453 /* X86_TUNE_SHIFT1 */
1456 /* X86_TUNE_USE_FFREEP */
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1494 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1498 vector path on AMD machines. */
1499 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1513 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1514 operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 m_AMDFAM10 | m_GENERIC,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
1536 /* Feature tests against the various architecture variations. */
1537 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
1541 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386 | m_486 | m_PENT | m_K6),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1558 static const unsigned int x86_accumulate_outgoing_args
1559 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
/* Mask of processors on which the "fancy" 387 math sequences are always
   used (all cores listed below except the 386/486/K6/Geode families,
   which are absent from this union).  */
1562 static const unsigned int x86_arch_always_fancy_math_387
1563 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1564 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation algorithm forced by the user; no_stringop means no
   override and the per-processor stringop tables above are consulted
   instead.  NOTE(review): presumably set from a -mstringop-strategy-style
   option — confirm in the option-handling code.  */
1566 static enum stringop_alg stringop_alg = no_stringop;
1568 /* In case the average insn count for single function invocation is
1569 lower than this constant, emit fast (but longer) prologue and
1571 #define FAST_PROLOGUE_INSN_COUNT 20
1573 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* Indexed by gcc register number; the *_REGISTER_NAMES initializer macros
   come from the target headers (i386.h).  */
1574 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1575 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1576 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578 /* Array of the smallest class containing reg number REGNO, indexed by
1579 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1581 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583 /* ax, dx, cx, bx */
1584 AREG, DREG, CREG, BREG,
1585 /* si, di, bp, sp */
1586 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1588 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1589 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1592 /* flags, fpsr, fpcr, frame */
1593 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1595 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1598 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1601 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 /* SSE REX registers */
1604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1608 /* The "default" register map used in 32bit mode. */
1610 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1613 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1614 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1615 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1616 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1617 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1621 /* The "default" register map used in 64bit mode. */
1623 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1626 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1627 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1628 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1629 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1630 8,9,10,11,12,13,14,15, /* extended integer registers */
1631 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1634 /* Define the register numbers to be used in Dwarf debugging information.
1635 The SVR4 reference port C compiler uses the following register numbers
1636 in its Dwarf output code:
1637 0 for %eax (gcc regno = 0)
1638 1 for %ecx (gcc regno = 2)
1639 2 for %edx (gcc regno = 1)
1640 3 for %ebx (gcc regno = 3)
1641 4 for %esp (gcc regno = 7)
1642 5 for %ebp (gcc regno = 6)
1643 6 for %esi (gcc regno = 4)
1644 7 for %edi (gcc regno = 5)
1645 The following three DWARF register numbers are never generated by
1646 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1647 believes these numbers have these meanings.
1648 8 for %eip (no gcc equivalent)
1649 9 for %eflags (gcc regno = 17)
1650 10 for %trapno (no gcc equivalent)
1651 It is not at all clear how we should number the FP stack registers
1652 for the x86 architecture. If the version of SDB on x86/svr4 were
1653 a bit less brain dead with respect to floating-point then we would
1654 have a precedent to follow with respect to DWARF register numbers
1655 for x86 FP registers, but the SDB on x86/svr4 is so completely
1656 broken with respect to FP registers that it is hardly worth thinking
1657 of it as something to strive for compatibility with.
1658 The version of x86/svr4 SDB I have at the moment does (partially)
1659 seem to believe that DWARF register number 11 is associated with
1660 the x86 register %st(0), but that's about all. Higher DWARF
1661 register numbers don't seem to be associated with anything in
1662 particular, and even for DWARF regno 11, SDB only seems to under-
1663 stand that it should say that a variable lives in %st(0) (when
1664 asked via an `=' command) if we said it was in DWARF regno 11,
1665 but SDB still prints garbage when asked for the value of the
1666 variable in question (via a `/' command).
1667 (Also note that the labels SDB prints for various FP stack regs
1668 when doing an `x' command are all wrong.)
1669 Note that these problems generally don't affect the native SVR4
1670 C compiler because it doesn't allow the use of -O with -g and
1671 because when it is *not* optimizing, it allocates a memory
1672 location for each floating-point variable, and the memory
1673 location is what gets described in the DWARF AT_location
1674 attribute for the variable in question.
1675 Regardless of the severe mental illness of the x86/svr4 SDB, we
1676 do something sensible here and we use the following DWARF
1677 register numbers. Note that these are all stack-top-relative
1679 11 for %st(0) (gcc regno = 8)
1680 12 for %st(1) (gcc regno = 9)
1681 13 for %st(2) (gcc regno = 10)
1682 14 for %st(3) (gcc regno = 11)
1683 15 for %st(4) (gcc regno = 12)
1684 16 for %st(5) (gcc regno = 13)
1685 17 for %st(6) (gcc regno = 14)
1686 18 for %st(7) (gcc regno = 15)
1688 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1690 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1691 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1692 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1693 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1694 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1695 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1699 /* Test and compare insns in i386.md store the information needed to
1700 generate branch and scc insns here. */
1702 rtx ix86_compare_op0 = NULL_RTX; /* first operand of the pending compare */
1703 rtx ix86_compare_op1 = NULL_RTX; /* second operand of the pending compare */
1705 /* Define parameter passing and return registers. */
1707 static int const x86_64_int_parameter_registers[6] =
1709 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1712 static int const x86_64_ms_abi_int_parameter_registers[4] =
1714 CX_REG, DX_REG, R8_REG, R9_REG
1717 static int const x86_64_int_return_registers[4] =
1719 AX_REG, DX_REG, DI_REG, SI_REG
1722 /* Define the structure for the machine field in struct function. */
1724 struct GTY(()) stack_local_entry {
1725 unsigned short mode;
1728 struct stack_local_entry *next;
1731 /* Structure describing stack frame layout.
1732 Stack grows downward:
1738 saved frame pointer if frame_pointer_needed
1739 <- HARD_FRAME_POINTER
1748 [va_arg registers] (
1749 > to_allocate <- FRAME_POINTER
1761 HOST_WIDE_INT frame;
1763 int outgoing_arguments_size;
1766 HOST_WIDE_INT to_allocate;
1767 /* The offsets relative to ARG_POINTER. */
1768 HOST_WIDE_INT frame_pointer_offset;
1769 HOST_WIDE_INT hard_frame_pointer_offset;
1770 HOST_WIDE_INT stack_pointer_offset;
1772 /* When save_regs_using_mov is set, emit prologue using
1773 move instead of push instructions. */
1774 bool save_regs_using_mov;
1777 /* Code model option. */
1778 enum cmodel ix86_cmodel;
1780 enum asm_dialect ix86_asm_dialect = ASM_ATT; /* assembler dialect to emit; AT&T unless overridden */
1782 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; /* TLS code-generation dialect; GNU by default */
1784 /* Which unit we are generating floating point math for. */
1785 enum fpmath_unit ix86_fpmath;
1787 /* Which cpu are we scheduling for. */
1788 enum attr_cpu ix86_schedule;
1790 /* Which cpu are we optimizing for. */
1791 enum processor_type ix86_tune;
1793 /* Which instruction set architecture to use. */
1794 enum processor_type ix86_arch;
1796 /* true if sse prefetch instruction is not NOOP. */
1797 int x86_prefetch_sse;
1799 /* ix86_regparm_string as a number */
1800 static int ix86_regparm;
1802 /* -mstackrealign option */
1803 extern int ix86_force_align_arg_pointer;
1804 static const char ix86_force_align_arg_pointer_string[]
1805 = "force_align_arg_pointer";
/* Indirect pattern-generator hooks.  NOTE(review): presumably pointed at
   the 32-bit or 64-bit insn generators once options are processed -- the
   assigning code is not visible in this chunk; confirm before relying.  */
1807 static rtx (*ix86_gen_leave) (void);
1808 static rtx (*ix86_gen_pop1) (rtx);
1809 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1810 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1812 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1813 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1814 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1816 /* Preferred alignment for stack boundary in bits. */
1817 unsigned int ix86_preferred_stack_boundary;
1819 /* Alignment for incoming stack boundary in bits specified at
   the command line. */
1821 static unsigned int ix86_user_incoming_stack_boundary;
1823 /* Default alignment for incoming stack boundary in bits. */
1824 static unsigned int ix86_default_incoming_stack_boundary;
1826 /* Alignment for incoming stack boundary in bits. */
1827 unsigned int ix86_incoming_stack_boundary;
1829 /* The abi used by target. */
1830 enum calling_abi ix86_abi;
1832 /* Values 1-5: see jump.c */
1833 int ix86_branch_cost;
1835 /* Calling abi specific va_list type nodes. */
1836 static GTY(()) tree sysv_va_list_type_node;
1837 static GTY(()) tree ms_va_list_type_node;
1839 /* Variables which are this size or smaller are put in the data/bss
1840 or ldata/lbss sections. */
1842 int ix86_section_threshold = 65536;
1844 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1845 char internal_label_prefix[16];
1846 int internal_label_prefix_len;
1848 /* Fence to use after loop using movnt.
   NOTE(review): the declaration this comment describes is not visible in
   this chunk of the file.  */
1851 /* Register class used for passing given 64bit part of the argument.
1852 These represent classes as documented by the PS ABI, with the exception
1853 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1854 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1856 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1857 whenever possible (upper half does contain padding). */
1858 enum x86_64_reg_class
1861 X86_64_INTEGER_CLASS,
1862 X86_64_INTEGERSI_CLASS,
1869 X86_64_COMPLEX_X87_CLASS,
1873 #define MAX_CLASSES 4
1875 /* Table of constants used by fldpi, fldln2, etc.... */
1876 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1877 static bool ext_80387_constants_init = 0;
1880 static struct machine_function * ix86_init_machine_status (void);
1881 static rtx ix86_function_value (const_tree, const_tree, bool);
1882 static int ix86_function_regparm (const_tree, const_tree);
1883 static void ix86_compute_frame_layout (struct ix86_frame *);
1884 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1886 static void ix86_add_new_builtins (int);
1888 enum ix86_function_specific_strings
1890 IX86_FUNCTION_SPECIFIC_ARCH,
1891 IX86_FUNCTION_SPECIFIC_TUNE,
1892 IX86_FUNCTION_SPECIFIC_FPMATH,
1893 IX86_FUNCTION_SPECIFIC_MAX
1896 static char *ix86_target_string (int, int, const char *, const char *,
1897 const char *, bool);
1898 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1899 static void ix86_function_specific_save (struct cl_target_option *);
1900 static void ix86_function_specific_restore (struct cl_target_option *);
1901 static void ix86_function_specific_print (FILE *, int,
1902 struct cl_target_option *);
1903 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1904 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1905 static bool ix86_can_inline_p (tree, tree);
1906 static void ix86_set_current_function (tree);
1908 static enum calling_abi ix86_function_abi (const_tree);
1911 /* The svr4 ABI for the i386 says that records and unions are returned in memory.  */
1913 #ifndef DEFAULT_PCC_STRUCT_RETURN
1914 #define DEFAULT_PCC_STRUCT_RETURN 1
1917 /* Whether -mtune= or -march= were specified */
1918 static int ix86_tune_defaulted; /* nonzero when ix86_tune_string was defaulted rather than user-supplied */
1919 static int ix86_arch_specified; /* nonzero when the user explicitly gave -march= */
1921 /* Bit flags that specify the ISA we are compiling for. */
1922 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1924 /* A mask of ix86_isa_flags that includes bit X if X
1925 was set or cleared on the command line. */
1926 static int ix86_isa_flags_explicit;
1928 /* Define a set of ISAs which are available when a given ISA is
1929 enabled. MMX and SSE ISAs are handled separately. */
1931 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1932 #define OPTION_MASK_ISA_3DNOW_SET \
1933 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1935 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1936 #define OPTION_MASK_ISA_SSE2_SET \
1937 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1938 #define OPTION_MASK_ISA_SSE3_SET \
1939 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1940 #define OPTION_MASK_ISA_SSSE3_SET \
1941 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1942 #define OPTION_MASK_ISA_SSE4_1_SET \
1943 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_2_SET \
1945 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1946 #define OPTION_MASK_ISA_AVX_SET \
1947 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1948 #define OPTION_MASK_ISA_FMA_SET \
1949 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1951 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same as -msse4.2.  */
1953 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1955 #define OPTION_MASK_ISA_SSE4A_SET \
1956 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1957 #define OPTION_MASK_ISA_SSE5_SET \
1958 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1960 /* AES and PCLMUL need SSE2 because they use xmm registers */
1961 #define OPTION_MASK_ISA_AES_SET \
1962 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1963 #define OPTION_MASK_ISA_PCLMUL_SET \
1964 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1966 #define OPTION_MASK_ISA_ABM_SET \
1967 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1972 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1974 /* Define a set of ISAs which aren't available when a given ISA is
1975 disabled. MMX and SSE ISAs are handled separately. */
1977 #define OPTION_MASK_ISA_MMX_UNSET \
1978 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1979 #define OPTION_MASK_ISA_3DNOW_UNSET \
1980 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1981 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1983 #define OPTION_MASK_ISA_SSE_UNSET \
1984 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1985 #define OPTION_MASK_ISA_SSE2_UNSET \
1986 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1987 #define OPTION_MASK_ISA_SSE3_UNSET \
1988 (OPTION_MASK_ISA_SSE3 \
1989 | OPTION_MASK_ISA_SSSE3_UNSET \
1990 | OPTION_MASK_ISA_SSE4A_UNSET )
1991 #define OPTION_MASK_ISA_SSSE3_UNSET \
1992 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1993 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1994 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1995 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1996 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1997 #define OPTION_MASK_ISA_AVX_UNSET \
1998 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1999 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2001 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same as -mno-sse4.1.  */
2003 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2005 #define OPTION_MASK_ISA_SSE4A_UNSET \
2006 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2007 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2008 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2009 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2010 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2011 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2012 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2013 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2014 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2016 /* Vectorization library interface and handlers. */
2017 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL; /* starts NULL; presumably installed during option processing (not visible in this chunk) */
2018 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree); /* handler for the SVML-style vector library ABI */
2019 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree); /* handler for the ACML-style vector library ABI */
2021 /* Processor target table, indexed by processor number */
2024 const struct processor_costs *cost; /* Processor costs */
2025 const int align_loop; /* Default alignments. */
2026 const int align_loop_max_skip;
2027 const int align_jump;
2028 const int align_jump_max_skip;
2029 const int align_func;
2032 static const struct ptt processor_target_table[PROCESSOR_max] =
2034 {&i386_cost, 4, 3, 4, 3, 4},
2035 {&i486_cost, 16, 15, 16, 15, 16},
2036 {&pentium_cost, 16, 7, 16, 7, 16},
2037 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2038 {&geode_cost, 0, 0, 0, 0, 0},
2039 {&k6_cost, 32, 7, 32, 7, 32},
2040 {&athlon_cost, 16, 7, 16, 7, 16},
2041 {&pentium4_cost, 0, 0, 0, 0, 0},
2042 {&k8_cost, 16, 7, 16, 7, 16},
2043 {&nocona_cost, 0, 0, 0, 0, 0},
2044 {&core2_cost, 16, 10, 16, 10, 16},
2045 {&generic32_cost, 16, 7, 16, 7, 16},
2046 {&generic64_cost, 16, 10, 16, 10, 16},
2047 {&amdfam10_cost, 32, 24, 32, 7, 32},
2048 {&atom_cost, 16, 7, 16, 7, 16}
2051 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2077 /* Implement TARGET_HANDLE_OPTION. */
2080 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2087 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2088 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2092 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2093 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2100 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2101 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2105 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2106 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2116 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2117 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2121 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2122 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2129 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2130 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2134 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2135 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2142 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2143 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2147 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2148 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2155 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2156 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2160 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2161 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2168 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2169 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2173 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2174 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2181 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2182 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2186 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2187 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2194 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2195 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2199 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2200 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2207 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2208 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2212 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2213 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2218 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2219 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2223 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2224 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2230 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2231 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2235 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2236 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2243 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2244 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2248 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2249 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2256 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2257 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2261 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2262 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2269 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2270 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2274 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2275 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2282 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2283 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2287 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2288 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2295 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2296 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2300 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2301 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2308 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2309 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2313 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2314 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2321 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2322 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2326 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2327 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2334 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2335 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2339 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2340 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2349 /* Return a string that documents the current -m options. The caller is
2350 responsible for freeing the string. */
2353 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2354 const char *fpmath, bool add_nl_p)
2356 struct ix86_target_opts
2358 const char *option; /* option string */
2359 int mask; /* isa mask options */
2362 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2363 preceding options while match those first. */
2364 static struct ix86_target_opts isa_opts[] =
2366 { "-m64", OPTION_MASK_ISA_64BIT },
2367 { "-msse5", OPTION_MASK_ISA_SSE5 },
2368 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2369 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2370 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2371 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2372 { "-msse3", OPTION_MASK_ISA_SSE3 },
2373 { "-msse2", OPTION_MASK_ISA_SSE2 },
2374 { "-msse", OPTION_MASK_ISA_SSE },
2375 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2376 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2377 { "-mmmx", OPTION_MASK_ISA_MMX },
2378 { "-mabm", OPTION_MASK_ISA_ABM },
2379 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2380 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2381 { "-maes", OPTION_MASK_ISA_AES },
2382 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2386 static struct ix86_target_opts flag_opts[] =
2388 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2389 { "-m80387", MASK_80387 },
2390 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2391 { "-malign-double", MASK_ALIGN_DOUBLE },
2392 { "-mcld", MASK_CLD },
2393 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2394 { "-mieee-fp", MASK_IEEE_FP },
2395 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2396 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2397 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2398 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2399 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2400 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2401 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2402 { "-mno-red-zone", MASK_NO_RED_ZONE },
2403 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2404 { "-mrecip", MASK_RECIP },
2405 { "-mrtd", MASK_RTD },
2406 { "-msseregparm", MASK_SSEREGPARM },
2407 { "-mstack-arg-probe", MASK_STACK_PROBE },
2408 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2411 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2414 char target_other[40];
2423 memset (opts, '\0', sizeof (opts));
2425 /* Add -march= option. */
2428 opts[num][0] = "-march=";
2429 opts[num++][1] = arch;
2432 /* Add -mtune= option. */
2435 opts[num][0] = "-mtune=";
2436 opts[num++][1] = tune;
2439 /* Pick out the options in isa options. */
2440 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2442 if ((isa & isa_opts[i].mask) != 0)
2444 opts[num++][0] = isa_opts[i].option;
2445 isa &= ~ isa_opts[i].mask;
2449 if (isa && add_nl_p)
2451 opts[num++][0] = isa_other;
2452 sprintf (isa_other, "(other isa: 0x%x)", isa);
2455 /* Add flag options. */
2456 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2458 if ((flags & flag_opts[i].mask) != 0)
2460 opts[num++][0] = flag_opts[i].option;
2461 flags &= ~ flag_opts[i].mask;
2465 if (flags && add_nl_p)
2467 opts[num++][0] = target_other;
2468 sprintf (target_other, "(other flags: 0x%x)", isa);
2471 /* Add -fpmath= option. */
2474 opts[num][0] = "-mfpmath=";
2475 opts[num++][1] = fpmath;
2482 gcc_assert (num < ARRAY_SIZE (opts));
2484 /* Size the string. */
2486 sep_len = (add_nl_p) ? 3 : 1;
2487 for (i = 0; i < num; i++)
2490 for (j = 0; j < 2; j++)
2492 len += strlen (opts[i][j]);
2495 /* Build the string. */
2496 ret = ptr = (char *) xmalloc (len);
2499 for (i = 0; i < num; i++)
2503 for (j = 0; j < 2; j++)
2504 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2511 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2519 for (j = 0; j < 2; j++)
2522 memcpy (ptr, opts[i][j], len2[j]);
2524 line_len += len2[j];
2529 gcc_assert (ret + len >= ptr);
2534 /* Function that is callable from the debugger to print the current
2537 ix86_debug_options (void)
2539 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2540 ix86_arch_string, ix86_tune_string,
2541 ix86_fpmath_string, true);
2545 fprintf (stderr, "%s\n\n", opts);
2549 fprintf (stderr, "<no options>\n\n");
2554 /* Sometimes certain combinations of command options do not make
2555 sense on a particular target machine. You can define a macro
2556 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2557 defined, is executed once just after all the command options have been parsed.
2560 Don't use this macro to turn on various extra optimizations for
2561 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2564 override_options (bool main_args_p)
2567 unsigned int ix86_arch_mask, ix86_tune_mask;
2572 /* Comes from final.c -- no real reason to change it. */
2573 #define MAX_CODE_ALIGN 16
2581 PTA_PREFETCH_SSE = 1 << 4,
2583 PTA_3DNOW_A = 1 << 6,
2587 PTA_POPCNT = 1 << 10,
2589 PTA_SSE4A = 1 << 12,
2590 PTA_NO_SAHF = 1 << 13,
2591 PTA_SSE4_1 = 1 << 14,
2592 PTA_SSE4_2 = 1 << 15,
2595 PTA_PCLMUL = 1 << 18,
2603 const char *const name; /* processor name or nickname. */
2604 const enum processor_type processor;
2605 const enum attr_cpu schedule;
2606 const unsigned /*enum pta_flags*/ flags;
2608 const processor_alias_table[] =
2610 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2611 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2612 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2613 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2614 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2615 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2616 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2617 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2618 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2619 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2620 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2621 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2622 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2624 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2626 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2627 PTA_MMX | PTA_SSE | PTA_SSE2},
2628 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2629 PTA_MMX |PTA_SSE | PTA_SSE2},
2630 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2631 PTA_MMX | PTA_SSE | PTA_SSE2},
2632 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2633 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2634 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2635 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2636 | PTA_CX16 | PTA_NO_SAHF},
2637 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2638 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2639 | PTA_SSSE3 | PTA_CX16},
2640 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2641 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2642 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2643 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2644 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2645 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2646 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2647 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2648 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2649 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2650 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2651 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2652 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2653 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2654 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2655 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2656 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2657 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2658 {"x86-64", PROCESSOR_K8, CPU_K8,
2659 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2660 {"k8", PROCESSOR_K8, CPU_K8,
2661 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2662 | PTA_SSE2 | PTA_NO_SAHF},
2663 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2664 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2665 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2666 {"opteron", PROCESSOR_K8, CPU_K8,
2667 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2668 | PTA_SSE2 | PTA_NO_SAHF},
2669 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2670 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2671 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2672 {"athlon64", PROCESSOR_K8, CPU_K8,
2673 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2674 | PTA_SSE2 | PTA_NO_SAHF},
2675 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2676 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2677 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2678 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2679 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2680 | PTA_SSE2 | PTA_NO_SAHF},
2681 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2682 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2683 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2684 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2685 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2686 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2687 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2688 0 /* flags are only used for -march switch. */ },
2689 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2690 PTA_64BIT /* flags are only used for -march switch. */ },
2693 int const pta_size = ARRAY_SIZE (processor_alias_table);
2695 /* Set up prefix/suffix so the error messages refer to either the command
2696 line argument, or the attribute(target). */
2705 prefix = "option(\"";
2710 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2711 SUBTARGET_OVERRIDE_OPTIONS;
2714 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2715 SUBSUBTARGET_OVERRIDE_OPTIONS;
2718 /* -fPIC is the default for x86_64. */
2719 if (TARGET_MACHO && TARGET_64BIT)
2722 /* Set the default values for switches whose default depends on TARGET_64BIT
2723 in case they weren't overwritten by command line options. */
2726 /* Mach-O doesn't support omitting the frame pointer for now. */
2727 if (flag_omit_frame_pointer == 2)
2728 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2729 if (flag_asynchronous_unwind_tables == 2)
2730 flag_asynchronous_unwind_tables = 1;
2731 if (flag_pcc_struct_return == 2)
2732 flag_pcc_struct_return = 0;
2736 if (flag_omit_frame_pointer == 2)
2737 flag_omit_frame_pointer = 0;
2738 if (flag_asynchronous_unwind_tables == 2)
2739 flag_asynchronous_unwind_tables = 0;
2740 if (flag_pcc_struct_return == 2)
2741 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2744 /* Need to check -mtune=generic first. */
2745 if (ix86_tune_string)
2747 if (!strcmp (ix86_tune_string, "generic")
2748 || !strcmp (ix86_tune_string, "i686")
2749 /* As special support for cross compilers we read -mtune=native
2750 as -mtune=generic. With native compilers we won't see the
2751 -mtune=native, as it was changed by the driver. */
2752 || !strcmp (ix86_tune_string, "native"))
2755 ix86_tune_string = "generic64";
2757 ix86_tune_string = "generic32";
2759 /* If this call is for setting the option attribute, allow the
2760 generic32/generic64 that was previously set. */
2761 else if (!main_args_p
2762 && (!strcmp (ix86_tune_string, "generic32")
2763 || !strcmp (ix86_tune_string, "generic64")))
2765 else if (!strncmp (ix86_tune_string, "generic", 7))
2766 error ("bad value (%s) for %stune=%s %s",
2767 ix86_tune_string, prefix, suffix, sw);
2771 if (ix86_arch_string)
2772 ix86_tune_string = ix86_arch_string;
2773 if (!ix86_tune_string)
2775 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2776 ix86_tune_defaulted = 1;
2779 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2780 need to use a sensible tune option. */
2781 if (!strcmp (ix86_tune_string, "generic")
2782 || !strcmp (ix86_tune_string, "x86-64")
2783 || !strcmp (ix86_tune_string, "i686"))
2786 ix86_tune_string = "generic64";
2788 ix86_tune_string = "generic32";
2791 if (ix86_stringop_string)
2793 if (!strcmp (ix86_stringop_string, "rep_byte"))
2794 stringop_alg = rep_prefix_1_byte;
2795 else if (!strcmp (ix86_stringop_string, "libcall"))
2796 stringop_alg = libcall;
2797 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2798 stringop_alg = rep_prefix_4_byte;
2799 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2801 /* rep; movq isn't available in 32-bit code. */
2802 stringop_alg = rep_prefix_8_byte;
2803 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2804 stringop_alg = loop_1_byte;
2805 else if (!strcmp (ix86_stringop_string, "loop"))
2806 stringop_alg = loop;
2807 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2808 stringop_alg = unrolled_loop;
2810 error ("bad value (%s) for %sstringop-strategy=%s %s",
2811 ix86_stringop_string, prefix, suffix, sw);
2813 if (!strcmp (ix86_tune_string, "x86-64"))
2814 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2815 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2816 prefix, suffix, prefix, suffix, prefix, suffix);
2818 if (!ix86_arch_string)
2819 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2821 ix86_arch_specified = 1;
2823 if (!strcmp (ix86_arch_string, "generic"))
2824 error ("generic CPU can be used only for %stune=%s %s",
2825 prefix, suffix, sw);
2826 if (!strncmp (ix86_arch_string, "generic", 7))
2827 error ("bad value (%s) for %sarch=%s %s",
2828 ix86_arch_string, prefix, suffix, sw);
2830 /* Validate -mabi= value. */
2831 if (ix86_abi_string)
2833 if (strcmp (ix86_abi_string, "sysv") == 0)
2834 ix86_abi = SYSV_ABI;
2835 else if (strcmp (ix86_abi_string, "ms") == 0)
2838 error ("unknown ABI (%s) for %sabi=%s %s",
2839 ix86_abi_string, prefix, suffix, sw);
2842 ix86_abi = DEFAULT_ABI;
2844 if (ix86_cmodel_string != 0)
2846 if (!strcmp (ix86_cmodel_string, "small"))
2847 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2848 else if (!strcmp (ix86_cmodel_string, "medium"))
2849 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2850 else if (!strcmp (ix86_cmodel_string, "large"))
2851 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2853 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2854 else if (!strcmp (ix86_cmodel_string, "32"))
2855 ix86_cmodel = CM_32;
2856 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2857 ix86_cmodel = CM_KERNEL;
2859 error ("bad value (%s) for %scmodel=%s %s",
2860 ix86_cmodel_string, prefix, suffix, sw);
2864 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2865 use of rip-relative addressing. This eliminates fixups that
2866 would otherwise be needed if this object is to be placed in a
2867 DLL, and is essentially just as efficient as direct addressing. */
2868 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2869 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2870 else if (TARGET_64BIT)
2871 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2873 ix86_cmodel = CM_32;
2875 if (ix86_asm_string != 0)
2878 && !strcmp (ix86_asm_string, "intel"))
2879 ix86_asm_dialect = ASM_INTEL;
2880 else if (!strcmp (ix86_asm_string, "att"))
2881 ix86_asm_dialect = ASM_ATT;
2883 error ("bad value (%s) for %sasm=%s %s",
2884 ix86_asm_string, prefix, suffix, sw);
2886 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2887 error ("code model %qs not supported in the %s bit mode",
2888 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2889 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2890 sorry ("%i-bit mode not compiled in",
2891 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2893 for (i = 0; i < pta_size; i++)
2894 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2896 ix86_schedule = processor_alias_table[i].schedule;
2897 ix86_arch = processor_alias_table[i].processor;
2898 /* Default cpu tuning to the architecture. */
2899 ix86_tune = ix86_arch;
2901 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2902 error ("CPU you selected does not support x86-64 "
2905 if (processor_alias_table[i].flags & PTA_MMX
2906 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2907 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2908 if (processor_alias_table[i].flags & PTA_3DNOW
2909 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2910 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2911 if (processor_alias_table[i].flags & PTA_3DNOW_A
2912 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2913 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2914 if (processor_alias_table[i].flags & PTA_SSE
2915 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2916 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2917 if (processor_alias_table[i].flags & PTA_SSE2
2918 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2919 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2920 if (processor_alias_table[i].flags & PTA_SSE3
2921 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2922 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2923 if (processor_alias_table[i].flags & PTA_SSSE3
2924 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2925 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2926 if (processor_alias_table[i].flags & PTA_SSE4_1
2927 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2928 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2929 if (processor_alias_table[i].flags & PTA_SSE4_2
2930 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2931 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2932 if (processor_alias_table[i].flags & PTA_AVX
2933 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2934 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2935 if (processor_alias_table[i].flags & PTA_FMA
2936 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2937 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2938 if (processor_alias_table[i].flags & PTA_SSE4A
2939 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2940 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2941 if (processor_alias_table[i].flags & PTA_SSE5
2942 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2943 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2944 if (processor_alias_table[i].flags & PTA_ABM
2945 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2946 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2947 if (processor_alias_table[i].flags & PTA_CX16
2948 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2949 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2950 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2951 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2952 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2953 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2954 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2955 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2956 if (processor_alias_table[i].flags & PTA_MOVBE
2957 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
2958 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
2959 if (processor_alias_table[i].flags & PTA_AES
2960 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2961 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2962 if (processor_alias_table[i].flags & PTA_PCLMUL
2963 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2964 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2965 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2966 x86_prefetch_sse = true;
2972 error ("bad value (%s) for %sarch=%s %s",
2973 ix86_arch_string, prefix, suffix, sw);
2975 ix86_arch_mask = 1u << ix86_arch;
2976 for (i = 0; i < X86_ARCH_LAST; ++i)
2977 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2979 for (i = 0; i < pta_size; i++)
2980 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2982 ix86_schedule = processor_alias_table[i].schedule;
2983 ix86_tune = processor_alias_table[i].processor;
2984 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2986 if (ix86_tune_defaulted)
2988 ix86_tune_string = "x86-64";
2989 for (i = 0; i < pta_size; i++)
2990 if (! strcmp (ix86_tune_string,
2991 processor_alias_table[i].name))
2993 ix86_schedule = processor_alias_table[i].schedule;
2994 ix86_tune = processor_alias_table[i].processor;
2997 error ("CPU you selected does not support x86-64 "
3000 /* Intel CPUs have always interpreted SSE prefetch instructions as
3001 NOPs; so, we can enable SSE prefetch instructions even when
3002 -mtune (rather than -march) points us to a processor that has them.
3003 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3004 higher processors. */
3006 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3007 x86_prefetch_sse = true;
3011 error ("bad value (%s) for %stune=%s %s",
3012 ix86_tune_string, prefix, suffix, sw);
3014 ix86_tune_mask = 1u << ix86_tune;
3015 for (i = 0; i < X86_TUNE_LAST; ++i)
3016 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3019 ix86_cost = &ix86_size_cost;
3021 ix86_cost = processor_target_table[ix86_tune].cost;
3023 /* Arrange to set up i386_stack_locals for all functions. */
3024 init_machine_status = ix86_init_machine_status;
3026 /* Validate -mregparm= value. */
3027 if (ix86_regparm_string)
3030 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3031 i = atoi (ix86_regparm_string);
3032 if (i < 0 || i > REGPARM_MAX)
3033 error ("%sregparm=%d%s is not between 0 and %d",
3034 prefix, i, suffix, REGPARM_MAX);
3039 ix86_regparm = REGPARM_MAX;
3041 /* If the user has provided any of the -malign-* options,
3042 warn and use that value only if -falign-* is not set.
3043 Remove this code in GCC 3.2 or later. */
3044 if (ix86_align_loops_string)
3046 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3047 prefix, suffix, suffix);
3048 if (align_loops == 0)
3050 i = atoi (ix86_align_loops_string);
3051 if (i < 0 || i > MAX_CODE_ALIGN)
3052 error ("%salign-loops=%d%s is not between 0 and %d",
3053 prefix, i, suffix, MAX_CODE_ALIGN);
3055 align_loops = 1 << i;
3059 if (ix86_align_jumps_string)
3061 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3062 prefix, suffix, suffix);
3063 if (align_jumps == 0)
3065 i = atoi (ix86_align_jumps_string);
3066 if (i < 0 || i > MAX_CODE_ALIGN)
3067 error ("%salign-loops=%d%s is not between 0 and %d",
3068 prefix, i, suffix, MAX_CODE_ALIGN);
3070 align_jumps = 1 << i;
3074 if (ix86_align_funcs_string)
3076 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3077 prefix, suffix, suffix);
3078 if (align_functions == 0)
3080 i = atoi (ix86_align_funcs_string);
3081 if (i < 0 || i > MAX_CODE_ALIGN)
3082 error ("%salign-loops=%d%s is not between 0 and %d",
3083 prefix, i, suffix, MAX_CODE_ALIGN);
3085 align_functions = 1 << i;
3089 /* Default align_* from the processor table. */
3090 if (align_loops == 0)
3092 align_loops = processor_target_table[ix86_tune].align_loop;
3093 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3095 if (align_jumps == 0)
3097 align_jumps = processor_target_table[ix86_tune].align_jump;
3098 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3100 if (align_functions == 0)
3102 align_functions = processor_target_table[ix86_tune].align_func;
3105 /* Validate -mbranch-cost= value, or provide default. */
3106 ix86_branch_cost = ix86_cost->branch_cost;
3107 if (ix86_branch_cost_string)
3109 i = atoi (ix86_branch_cost_string);
3111 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3113 ix86_branch_cost = i;
3115 if (ix86_section_threshold_string)
3117 i = atoi (ix86_section_threshold_string);
3119 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3121 ix86_section_threshold = i;
3124 if (ix86_tls_dialect_string)
3126 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3127 ix86_tls_dialect = TLS_DIALECT_GNU;
3128 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3129 ix86_tls_dialect = TLS_DIALECT_GNU2;
3130 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3131 ix86_tls_dialect = TLS_DIALECT_SUN;
3133 error ("bad value (%s) for %stls-dialect=%s %s",
3134 ix86_tls_dialect_string, prefix, suffix, sw);
3137 if (ix87_precision_string)
3139 i = atoi (ix87_precision_string);
3140 if (i != 32 && i != 64 && i != 80)
3141 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3146 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3148 /* Enable by default the SSE and MMX builtins. Do allow the user to
3149 explicitly disable any of these. In particular, disabling SSE and
3150 MMX for kernel code is extremely useful. */
3151 if (!ix86_arch_specified)
3153 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3154 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3157 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3161 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3163 if (!ix86_arch_specified)
3165 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3167 /* i386 ABI does not specify red zone. It still makes sense to use it
3168 when programmer takes care to stack from being destroyed. */
3169 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3170 target_flags |= MASK_NO_RED_ZONE;
3173 /* Keep nonleaf frame pointers. */
3174 if (flag_omit_frame_pointer)
3175 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3176 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3177 flag_omit_frame_pointer = 1;
3179 /* If we're doing fast math, we don't care about comparison order
3180 wrt NaNs. This lets us use a shorter comparison sequence. */
3181 if (flag_finite_math_only)
3182 target_flags &= ~MASK_IEEE_FP;
3184 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3185 since the insns won't need emulation. */
3186 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3187 target_flags &= ~MASK_NO_FANCY_MATH_387;
3189 /* Likewise, if the target doesn't have a 387, or we've specified
3190 software floating point, don't use 387 inline intrinsics. */
3192 target_flags |= MASK_NO_FANCY_MATH_387;
3194 /* Turn on MMX builtins for -msse. */
3197 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3198 x86_prefetch_sse = true;
3201 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3202 if (TARGET_SSE4_2 || TARGET_ABM)
3203 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3205 /* Validate -mpreferred-stack-boundary= value or default it to
3206 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3207 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3208 if (ix86_preferred_stack_boundary_string)
3210 i = atoi (ix86_preferred_stack_boundary_string);
3211 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3212 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3213 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3215 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3218 /* Set the default value for -mstackrealign. */
3219 if (ix86_force_align_arg_pointer == -1)
3220 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3222 /* Validate -mincoming-stack-boundary= value or default it to
3223 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3224 if (ix86_force_align_arg_pointer)
3225 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3227 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3228 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3229 if (ix86_incoming_stack_boundary_string)
3231 i = atoi (ix86_incoming_stack_boundary_string);
3232 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3233 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3234 i, TARGET_64BIT ? 4 : 2);
3237 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3238 ix86_incoming_stack_boundary
3239 = ix86_user_incoming_stack_boundary;
3243 /* Accept -msseregparm only if at least SSE support is enabled. */
3244 if (TARGET_SSEREGPARM
3246 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3248 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3249 if (ix86_fpmath_string != 0)
3251 if (! strcmp (ix86_fpmath_string, "387"))
3252 ix86_fpmath = FPMATH_387;
3253 else if (! strcmp (ix86_fpmath_string, "sse"))
3257 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3258 ix86_fpmath = FPMATH_387;
3261 ix86_fpmath = FPMATH_SSE;
3263 else if (! strcmp (ix86_fpmath_string, "387,sse")
3264 || ! strcmp (ix86_fpmath_string, "387+sse")
3265 || ! strcmp (ix86_fpmath_string, "sse,387")
3266 || ! strcmp (ix86_fpmath_string, "sse+387")
3267 || ! strcmp (ix86_fpmath_string, "both"))
3271 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3272 ix86_fpmath = FPMATH_387;
3274 else if (!TARGET_80387)
3276 warning (0, "387 instruction set disabled, using SSE arithmetics");
3277 ix86_fpmath = FPMATH_SSE;
3280 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3283 error ("bad value (%s) for %sfpmath=%s %s",
3284 ix86_fpmath_string, prefix, suffix, sw);
3287 /* If the i387 is disabled, then do not return values in it. */
3289 target_flags &= ~MASK_FLOAT_RETURNS;
3291 /* Use external vectorized library in vectorizing intrinsics. */
3292 if (ix86_veclibabi_string)
3294 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3295 ix86_veclib_handler = ix86_veclibabi_svml;
3296 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3297 ix86_veclib_handler = ix86_veclibabi_acml;
3299 error ("unknown vectorization library ABI type (%s) for "
3300 "%sveclibabi=%s %s", ix86_veclibabi_string,
3301 prefix, suffix, sw);
3304 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3305 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3307 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3309 /* ??? Unwind info is not correct around the CFG unless either a frame
3310 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3311 unwind info generation to be aware of the CFG and propagating states
3313 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3314 || flag_exceptions || flag_non_call_exceptions)
3315 && flag_omit_frame_pointer
3316 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3318 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3319 warning (0, "unwind tables currently require either a frame pointer "
3320 "or %saccumulate-outgoing-args%s for correctness",
3322 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3325 /* If stack probes are required, the space used for large function
3326 arguments on the stack must also be probed, so enable
3327 -maccumulate-outgoing-args so this happens in the prologue. */
3328 if (TARGET_STACK_PROBE
3329 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3331 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3332 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3333 "for correctness", prefix, suffix);
3334 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3337 /* For sane SSE instruction set generation we need fcomi instruction.
3338 It is safe to enable all CMOVE instructions. */
3342 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3345 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3346 p = strchr (internal_label_prefix, 'X');
3347 internal_label_prefix_len = p - internal_label_prefix;
3351 /* When scheduling description is not available, disable scheduler pass
3352 so it won't slow down the compilation and make x87 code slower. */
3353 if (!TARGET_SCHEDULE)
3354 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3356 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3357 set_param_value ("simultaneous-prefetches",
3358 ix86_cost->simultaneous_prefetches);
3359 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3360 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3361 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3362 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3363 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3364 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3366 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3367 can be optimized to ap = __builtin_next_arg (0). */
3369 targetm.expand_builtin_va_start = NULL;
3373 ix86_gen_leave = gen_leave_rex64;
3374 ix86_gen_pop1 = gen_popdi1;
3375 ix86_gen_add3 = gen_adddi3;
3376 ix86_gen_sub3 = gen_subdi3;
3377 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3378 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3379 ix86_gen_monitor = gen_sse3_monitor64;
3380 ix86_gen_andsp = gen_anddi3;
3384 ix86_gen_leave = gen_leave;
3385 ix86_gen_pop1 = gen_popsi1;
3386 ix86_gen_add3 = gen_addsi3;
3387 ix86_gen_sub3 = gen_subsi3;
3388 ix86_gen_sub3_carry = gen_subsi3_carry;
3389 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3390 ix86_gen_monitor = gen_sse3_monitor;
3391 ix86_gen_andsp = gen_andsi3;
3395 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3397 target_flags |= MASK_CLD & ~target_flags_explicit;
3400 /* Save the initial options in case the user does function specific options */
3402 target_option_default_node = target_option_current_node
3403 = build_target_option_node ();
3406 /* Save the current options */
/* Save the current global target-option state into PTR so it can later
   be restored by ix86_function_specific_restore.  Used to implement
   per-function target attributes.  The enum-valued globals are
   range-checked first because the corresponding cl_target_option
   fields are presumably byte-sized -- TODO confirm against the
   cl_target_option definition.  */
3409 ix86_function_specific_save (struct cl_target_option *ptr)
/* Each enum value must fit in 8 bits before being stored.  */
3411 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3412 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3413 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3414 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3415 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
/* Copy the global option state into the save area, field by field.  */
3417 ptr->arch = ix86_arch;
3418 ptr->schedule = ix86_schedule;
3419 ptr->tune = ix86_tune;
3420 ptr->fpmath = ix86_fpmath;
3421 ptr->branch_cost = ix86_branch_cost;
3422 ptr->tune_defaulted = ix86_tune_defaulted;
3423 ptr->arch_specified = ix86_arch_specified;
3424 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3425 ptr->target_flags_explicit = target_flags_explicit;
3428 /* Restore the current options */
/* Restore global target-option state from PTR -- the inverse of
   ix86_function_specific_save.  After copying the fields back, the
   cached per-arch and per-tune feature bitmaps are recomputed, but
   only when the arch or tune value actually changed.  */
3431 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember the previous values so the feature tables are rebuilt only
   on a real change.  */
3433 enum processor_type old_tune = ix86_tune;
3434 enum processor_type old_arch = ix86_arch;
3435 unsigned int ix86_arch_mask, ix86_tune_mask;
/* Copy the saved state back into the globals (casts undo the narrow
   storage used in cl_target_option).  */
3438 ix86_arch = (enum processor_type) ptr->arch;
3439 ix86_schedule = (enum attr_cpu) ptr->schedule;
3440 ix86_tune = (enum processor_type) ptr->tune;
3441 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3442 ix86_branch_cost = ptr->branch_cost;
3443 ix86_tune_defaulted = ptr->tune_defaulted;
3444 ix86_arch_specified = ptr->arch_specified;
3445 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3446 target_flags_explicit = ptr->target_flags_explicit;
3448 /* Recreate the arch feature tests if the arch changed */
3449 if (old_arch != ix86_arch)
3451 ix86_arch_mask = 1u << ix86_arch;
3452 for (i = 0; i < X86_ARCH_LAST; ++i)
3453 ix86_arch_features[i]
3454 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3457 /* Recreate the tune optimization tests */
3458 if (old_tune != ix86_tune)
3460 ix86_tune_mask = 1u << ix86_tune;
3461 for (i = 0; i < X86_TUNE_LAST; ++i)
3462 ix86_tune_features[i]
3463 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3467 /* Print the current options */
/* Dump the target options held in PTR to FILE at the given INDENT,
   for debugging of per-function target attributes.  Prints the option
   string, arch, tune, fpmath and branch-cost fields.  */
3470 ix86_function_specific_print (FILE *file, int indent,
3471 struct cl_target_option *ptr)
/* Build a human-readable "-m..." style string from the saved flags.  */
3474 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3475 NULL, NULL, NULL, false);
/* Print the arch both numerically and, when in range, by name.  */
3477 fprintf (file, "%*sarch = %d (%s)\n",
3480 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3481 ? cpu_names[ptr->arch]
3484 fprintf (file, "%*stune = %d (%s)\n",
3487 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3488 ? cpu_names[ptr->tune]
/* fpmath is a bitmask: 387 and/or SSE may both be set.  */
3491 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3492 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3493 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3494 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
/* The target string is heap-allocated by ix86_target_string; release
   it after printing.  */
3498 fprintf (file, "%*s%s\n", indent, "", target_string);
3499 free (target_string);
3504 /* Inner function to process the attribute((target(...))), take an argument and
3505 set the current options from the argument. If we have a list, recursively go
/* Inner worker for attribute((target("...."))): ARGS is either a
   TREE_LIST (processed recursively) or a STRING_CST holding a
   comma-separated list of options.  Recognized ISA options are routed
   through ix86_handle_option; yes/no options toggle bits in
   target_flags; string-valued options (arch=, tune=, fpmath=) are
   duplicated into P_STRINGS for the caller to apply.  Returns a
   success flag (the return statements themselves fall in lines not
   visible in this elided listing).  */
3509 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry builders: S is the option name, O the OPT_* enumerator,
   M the target_flags mask (0 where unused).  sizeof (S)-1 precomputes
   the name length for the matcher below.  */
3514 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3515 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3516 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3517 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3532 enum ix86_opt_type type;
/* ISA options: each maps directly to the corresponding -m switch.  */
3537 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3538 IX86_ATTR_ISA ("abm", OPT_mabm),
3539 IX86_ATTR_ISA ("aes", OPT_maes),
3540 IX86_ATTR_ISA ("avx", OPT_mavx),
3541 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3542 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3543 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3544 IX86_ATTR_ISA ("sse", OPT_msse),
3545 IX86_ATTR_ISA ("sse2", OPT_msse2),
3546 IX86_ATTR_ISA ("sse3", OPT_msse3),
3547 IX86_ATTR_ISA ("sse4", OPT_msse4),
3548 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3549 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3550 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3551 IX86_ATTR_ISA ("sse5", OPT_msse5),
3552 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3554 /* string options */
3555 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3556 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3557 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Flag options: YES entries set the mask when enabled, NO entries
   hold an inverted-sense mask (e.g. MASK_NO_FANCY_MATH_387).  */
3560 IX86_ATTR_YES ("cld",
3564 IX86_ATTR_NO ("fancy-math-387",
3565 OPT_mfancy_math_387,
3566 MASK_NO_FANCY_MATH_387),
3568 IX86_ATTR_NO ("fused-madd",
3570 MASK_NO_FUSED_MADD),
3572 IX86_ATTR_YES ("ieee-fp",
3576 IX86_ATTR_YES ("inline-all-stringops",
3577 OPT_minline_all_stringops,
3578 MASK_INLINE_ALL_STRINGOPS),
3580 IX86_ATTR_YES ("inline-stringops-dynamically",
3581 OPT_minline_stringops_dynamically,
3582 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3584 IX86_ATTR_NO ("align-stringops",
3585 OPT_mno_align_stringops,
3586 MASK_NO_ALIGN_STRINGOPS),
3588 IX86_ATTR_YES ("recip",
3594 /* If this is a list, recurse to get the options. */
3595 if (TREE_CODE (args) == TREE_LIST)
3599 for (; args; args = TREE_CHAIN (args))
3600 if (TREE_VALUE (args)
3601 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
/* Anything other than a list or a string is malformed input.  */
3607 else if (TREE_CODE (args) != STRING_CST)
3610 /* Handle multiple arguments separated by commas. */
3611 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3613 while (next_optstr && *next_optstr != '\0')
3615 char *p = next_optstr;
3617 char *comma = strchr (next_optstr, ',');
3618 const char *opt_string;
3619 size_t len, opt_len;
3624 enum ix86_opt_type type = ix86_opt_unknown;
/* A comma ends this option; advance past it for the next iteration.  */
3630 len = comma - next_optstr;
3631 next_optstr = comma + 1;
3639 /* Recognize no-xxx. */
3640 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3649 /* Find the option. */
3652 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3654 type = attrs[i].type;
3655 opt_len = attrs[i].len;
/* First-character check is a cheap pre-filter; string options match
   on prefix (len > opt_len), the rest on exact length.  */
3656 if (ch == attrs[i].string[0]
3657 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3658 && memcmp (p, attrs[i].string, opt_len) == 0)
3661 mask = attrs[i].mask;
3662 opt_string = attrs[i].string;
3667 /* Process the option. */
3670 error ("attribute(target(\"%s\")) is unknown", orig_p);
/* ISA options reuse the regular command-line option machinery.  */
3674 else if (type == ix86_opt_isa)
3675 ix86_handle_option (opt, p, opt_set_p);
3677 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* NO-style entries store an inverted mask, so flip the sense.  */
3679 if (type == ix86_opt_no)
3680 opt_set_p = !opt_set_p;
3683 target_flags |= mask;
3685 target_flags &= ~mask;
/* String options may appear at most once; the value after the '='
   is duplicated for the caller to own and free.  */
3688 else if (type == ix86_opt_str)
3692 error ("option(\"%s\") was already specified", opt_string);
3696 p_strings[opt] = xstrdup (p + opt_len);
3706 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Build a TARGET_OPTION_NODE tree for the target attribute in ARGS,
   or NULL on error.  Temporarily overrides the global option strings,
   reruns override_options, snapshots the result with
   build_target_option_node, then restores the original strings and
   frees the attribute-supplied copies.  */
3709 ix86_valid_target_attribute_tree (tree args)
/* Save the globals that parsing may clobber so they can be restored
   before returning.  */
3711 const char *orig_arch_string = ix86_arch_string;
3712 const char *orig_tune_string = ix86_tune_string;
3713 const char *orig_fpmath_string = ix86_fpmath_string;
3714 int orig_tune_defaulted = ix86_tune_defaulted;
3715 int orig_arch_specified = ix86_arch_specified;
3716 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3719 struct cl_target_option *def
3720 = TREE_TARGET_OPTION (target_option_default_node);
3722 /* Process each of the options on the chain. */
3723 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3726 /* If the changed options are different from the default, rerun override_options,
3727 and then save the options away. The string options are attribute options,
3728 and will be undone when we copy the save structure. */
3729 if (ix86_isa_flags != def->ix86_isa_flags
3730 || target_flags != def->target_flags
3731 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3732 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3733 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3735 /* If we are using the default tune= or arch=, undo the string assigned,
3736 and use the default. */
3737 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3738 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3739 else if (!orig_arch_specified)
3740 ix86_arch_string = NULL;
3742 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3743 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3744 else if (orig_tune_defaulted)
3745 ix86_tune_string = NULL;
3747 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3748 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3749 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3750 else if (!TARGET_64BIT && TARGET_SSE)
3751 ix86_fpmath_string = "sse,387";
3753 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3754 override_options (false);
3756 /* Add any builtin functions with the new isa if any. */
3757 ix86_add_new_builtins (ix86_isa_flags);
3759 /* Save the current options unless we are validating options for
3761 t = build_target_option_node ();
/* Restore the option strings we overrode above.  */
3763 ix86_arch_string = orig_arch_string;
3764 ix86_tune_string = orig_tune_string;
3765 ix86_fpmath_string = orig_fpmath_string;
3767 /* Free up memory allocated to hold the strings */
3768 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3769 if (option_strings[i])
3770 free (option_strings[i]);
3776 /* Hook to validate attribute((target("string"))). */
/* Hook to validate attribute((target("string"))) on FNDECL.  Saves
   the current target/optimization state, evaluates ARGS into a new
   target-option tree, records it (and any changed optimization node)
   on FNDECL, then restores the previous state.  NAME and FLAGS are
   unused.  */
3779 ix86_valid_target_attribute_p (tree fndecl,
3780 tree ARG_UNUSED (name),
3782 int ARG_UNUSED (flags))
3784 struct cl_target_option cur_target;
3786 tree old_optimize = build_optimization_node ();
3787 tree new_target, new_optimize;
3788 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3790 /* If the function changed the optimization levels as well as setting target
3791 options, start with the optimizations specified. */
3792 if (func_optimize && func_optimize != old_optimize)
3793 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3795 /* The target attributes may also change some optimization flags, so update
3796 the optimization options if necessary. */
3797 cl_target_option_save (&cur_target);
3798 new_target = ix86_valid_target_attribute_tree (args);
3799 new_optimize = build_optimization_node ();
/* Attach the resulting option nodes to the function declaration.  */
3806 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3808 if (old_optimize != new_optimize)
3809 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary state changes made while evaluating ARGS.  */
3812 cl_target_option_restore (&cur_target);
3814 if (old_optimize != new_optimize)
3815 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3821 /* Hook to determine if one function can safely inline another. */
/* Hook to determine if CALLER can safely inline CALLEE, based on
   their function-specific target options.  Inlining is allowed when
   the callee's ISA flags are a subset of the caller's and the non-ISA
   options (target flags, arch, tune, fpmath, branch cost) agree.  */
3824 ix86_can_inline_p (tree caller, tree callee)
3827 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3828 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3830 /* If callee has no option attributes, then it is ok to inline. */
3834 /* If caller has no option attributes, but callee does then it is not ok to
3836 else if (!caller_tree)
3841 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3842 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3844 /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function
3845 can inline a SSE2 function but a SSE2 function can't inline a SSE5
/* Subset test: AND of the two flag sets must equal the callee's.  */
3847 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3848 != callee_opts->ix86_isa_flags)
3851 /* See if we have the same non-isa options. */
3852 else if (caller_opts->target_flags != callee_opts->target_flags)
3855 /* See if arch, tune, etc. are the same. */
3856 else if (caller_opts->arch != callee_opts->arch)
3859 else if (caller_opts->tune != callee_opts->tune)
3862 else if (caller_opts->fpmath != callee_opts->fpmath)
3865 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3876 /* Remember the last target of ix86_set_current_function. */
3877 static GTY(()) tree ix86_previous_fndecl;
3879 /* Establish appropriate back-end context for processing the function
3880 FNDECL. The argument might be NULL to indicate processing at top
3881 level, outside of any function scope. */
/* Establish back-end context for FNDECL (NULL means top level).  The
   previous fndecl is cached in ix86_previous_fndecl so repeated calls
   for the same function are cheap; options are only restored when the
   function's target-option tree differs from the last one seen.  */
3883 ix86_set_current_function (tree fndecl)
3885 /* Only change the context if the function changes. This hook is called
3886 several times in the course of compiling a function, and we don't want to
3887 slow things down too much or call target_reinit when it isn't safe. */
3888 if (fndecl && fndecl != ix86_previous_fndecl)
3890 tree old_tree = (ix86_previous_fndecl
3891 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3894 tree new_tree = (fndecl
3895 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3898 ix86_previous_fndecl = fndecl;
/* Identical option trees mean nothing to do.  */
3899 if (old_tree == new_tree)
/* Switch to the new function's saved target options...  */
3904 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* ...or back to the current global defaults when the new function has
   no target attribute of its own.  */
3910 struct cl_target_option *def
3911 = TREE_TARGET_OPTION (target_option_current_node);
3913 cl_target_option_restore (def);
3920 /* Return true if this goes in large data/bss. */
/* Return true if EXP should go in the large data/bss sections used by
   the x86-64 medium code model.  Only variables qualify, either by an
   explicit .ldata/.lbss section name or by exceeding (or having an
   unknown, possibly incomplete) size versus ix86_section_threshold.  */
3923 ix86_in_large_data_p (tree exp)
/* Large sections only exist for the medium code model.  */
3925 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3928 /* Functions are never large data. */
3929 if (TREE_CODE (exp) == FUNCTION_DECL)
/* Honor an explicit section placement into the large sections.  */
3932 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3934 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3935 if (strcmp (section, ".ldata") == 0
3936 || strcmp (section, ".lbss") == 0)
3942 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3944 /* If this is an incomplete type with size 0, then we can't put it
3945 in data because it might be too big when completed. */
3946 if (!size || size > ix86_section_threshold)
3953 /* Switch to the appropriate section for output of DECL.
3954 DECL is either a `VAR_DECL' node or a constant of some sort.
3955 RELOC indicates whether forming the initial value of DECL requires
3956 link-time relocations. */
3958 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* Select the output section for DECL.  For medium-model large data
   (see ix86_in_large_data_p) this maps the normal categorized section
   to its ".l"-prefixed large-section counterpart; everything else
   falls through to default_elf_select_section.  RELOC indicates
   whether DECL's initial value needs link-time relocations.  */
3962 x86_64_elf_select_section (tree decl, int reloc,
3963 unsigned HOST_WIDE_INT align)
3965 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3966 && ix86_in_large_data_p (decl))
3968 const char *sname = NULL;
3969 unsigned int flags = SECTION_WRITE;
/* Map the generic section category to the large-model section name.  */
3970 switch (categorize_decl_for_section (decl, reloc))
3975 case SECCAT_DATA_REL:
3976 sname = ".ldata.rel";
3978 case SECCAT_DATA_REL_LOCAL:
3979 sname = ".ldata.rel.local";
3981 case SECCAT_DATA_REL_RO:
3982 sname = ".ldata.rel.ro";
3984 case SECCAT_DATA_REL_RO_LOCAL:
3985 sname = ".ldata.rel.ro.local";
3989 flags |= SECTION_BSS;
3992 case SECCAT_RODATA_MERGE_STR:
3993 case SECCAT_RODATA_MERGE_STR_INIT:
3994 case SECCAT_RODATA_MERGE_CONST:
3998 case SECCAT_SRODATA:
4005 /* We don't split these for medium model. Place them into
4006 default sections and hope for best. */
4008 case SECCAT_EMUTLS_VAR:
4009 case SECCAT_EMUTLS_TMPL:
4014 /* We might get called with string constants, but get_named_section
4015 doesn't like them as they are not DECLs. Also, we need to set
4016 flags in that case. */
4018 return get_section (sname, flags, NULL);
4019 return get_named_section (decl, sname, reloc);
/* Not large data: use the standard ELF section selection.  */
4022 return default_elf_select_section (decl, reloc, align);
4025 /* Build up a unique section name, expressed as a
4026 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4027 RELOC indicates whether the initial value of EXP requires
4028 link-time relocations. */
4030 static void ATTRIBUTE_UNUSED
4031 x86_64_elf_unique_section (tree decl, int reloc)
/* Large-data decls under the medium code model get a ".l"-prefixed
   per-decl section name; others are handled by default_unique_section.  */
4033 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4034 && ix86_in_large_data_p (decl))
4036 const char *prefix = NULL;
4037 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4038 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
/* Choose the section-name prefix from the decl's category; one_only
   decls use the short ".ld"/".lb"/".lr" forms.  */
4040 switch (categorize_decl_for_section (decl, reloc))
4043 case SECCAT_DATA_REL:
4044 case SECCAT_DATA_REL_LOCAL:
4045 case SECCAT_DATA_REL_RO:
4046 case SECCAT_DATA_REL_RO_LOCAL:
4047 prefix = one_only ? ".ld" : ".ldata";
4050 prefix = one_only ? ".lb" : ".lbss";
4053 case SECCAT_RODATA_MERGE_STR:
4054 case SECCAT_RODATA_MERGE_STR_INIT:
4055 case SECCAT_RODATA_MERGE_CONST:
4056 prefix = one_only ? ".lr" : ".lrodata";
4058 case SECCAT_SRODATA:
4065 /* We don't split these for medium model. Place them into
4066 default sections and hope for best. */
4068 case SECCAT_EMUTLS_VAR:
4069 prefix = targetm.emutls.var_section;
4071 case SECCAT_EMUTLS_TMPL:
4072 prefix = targetm.emutls.tmpl_section;
4077 const char *name, *linkonce;
/* Build "<linkonce><prefix>.<stripped-assembler-name>" and record it
   as the decl's section name.  */
4080 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4081 name = targetm.strip_name_encoding (name);
4083 /* If we're using one_only, then there needs to be a .gnu.linkonce
4084 prefix to the section name. */
4085 linkonce = one_only ? ".gnu.linkonce" : "";
4087 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4089 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4093 default_unique_section (decl, reloc);
4096 #ifdef COMMON_ASM_OP
4097 /* This says how to output assembler code to declare an
4098 uninitialized external linkage data object.
4100 For medium model x86-64 we need to use .largecomm opcode for
4103 x86_elf_aligned_common (FILE *file,
4104 const char *name, unsigned HOST_WIDE_INT size,
/* Emit ".largecomm" instead of the normal COMMON_ASM_OP when the object
   exceeds the large-data threshold under the medium code model, then
   emit "name,size,alignment-in-bytes".  */
4107 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4108 && size > (unsigned int)ix86_section_threshold)
4109 fprintf (file, ".largecomm\t");
4111 fprintf (file, "%s", COMMON_ASM_OP);
4112 assemble_name (file, name);
4113 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4114 size, align / BITS_PER_UNIT);
4118 /* Utility function for targets to use in implementing
4119 ASM_OUTPUT_ALIGNED_BSS. */
4122 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4123 const char *name, unsigned HOST_WIDE_INT size,
/* Large objects under the medium code model go to ".lbss"; otherwise the
   regular .bss section is used.  Then emit alignment, the object label,
   and reserve SIZE bytes (at least 1).  */
4126 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4127 && size > (unsigned int)ix86_section_threshold)
4128 switch_to_section (get_named_section (decl, ".lbss", 0));
4130 switch_to_section (bss_section);
4131 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4132 #ifdef ASM_DECLARE_OBJECT_NAME
4133 last_assemble_variable_decl = decl;
4134 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4136 /* Standard thing is just output label for the object. */
4137 ASM_OUTPUT_LABEL (file, name);
4138 #endif /* ASM_DECLARE_OBJECT_NAME */
4139 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set target-specific optimization defaults for the given -O LEVEL.
   Values set to 2 here mean "not yet decided"; they are resolved later
   in override_options once TARGET_64BIT is known.  */
4143 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4145 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4146 make the problem with not enough registers even worse. */
4147 #ifdef INSN_SCHEDULING
4149 flag_schedule_insns = 0;
4153 /* The Darwin libraries never set errno, so we might as well
4154 avoid calling them when that's the only reason we would. */
4155 flag_errno_math = 0;
4157 /* The default values of these switches depend on the TARGET_64BIT
4158 that is not known at this moment. Mark these values with 2 and
4159 let user the to override these. In case there is no command line option
4160 specifying them, we will set the defaults in override_options. */
4162 flag_omit_frame_pointer = 2;
4163 flag_pcc_struct_return = 2;
4164 flag_asynchronous_unwind_tables = 2;
4165 flag_vect_cost_model = 1;
4166 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4167 SUBTARGET_OPTIMIZATION_OPTIONS;
4171 /* Decide whether we can make a sibling call to a function. DECL is the
4172 declaration of the function being targeted by the call and EXP is the
4173 CALL_EXPR representing the call. */
4176 ix86_function_ok_for_sibcall (tree decl, tree exp)
4181 /* If we are generating position-independent code, we cannot sibcall
4182 optimize any indirect call, or a direct call to a global function,
4183 as the PLT requires %ebx be live. */
4184 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Recover the callee's function type from the CALL_EXPR's function
   expression (stripping one level of pointer if present).  */
4191 func = TREE_TYPE (CALL_EXPR_FN (exp));
4192 if (POINTER_TYPE_P (func))
4193 func = TREE_TYPE (func);
4196 /* Check that the return value locations are the same. Like
4197 if we are returning floats on the 80387 register stack, we cannot
4198 make a sibcall from a function that doesn't return a float to a
4199 function that does or, conversely, from a function that does return
4200 a float to a function that doesn't; the necessary stack adjustment
4201 would not be executed. This is also the place we notice
4202 differences in the return value ABI. Note that it is ok for one
4203 of the functions to have void return type as long as the return
4204 value of the other is passed in a register. */
4205 a = ix86_function_value (TREE_TYPE (exp), func, false);
4206 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4208 if (STACK_REG_P (a) || STACK_REG_P (b))
4210 if (!rtx_equal_p (a, b))
4213 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4215 else if (!rtx_equal_p (a, b))
4218 /* If this call is indirect, we'll need to be able to use a call-clobbered
4219 register for the address of the target function. Make sure that all
4220 such registers are not used for passing parameters. */
4221 if (!decl && !TARGET_64BIT)
4225 /* We're looking at the CALL_EXPR, we need the type of the function. */
4226 type = CALL_EXPR_FN (exp); /* pointer expression */
4227 type = TREE_TYPE (type); /* pointer type */
4228 type = TREE_TYPE (type); /* function type */
4230 if (ix86_function_regparm (type, NULL) >= 3)
4232 /* ??? Need to count the actual number of registers to be used,
4233 not the possible number of registers. Fix later. */
4238 /* Dllimport'd functions are also called indirectly. */
4239 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4241 && decl && DECL_DLLIMPORT_P (decl)
4242 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4245 /* If we need to align the outgoing stack, then sibcalling would
4246 unalign the stack, which may break the called function. */
4247 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4250 /* Otherwise okay. That also includes certain types of indirect calls. */
4254 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4255 calling convention attributes;
4256 arguments as in struct attribute_spec.handler. */
4259 ix86_handle_cconv_attribute (tree *node, tree name,
4261 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types (or decls of such);
   otherwise warn and drop the attribute.  */
4264 if (TREE_CODE (*node) != FUNCTION_TYPE
4265 && TREE_CODE (*node) != METHOD_TYPE
4266 && TREE_CODE (*node) != FIELD_DECL
4267 && TREE_CODE (*node) != TYPE_DECL)
4269 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4271 *no_add_attrs = true;
4275 /* Can combine regparm with all attributes but fastcall. */
4276 if (is_attribute_p ("regparm", name))
4280 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4282 error ("fastcall and regparm attributes are not compatible");
/* regparm takes one argument: an integer constant no larger than
   REGPARM_MAX.  */
4285 cst = TREE_VALUE (args);
4286 if (TREE_CODE (cst) != INTEGER_CST)
4288 warning (OPT_Wattributes,
4289 "%qE attribute requires an integer constant argument",
4291 *no_add_attrs = true;
4293 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4295 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4297 *no_add_attrs = true;
4305 /* Do not warn when emulating the MS ABI. */
4306 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4307 warning (OPT_Wattributes, "%qE attribute ignored",
4309 *no_add_attrs = true;
4313 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4314 if (is_attribute_p ("fastcall", name))
4316 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4318 error ("fastcall and cdecl attributes are not compatible");
4320 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4322 error ("fastcall and stdcall attributes are not compatible");
4324 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4326 error ("fastcall and regparm attributes are not compatible");
4330 /* Can combine stdcall with fastcall (redundant), regparm and
4332 else if (is_attribute_p ("stdcall", name))
4334 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4336 error ("stdcall and cdecl attributes are not compatible");
4338 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4340 error ("stdcall and fastcall attributes are not compatible");
4344 /* Can combine cdecl with regparm and sseregparm. */
4345 else if (is_attribute_p ("cdecl", name))
4347 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4349 error ("stdcall and cdecl attributes are not compatible");
4351 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4353 error ("fastcall and cdecl attributes are not compatible");
4357 /* Can combine sseregparm with all attributes. */
4362 /* Return 0 if the attributes for two types are incompatible, 1 if they
4363 are compatible, and 2 if they are nearly compatible (which causes a
4364 warning to be generated). */
4367 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4369 /* Check for mismatch of non-default calling convention. */
4370 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only function/method types carry calling-convention attributes.  */
4372 if (TREE_CODE (type1) != FUNCTION_TYPE
4373 && TREE_CODE (type1) != METHOD_TYPE)
4376 /* Check for mismatched fastcall/regparm types. */
4377 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4378 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4379 || (ix86_function_regparm (type1, NULL)
4380 != ix86_function_regparm (type2, NULL)))
4383 /* Check for mismatched sseregparm types. */
4384 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4385 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4388 /* Check for mismatched return types (cdecl vs stdcall). */
4389 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4390 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4396 /* Return the regparm value for a function with the indicated TYPE and DECL.
4397 DECL may be NULL when calling function indirectly
4398 or considering a libcall. */
4401 ix86_function_regparm (const_tree type, const_tree decl)
/* error_issued makes the nested-function regparm(3) error fire once per
   compilation rather than once per query.  */
4406 static bool error_issued;
/* 64-bit: the register count is fixed by the function's ABI.  */
4409 return (ix86_function_type_abi (type) == SYSV_ABI
4410 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
/* 32-bit: start from -mregparm, then let an explicit regparm attribute
   on the type override it.  */
4412 regparm = ix86_regparm;
4413 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4417 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4419 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4421 /* We can't use regparm(3) for nested functions because
4422 these pass static chain pointer in %ecx register. */
4423 if (!error_issued && regparm == 3
4424 && decl_function_context (decl)
4425 && !DECL_NO_STATIC_CHAIN (decl))
4427 error ("nested functions are limited to 2 register parameters");
4428 error_issued = true;
4436 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4439 /* Use register calling convention for local functions when possible. */
4441 && TREE_CODE (decl) == FUNCTION_DECL
4445 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4446 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4449 int local_regparm, globals = 0, regno;
4452 /* Make sure no regparm register is taken by a
4453 fixed register variable. */
4454 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4455 if (fixed_regs[local_regparm])
4458 /* We can't use regparm(3) for nested functions as these use
4459 static chain pointer in third argument. */
4460 if (local_regparm == 3
4461 && decl_function_context (decl)
4462 && !DECL_NO_STATIC_CHAIN (decl))
4465 /* If the function realigns its stackpointer, the prologue will
4466 clobber %ecx. If we've already generated code for the callee,
4467 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4468 scanning the attributes for the self-realigning property. */
4469 f = DECL_STRUCT_FUNCTION (decl);
4470 /* Since current internal arg pointer won't conflict with
4471 parameter passing regs, so no need to change stack
4472 realignment and adjust regparm number.
4474 Each fixed register usage increases register pressure,
4475 so less registers should be used for argument passing.
4476 This functionality can be overriden by an explicit
4478 for (regno = 0; regno <= DI_REG; regno++)
4479 if (fixed_regs[regno])
/* Subtract fixed (global register variable) registers from the
   locally-usable regparm count.  */
4483 = globals < local_regparm ? local_regparm - globals : 0;
4485 if (local_regparm > regparm)
4486 regparm = local_regparm;
4493 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4494 DFmode (2) arguments in SSE registers for a function with the
4495 indicated TYPE and DECL. DECL may be NULL when calling function
4496 indirectly or considering a libcall. Otherwise return 0. */
4499 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4501 gcc_assert (!TARGET_64BIT);
4503 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4504 by the sseregparm attribute. */
4505 if (TARGET_SSEREGPARM
4506 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE hardware support is an error (reported only
   when WARN is set).  */
4513 error ("Calling %qD with attribute sseregparm without "
4514 "SSE/SSE2 enabled", decl);
4516 error ("Calling %qT with attribute sseregparm without "
4517 "SSE/SSE2 enabled", type);
4525 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4526 (and DFmode for SSE2) arguments in SSE registers. */
4527 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4529 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4530 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4532 return TARGET_SSE2 ? 2 : 1;
4538 /* Return true if EAX is live at the start of the function. Used by
4539 ix86_expand_prologue to determine if we need special help before
4540 calling allocate_stack_worker. */
4543 ix86_eax_live_at_start_p (void)
4545 /* Cheat. Don't bother working forward from ix86_function_regparm
4546 to the function type to whether an actual argument is located in
4547 eax. Instead just look at cfg info, which is still close enough
4548 to correct at this point. This gives false positives for broken
4549 functions that might use uninitialized data that happens to be
4550 allocated in eax, but who cares? */
/* Register 0 is %eax; test its membership in the entry block's
   live-out set.  */
4551 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4554 /* Value is the number of bytes of arguments automatically
4555 popped when returning from a subroutine call.
4556 FUNDECL is the declaration node of the function (as a tree),
4557 FUNTYPE is the data type of the function (as a tree),
4558 or for a library call it is an identifier node for the subroutine name.
4559 SIZE is the number of bytes of arguments passed on the stack.
4561 On the 80386, the RTD insn may be used to pop them if the number
4562 of args is fixed, but if the number is variable then the caller
4563 must pop them all. RTD can't be used for library calls now
4564 because the library is compiled with the Unix compiler.
4565 Use of RTD is a selectable option, since it is incompatible with
4566 standard Unix calling sequences. If the option is not selected,
4567 the caller must always pop the args.
4569 The attribute stdcall is equivalent to RTD on a per module basis. */
4572 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4576 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies only to real function decls, never to library-call
   identifier nodes.  */
4580 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4582 /* Cdecl functions override -mrtd, and never pop the stack. */
4583 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4585 /* Stdcall and fastcall functions will pop the stack if not
4587 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4588 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4591 if (rtd && ! stdarg_p (funtype))
4595 /* Lose any fake structure return argument if it is passed on the stack. */
4596 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4597 && !KEEP_AGGREGATE_RETURN_POINTER)
4599 int nregs = ix86_function_regparm (funtype, fundecl);
/* With no regparm registers the hidden return pointer is on the
   stack; the callee pops that one word.  */
4601 return GET_MODE_SIZE (Pmode);
4607 /* Argument support functions. */
4609 /* Return true when register may be used to pass function parameters. */
4611 ix86_function_arg_regno_p (int regno)
4614 const int *parm_regs;
/* 32-bit cases: the low REGPARM_MAX integer registers, plus SSE/MMX
   parameter registers depending on enabled ISA extensions.  */
4619 return (regno < REGPARM_MAX
4620 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4622 return (regno < REGPARM_MAX
4623 || (TARGET_MMX && MMX_REGNO_P (regno)
4624 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4625 || (TARGET_SSE && SSE_REGNO_P (regno)
4626 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4631 if (SSE_REGNO_P (regno) && TARGET_SSE)
4636 if (TARGET_SSE && SSE_REGNO_P (regno)
4637 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4641 /* TODO: The function should depend on current function ABI but
4642 builtins.c would need updating then. Therefore we use the
4645 /* RAX is used as hidden argument to va_arg functions. */
4646 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* 64-bit: look REGNO up in the integer parameter-register table for
   the current default ABI.  */
4649 if (ix86_abi == MS_ABI)
4650 parm_regs = x86_64_ms_abi_int_parameter_registers;
4652 parm_regs = x86_64_int_parameter_registers;
4653 for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
4654 : X86_64_REGPARM_MAX); i++)
4655 if (regno == parm_regs[i])
4660 /* Return if we do not know how to pass TYPE solely in registers. */
4663 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
/* Defer to the generic test first; it handles variable-sized and
   padded types.  */
4665 if (must_pass_in_stack_var_size_or_pad (mode, type))
4668 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4669 The layout_type routine is crafty and tries to trick us into passing
4670 currently unsupported vector types on the stack by using TImode. */
4671 return (!TARGET_64BIT && mode == TImode
4672 && type && TREE_CODE (type) != VECTOR_TYPE);
4675 /* It returns the size, in bytes, of the area reserved for arguments passed
4676 in registers for the function represented by fndecl dependent to the used
4679 ix86_reg_parm_stack_space (const_tree fndecl)
/* FNDECL may be a decl or a type; dispatch accordingly to find the
   calling ABI.  MS_ABI reserves a shadow space; SYSV_ABI reserves none.  */
4681 enum calling_abi call_abi = SYSV_ABI;
4682 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4683 call_abi = ix86_function_abi (fndecl);
4685 call_abi = ix86_function_type_abi (fndecl);
4686 if (call_abi == MS_ABI)
4691 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4694 ix86_function_type_abi (const_tree fntype)
/* Start from the default ABI and let an explicit ms_abi/sysv_abi
   attribute on the function type flip it.  Only meaningful in 64-bit
   mode.  */
4696 if (TARGET_64BIT && fntype != NULL)
4698 enum calling_abi abi = ix86_abi;
4699 if (abi == SYSV_ABI)
4701 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4704 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI (SYSV_ABI or MS_ABI) for function decl FNDECL,
   by querying the ABI of its type.  */
4711 static enum calling_abi
4712 ix86_function_abi (const_tree fndecl)
4716 return ix86_function_type_abi (TREE_TYPE (fndecl));
4719 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4722 ix86_cfun_abi (void)
/* Outside a function, or in 32-bit mode, the per-function ABI field is
   not meaningful; otherwise return the cached value.  */
4724 if (! cfun || ! TARGET_64BIT)
4726 return cfun->machine->call_abi;
4730 extern void init_regs (void);
4732 /* Implementation of call abi switching target hook. Specific to FNDECL
4733 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4734 for more details. */
4736 ix86_call_abi_override (const_tree fndecl)
/* No decl means use the compilation-wide default ABI; otherwise derive
   the ABI from the decl's function type.  */
4738 if (fndecl == NULL_TREE)
4739 cfun->machine->call_abi = ix86_abi;
4741 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4744 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4745 re-initialization of init_regs each time we switch function context since
4746 this is needed only during RTL expansion. */
4748 ix86_maybe_switch_abi (void)
/* %esi's call-used status differs between the two ABIs, so it serves as
   a cheap probe for whether the register tables already match the
   current function's ABI.  */
4751 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4755 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4756 for a call to a function whose data type is FNTYPE.
4757 For a library call, FNTYPE is 0. */
4760 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4761 tree fntype, /* tree ptr for function decl */
4762 rtx libname, /* SYMBOL_REF of library name or 0 */
4765 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4766 memset (cum, 0, sizeof (*cum));
/* Determine the call ABI from the decl if we have one, else from the
   function type.  */
4769 cum->call_abi = ix86_function_abi (fndecl);
4771 cum->call_abi = ix86_function_type_abi (fntype);
4772 /* Set up the number of registers to use for passing arguments. */
4774 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4775 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4776 cum->nregs = ix86_regparm;
/* If this call uses the non-default ABI, its register counts differ
   from the compilation-wide defaults.  */
4779 if (cum->call_abi != ix86_abi)
4780 cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
4785 cum->sse_nregs = SSE_REGPARM_MAX;
4788 if (cum->call_abi != ix86_abi)
4789 cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4790 : X64_SSE_REGPARM_MAX;
4794 cum->mmx_nregs = MMX_REGPARM_MAX;
4795 cum->warn_avx = true;
4796 cum->warn_sse = true;
4797 cum->warn_mmx = true;
4799 /* Because type might mismatch in between caller and callee, we need to
4800 use actual type of function for local calls.
4801 FIXME: cgraph_analyze can be told to actually record if function uses
4802 va_start so for local functions maybe_vaarg can be made aggressive
4804 FIXME: once typesytem is fixed, we won't need this code anymore. */
4806 fntype = TREE_TYPE (fndecl);
4807 cum->maybe_vaarg = (fntype
4808 ? (!prototype_p (fntype) || stdarg_p (fntype))
4813 /* If there are variable arguments, then we won't pass anything
4814 in registers in 32-bit mode. */
4815 if (stdarg_p (fntype))
4826 /* Use ecx and edx registers if function has fastcall attribute,
4827 else look for regparm information. */
4830 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4836 cum->nregs = ix86_function_regparm (fntype, fndecl);
4839 /* Set up the number of SSE registers used for passing SFmode
4840 and DFmode arguments. Warn for mismatching ABI. */
4841 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4845 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4846 But in the case of vector types, it is some vector mode.
4848 When we have only some of our vector isa extensions enabled, then there
4849 are some modes for which vector_mode_supported_p is false. For these
4850 modes, the generic vector support in gcc will choose some non-vector mode
4851 in order to implement the type. By computing the natural mode, we'll
4852 select the proper ABI location for the operand and not depend on whatever
4853 the middle-end decides to do with these vector types.
4855 The midde-end can't deal with the vector types > 16 bytes. In this
4856 case, we return the original mode and warn ABI change if CUM isn't
4859 static enum machine_mode
4860 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4862 enum machine_mode mode = TYPE_MODE (type);
/* Only vector types whose TYPE_MODE is not already a vector mode need
   remapping; search the machine's vector modes for one with matching
   element mode and element count.  */
4864 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4866 HOST_WIDE_INT size = int_size_in_bytes (type);
4867 if ((size == 8 || size == 16 || size == 32)
4868 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4869 && TYPE_VECTOR_SUBPARTS (type) > 1)
4871 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4873 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4874 mode = MIN_MODE_VECTOR_FLOAT;
4876 mode = MIN_MODE_VECTOR_INT;
4878 /* Get the mode which has this inner mode and number of units. */
4879 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4880 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4881 && GET_MODE_INNER (mode) == innermode)
/* 32-byte vectors without AVX: warn once about the ABI change and
   fall back to the type's original mode.  */
4883 if (size == 32 && !TARGET_AVX)
4885 static bool warnedavx;
4892 warning (0, "AVX vector argument without AVX "
4893 "enabled changes the ABI");
4895 return TYPE_MODE (type);
4908 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4909 this may not agree with the mode that the type system has chosen for the
4910 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4911 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4914 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
/* Non-BLKmode: a plain REG in the type-system mode suffices.  */
4919 if (orig_mode != BLKmode)
4920 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap a REG in natural MODE inside a one-element PARALLEL at
   offset 0.  */
4923 tmp = gen_rtx_REG (mode, regno);
4924 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4925 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4931 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4932 of this code is to classify each 8bytes of incoming argument by the register
4933 class and assign registers accordingly. */
4935 /* Return the union class of CLASS1 and CLASS2.
4936 See the x86-64 PS ABI for details. */
4938 static enum x86_64_reg_class
4939 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
/* The rules below mirror the psABI's "merge" step for classifying a
   single eightbyte that receives contributions from several fields.  */
4941 /* Rule #1: If both classes are equal, this is the resulting class. */
4942 if (class1 == class2)
4945 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4947 if (class1 == X86_64_NO_CLASS)
4949 if (class2 == X86_64_NO_CLASS)
4952 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4953 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4954 return X86_64_MEMORY_CLASS;
4956 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4957 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4958 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4959 return X86_64_INTEGERSI_CLASS;
4960 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4961 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4962 return X86_64_INTEGER_CLASS;
4964 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4966 if (class1 == X86_64_X87_CLASS
4967 || class1 == X86_64_X87UP_CLASS
4968 || class1 == X86_64_COMPLEX_X87_CLASS
4969 || class2 == X86_64_X87_CLASS
4970 || class2 == X86_64_X87UP_CLASS
4971 || class2 == X86_64_COMPLEX_X87_CLASS)
4972 return X86_64_MEMORY_CLASS;
4974 /* Rule #6: Otherwise class SSE is used. */
4975 return X86_64_SSE_CLASS;
4978 /* Classify the argument of type TYPE and mode MODE.
4979 CLASSES will be filled by the register class used to pass each word
4980 of the operand. The number of words is returned. In case the parameter
4981 should be passed in memory, 0 is returned. As a special case for zero
4982 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4984 BIT_OFFSET is used internally for handling records and specifies offset
4985 of the offset in bits modulo 256 to avoid overflow cases.
4987 See the x86-64 PS ABI for details.
4991 classify_argument (enum machine_mode mode, const_tree type,
4992 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4994 HOST_WIDE_INT bytes =
4995 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4996 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4998 /* Variable sized entities are always passed/returned in memory. */
5002 if (mode != VOIDmode
5003 && targetm.calls.must_pass_in_stack (mode, type))
5006 if (type && AGGREGATE_TYPE_P (type))
5010 enum x86_64_reg_class subclasses[MAX_CLASSES];
5012 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5016 for (i = 0; i < words; i++)
5017 classes[i] = X86_64_NO_CLASS;
5019 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5020 signalize memory class, so handle it as special case. */
5023 classes[0] = X86_64_NO_CLASS;
5027 /* Classify each field of record and merge classes. */
5028 switch (TREE_CODE (type))
5031 /* And now merge the fields of structure. */
5032 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5034 if (TREE_CODE (field) == FIELD_DECL)
5038 if (TREE_TYPE (field) == error_mark_node)
5041 /* Bitfields are always classified as integer. Handle them
5042 early, since later code would consider them to be
5043 misaligned integers. */
5044 if (DECL_BIT_FIELD (field))
5046 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5047 i < ((int_bit_position (field) + (bit_offset % 64))
5048 + tree_low_cst (DECL_SIZE (field), 0)
5051 merge_classes (X86_64_INTEGER_CLASS,
5058 type = TREE_TYPE (field);
5060 /* Flexible array member is ignored. */
5061 if (TYPE_MODE (type) == BLKmode
5062 && TREE_CODE (type) == ARRAY_TYPE
5063 && TYPE_SIZE (type) == NULL_TREE
5064 && TYPE_DOMAIN (type) != NULL_TREE
5065 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5070 if (!warned && warn_psabi)
5073 inform (input_location,
5074 "The ABI of passing struct with"
5075 " a flexible array member has"
5076 " changed in GCC 4.4");
5080 num = classify_argument (TYPE_MODE (type), type,
5082 (int_bit_position (field)
5083 + bit_offset) % 256);
5086 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5087 for (i = 0; i < num && (i + pos) < words; i++)
5089 merge_classes (subclasses[i], classes[i + pos]);
5096 /* Arrays are handled as small records. */
5099 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5100 TREE_TYPE (type), subclasses, bit_offset);
5104 /* The partial classes are now full classes. */
5105 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5106 subclasses[0] = X86_64_SSE_CLASS;
5107 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5108 && !((bit_offset % 64) == 0 && bytes == 4))
5109 subclasses[0] = X86_64_INTEGER_CLASS;
5111 for (i = 0; i < words; i++)
5112 classes[i] = subclasses[i % num];
5117 case QUAL_UNION_TYPE:
5118 /* Unions are similar to RECORD_TYPE but offset is always 0.
5120 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5122 if (TREE_CODE (field) == FIELD_DECL)
5126 if (TREE_TYPE (field) == error_mark_node)
5129 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5130 TREE_TYPE (field), subclasses,
5134 for (i = 0; i < num; i++)
5135 classes[i] = merge_classes (subclasses[i], classes[i]);
5146 /* When size > 16 bytes, if the first one isn't
5147 X86_64_SSE_CLASS or any other ones aren't
5148 X86_64_SSEUP_CLASS, everything should be passed in
5150 if (classes[0] != X86_64_SSE_CLASS)
5153 for (i = 1; i < words; i++)
5154 if (classes[i] != X86_64_SSEUP_CLASS)
5158 /* Final merger cleanup. */
5159 for (i = 0; i < words; i++)
5161 /* If one class is MEMORY, everything should be passed in
5163 if (classes[i] == X86_64_MEMORY_CLASS)
5166 /* The X86_64_SSEUP_CLASS should be always preceded by
5167 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5168 if (classes[i] == X86_64_SSEUP_CLASS
5169 && classes[i - 1] != X86_64_SSE_CLASS
5170 && classes[i - 1] != X86_64_SSEUP_CLASS)
5172 /* The first one should never be X86_64_SSEUP_CLASS. */
5173 gcc_assert (i != 0);
5174 classes[i] = X86_64_SSE_CLASS;
5177 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5178 everything should be passed in memory. */
5179 if (classes[i] == X86_64_X87UP_CLASS
5180 && (classes[i - 1] != X86_64_X87_CLASS))
5184 /* The first one should never be X86_64_X87UP_CLASS. */
5185 gcc_assert (i != 0);
5186 if (!warned && warn_psabi)
5189 inform (input_location,
5190 "The ABI of passing union with long double"
5191 " has changed in GCC 4.4");
5199 /* Compute alignment needed. We align all types to natural boundaries with
5200 exception of XFmode that is aligned to 64bits. */
5201 if (mode != VOIDmode && mode != BLKmode)
5203 int mode_alignment = GET_MODE_BITSIZE (mode);
5206 mode_alignment = 128;
5207 else if (mode == XCmode)
5208 mode_alignment = 256;
5209 if (COMPLEX_MODE_P (mode))
5210 mode_alignment /= 2;
5211 /* Misaligned fields are always returned in memory. */
5212 if (bit_offset % mode_alignment)
5216 /* for V1xx modes, just use the base mode */
5217 if (VECTOR_MODE_P (mode) && mode != V1DImode
5218 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5219 mode = GET_MODE_INNER (mode);
5221 /* Classification of atomic types. */
5226 classes[0] = X86_64_SSE_CLASS;
5229 classes[0] = X86_64_SSE_CLASS;
5230 classes[1] = X86_64_SSEUP_CLASS;
5240 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5244 classes[0] = X86_64_INTEGERSI_CLASS;
5247 else if (size <= 64)
5249 classes[0] = X86_64_INTEGER_CLASS;
5252 else if (size <= 64+32)
5254 classes[0] = X86_64_INTEGER_CLASS;
5255 classes[1] = X86_64_INTEGERSI_CLASS;
5258 else if (size <= 64+64)
5260 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5268 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5272 /* OImode shouldn't be used directly. */
5277 if (!(bit_offset % 64))
5278 classes[0] = X86_64_SSESF_CLASS;
5280 classes[0] = X86_64_SSE_CLASS;
5283 classes[0] = X86_64_SSEDF_CLASS;
5286 classes[0] = X86_64_X87_CLASS;
5287 classes[1] = X86_64_X87UP_CLASS;
5290 classes[0] = X86_64_SSE_CLASS;
5291 classes[1] = X86_64_SSEUP_CLASS;
5294 classes[0] = X86_64_SSE_CLASS;
5295 if (!(bit_offset % 64))
5301 if (!warned && warn_psabi)
5304 inform (input_location,
5305 "The ABI of passing structure with complex float"
5306 " member has changed in GCC 4.4");
5308 classes[1] = X86_64_SSESF_CLASS;
5312 classes[0] = X86_64_SSEDF_CLASS;
5313 classes[1] = X86_64_SSEDF_CLASS;
5316 classes[0] = X86_64_COMPLEX_X87_CLASS;
5319 /* This mode is larger than 16 bytes. */
5327 classes[0] = X86_64_SSE_CLASS;
5328 classes[1] = X86_64_SSEUP_CLASS;
5329 classes[2] = X86_64_SSEUP_CLASS;
5330 classes[3] = X86_64_SSEUP_CLASS;
5338 classes[0] = X86_64_SSE_CLASS;
5339 classes[1] = X86_64_SSEUP_CLASS;
5346 classes[0] = X86_64_SSE_CLASS;
5352 gcc_assert (VECTOR_MODE_P (mode));
5357 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5359 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5360 classes[0] = X86_64_INTEGERSI_CLASS;
5362 classes[0] = X86_64_INTEGER_CLASS;
5363 classes[1] = X86_64_INTEGER_CLASS;
5364 return 1 + (bytes > 8);
5368 /* Examine the argument and return set number of register required in each
5369 class. Return 0 iff parameter should be passed in memory. */
5371 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5372 int *int_nregs, int *sse_nregs)
5374 enum x86_64_reg_class regclass[MAX_CLASSES];
5375 int n = classify_argument (mode, type, regclass, 0);
5381 for (n--; n >= 0; n--)
5382 switch (regclass[n])
5384 case X86_64_INTEGER_CLASS:
5385 case X86_64_INTEGERSI_CLASS:
5388 case X86_64_SSE_CLASS:
5389 case X86_64_SSESF_CLASS:
5390 case X86_64_SSEDF_CLASS:
5393 case X86_64_NO_CLASS:
5394 case X86_64_SSEUP_CLASS:
5396 case X86_64_X87_CLASS:
5397 case X86_64_X87UP_CLASS:
5401 case X86_64_COMPLEX_X87_CLASS:
5402 return in_return ? 2 : 0;
5403 case X86_64_MEMORY_CLASS:
5409 /* Construct container for the argument used by GCC interface. See
5410 FUNCTION_ARG for the detailed description. */
5413 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5414 const_tree type, int in_return, int nintregs, int nsseregs,
5415 const int *intreg, int sse_regno)
5417 /* The following variables hold the static issued_error state. */
5418 static bool issued_sse_arg_error;
5419 static bool issued_sse_ret_error;
5420 static bool issued_x87_ret_error;
5422 enum machine_mode tmpmode;
5424 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5425 enum x86_64_reg_class regclass[MAX_CLASSES];
5429 int needed_sseregs, needed_intregs;
5430 rtx exp[MAX_CLASSES];
5433 n = classify_argument (mode, type, regclass, 0);
5436 if (!examine_argument (mode, type, in_return, &needed_intregs,
5439 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5442 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5443 some less clueful developer tries to use floating-point anyway. */
5444 if (needed_sseregs && !TARGET_SSE)
5448 if (!issued_sse_ret_error)
5450 error ("SSE register return with SSE disabled");
5451 issued_sse_ret_error = true;
5454 else if (!issued_sse_arg_error)
5456 error ("SSE register argument with SSE disabled");
5457 issued_sse_arg_error = true;
5462 /* Likewise, error if the ABI requires us to return values in the
5463 x87 registers and the user specified -mno-80387. */
5464 if (!TARGET_80387 && in_return)
5465 for (i = 0; i < n; i++)
5466 if (regclass[i] == X86_64_X87_CLASS
5467 || regclass[i] == X86_64_X87UP_CLASS
5468 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5470 if (!issued_x87_ret_error)
5472 error ("x87 register return with x87 disabled");
5473 issued_x87_ret_error = true;
5478 /* First construct simple cases. Avoid SCmode, since we want to use
5479 single register to pass this type. */
5480 if (n == 1 && mode != SCmode)
5481 switch (regclass[0])
5483 case X86_64_INTEGER_CLASS:
5484 case X86_64_INTEGERSI_CLASS:
5485 return gen_rtx_REG (mode, intreg[0]);
5486 case X86_64_SSE_CLASS:
5487 case X86_64_SSESF_CLASS:
5488 case X86_64_SSEDF_CLASS:
5489 if (mode != BLKmode)
5490 return gen_reg_or_parallel (mode, orig_mode,
5491 SSE_REGNO (sse_regno));
5493 case X86_64_X87_CLASS:
5494 case X86_64_COMPLEX_X87_CLASS:
5495 return gen_rtx_REG (mode, FIRST_STACK_REG);
5496 case X86_64_NO_CLASS:
5497 /* Zero sized array, struct or class. */
5502 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5503 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5504 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5506 && regclass[0] == X86_64_SSE_CLASS
5507 && regclass[1] == X86_64_SSEUP_CLASS
5508 && regclass[2] == X86_64_SSEUP_CLASS
5509 && regclass[3] == X86_64_SSEUP_CLASS
5511 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5514 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5515 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5516 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5517 && regclass[1] == X86_64_INTEGER_CLASS
5518 && (mode == CDImode || mode == TImode || mode == TFmode)
5519 && intreg[0] + 1 == intreg[1])
5520 return gen_rtx_REG (mode, intreg[0]);
5522 /* Otherwise figure out the entries of the PARALLEL. */
5523 for (i = 0; i < n; i++)
5527 switch (regclass[i])
5529 case X86_64_NO_CLASS:
5531 case X86_64_INTEGER_CLASS:
5532 case X86_64_INTEGERSI_CLASS:
5533 /* Merge TImodes on aligned occasions here too. */
5534 if (i * 8 + 8 > bytes)
5535 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5536 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5540 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5541 if (tmpmode == BLKmode)
5543 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5544 gen_rtx_REG (tmpmode, *intreg),
5548 case X86_64_SSESF_CLASS:
5549 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5550 gen_rtx_REG (SFmode,
5551 SSE_REGNO (sse_regno)),
5555 case X86_64_SSEDF_CLASS:
5556 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5557 gen_rtx_REG (DFmode,
5558 SSE_REGNO (sse_regno)),
5562 case X86_64_SSE_CLASS:
5570 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5580 && regclass[1] == X86_64_SSEUP_CLASS
5581 && regclass[2] == X86_64_SSEUP_CLASS
5582 && regclass[3] == X86_64_SSEUP_CLASS);
5589 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5590 gen_rtx_REG (tmpmode,
5591 SSE_REGNO (sse_regno)),
5600 /* Empty aligned struct, union or class. */
5604 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5605 for (i = 0; i < nexps; i++)
5606 XVECEXP (ret, 0, i) = exp [i];
5610 /* Update the data in CUM to advance over an argument of mode MODE
5611 and data type TYPE. (TYPE is null for libcalls where that information
5612 may not be available.) */
5615 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5616 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5632 cum->words += words;
5633 cum->nregs -= words;
5634 cum->regno += words;
5636 if (cum->nregs <= 0)
5644 /* OImode shouldn't be used directly. */
5648 if (cum->float_in_sse < 2)
5651 if (cum->float_in_sse < 1)
5668 if (!type || !AGGREGATE_TYPE_P (type))
5670 cum->sse_words += words;
5671 cum->sse_nregs -= 1;
5672 cum->sse_regno += 1;
5673 if (cum->sse_nregs <= 0)
5686 if (!type || !AGGREGATE_TYPE_P (type))
5688 cum->mmx_words += words;
5689 cum->mmx_nregs -= 1;
5690 cum->mmx_regno += 1;
5691 if (cum->mmx_nregs <= 0)
5702 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5703 tree type, HOST_WIDE_INT words, int named)
5705 int int_nregs, sse_nregs;
5707 /* Unnamed 256bit vector mode parameters are passed on stack. */
5708 if (!named && VALID_AVX256_REG_MODE (mode))
5711 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5712 cum->words += words;
5713 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5715 cum->nregs -= int_nregs;
5716 cum->sse_nregs -= sse_nregs;
5717 cum->regno += int_nregs;
5718 cum->sse_regno += sse_nregs;
5721 cum->words += words;
5725 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5726 HOST_WIDE_INT words)
5728 /* Otherwise, this should be passed indirect. */
5729 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5731 cum->words += words;
5740 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5741 tree type, int named)
5743 HOST_WIDE_INT bytes, words;
5745 if (mode == BLKmode)
5746 bytes = int_size_in_bytes (type);
5748 bytes = GET_MODE_SIZE (mode);
5749 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5752 mode = type_natural_mode (type, NULL);
5754 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5755 function_arg_advance_ms_64 (cum, bytes, words);
5756 else if (TARGET_64BIT)
5757 function_arg_advance_64 (cum, mode, type, words, named);
5759 function_arg_advance_32 (cum, mode, type, bytes, words);
5762 /* Define where to put the arguments to a function.
5763 Value is zero to push the argument on the stack,
5764 or a hard register in which to store the argument.
5766 MODE is the argument's machine mode.
5767 TYPE is the data type of the argument (as a tree).
5768 This is null for libcalls where that information may
5770 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5771 the preceding args and about the function being called.
5772 NAMED is nonzero if this argument is a named parameter
5773 (otherwise it is an extra parameter matching an ellipsis). */
5776 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5777 enum machine_mode orig_mode, tree type,
5778 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5780 static bool warnedsse, warnedmmx;
5782 /* Avoid the AL settings for the Unix64 ABI. */
5783 if (mode == VOIDmode)
5799 if (words <= cum->nregs)
5801 int regno = cum->regno;
5803 /* Fastcall allocates the first two DWORD (SImode) or
5804 smaller arguments to ECX and EDX if it isn't an
5810 || (type && AGGREGATE_TYPE_P (type)))
5813 /* ECX not EAX is the first allocated register. */
5814 if (regno == AX_REG)
5817 return gen_rtx_REG (mode, regno);
5822 if (cum->float_in_sse < 2)
5825 if (cum->float_in_sse < 1)
5829 /* In 32bit, we pass TImode in xmm registers. */
5836 if (!type || !AGGREGATE_TYPE_P (type))
5838 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5841 warning (0, "SSE vector argument without SSE enabled "
5845 return gen_reg_or_parallel (mode, orig_mode,
5846 cum->sse_regno + FIRST_SSE_REG);
5851 /* OImode shouldn't be used directly. */
5860 if (!type || !AGGREGATE_TYPE_P (type))
5863 return gen_reg_or_parallel (mode, orig_mode,
5864 cum->sse_regno + FIRST_SSE_REG);
5873 if (!type || !AGGREGATE_TYPE_P (type))
5875 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5878 warning (0, "MMX vector argument without MMX enabled "
5882 return gen_reg_or_parallel (mode, orig_mode,
5883 cum->mmx_regno + FIRST_MMX_REG);
5892 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5893 enum machine_mode orig_mode, tree type, int named)
5895 /* Handle a hidden AL argument containing number of registers
5896 for varargs x86-64 functions. */
5897 if (mode == VOIDmode)
5898 return GEN_INT (cum->maybe_vaarg
5899 ? (cum->sse_nregs < 0
5900 ? (cum->call_abi == ix86_abi
5902 : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5903 : X64_SSE_REGPARM_MAX))
5918 /* Unnamed 256bit vector mode parameters are passed on stack. */
5924 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5926 &x86_64_int_parameter_registers [cum->regno],
5931 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5932 enum machine_mode orig_mode, int named,
5933 HOST_WIDE_INT bytes)
5937 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5938 We use value of -2 to specify that current function call is MSABI. */
5939 if (mode == VOIDmode)
5940 return GEN_INT (-2);
5942 /* If we've run out of registers, it goes on the stack. */
5943 if (cum->nregs == 0)
5946 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5948 /* Only floating point modes are passed in anything but integer regs. */
5949 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5952 regno = cum->regno + FIRST_SSE_REG;
5957 /* Unnamed floating parameters are passed in both the
5958 SSE and integer registers. */
5959 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5960 t2 = gen_rtx_REG (mode, regno);
5961 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5962 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5963 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5966 /* Handle aggregated types passed in register. */
5967 if (orig_mode == BLKmode)
5969 if (bytes > 0 && bytes <= 8)
5970 mode = (bytes > 4 ? DImode : SImode);
5971 if (mode == BLKmode)
5975 return gen_reg_or_parallel (mode, orig_mode, regno);
5979 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5980 tree type, int named)
5982 enum machine_mode mode = omode;
5983 HOST_WIDE_INT bytes, words;
5985 if (mode == BLKmode)
5986 bytes = int_size_in_bytes (type);
5988 bytes = GET_MODE_SIZE (mode);
5989 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5991 /* To simplify the code below, represent vector types with a vector mode
5992 even if MMX/SSE are not active. */
5993 if (type && TREE_CODE (type) == VECTOR_TYPE)
5994 mode = type_natural_mode (type, cum);
5996 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5997 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5998 else if (TARGET_64BIT)
5999 return function_arg_64 (cum, mode, omode, type, named);
6001 return function_arg_32 (cum, mode, omode, type, bytes, words);
6004 /* A C expression that indicates when an argument must be passed by
6005 reference. If nonzero for an argument, a copy of that argument is
6006 made in memory and a pointer to the argument is passed instead of
6007 the argument itself. The pointer is passed in whatever way is
6008 appropriate for passing a pointer to that type. */
6011 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6012 enum machine_mode mode ATTRIBUTE_UNUSED,
6013 const_tree type, bool named ATTRIBUTE_UNUSED)
6015 /* See Windows x64 Software Convention. */
6016 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6018 int msize = (int) GET_MODE_SIZE (mode);
6021 /* Arrays are passed by reference. */
6022 if (TREE_CODE (type) == ARRAY_TYPE)
6025 if (AGGREGATE_TYPE_P (type))
6027 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6028 are passed by reference. */
6029 msize = int_size_in_bytes (type);
6033 /* __m128 is passed by reference. */
6035 case 1: case 2: case 4: case 8:
6041 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6047 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6050 contains_aligned_value_p (tree type)
6052 enum machine_mode mode = TYPE_MODE (type);
6053 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6057 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6059 if (TYPE_ALIGN (type) < 128)
6062 if (AGGREGATE_TYPE_P (type))
6064 /* Walk the aggregates recursively. */
6065 switch (TREE_CODE (type))
6069 case QUAL_UNION_TYPE:
6073 /* Walk all the structure fields. */
6074 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6076 if (TREE_CODE (field) == FIELD_DECL
6077 && contains_aligned_value_p (TREE_TYPE (field)))
6084 /* Just for use if some languages passes arrays by value. */
6085 if (contains_aligned_value_p (TREE_TYPE (type)))
6096 /* Gives the alignment boundary, in bits, of an argument with the
6097 specified mode and type. */
6100 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6105 /* Since canonical type is used for call, we convert it to
6106 canonical type if needed. */
6107 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6108 type = TYPE_CANONICAL (type);
6109 align = TYPE_ALIGN (type);
6112 align = GET_MODE_ALIGNMENT (mode);
6113 if (align < PARM_BOUNDARY)
6114 align = PARM_BOUNDARY;
6115 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6116 natural boundaries. */
6117 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6119 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6120 make an exception for SSE modes since these require 128bit
6123 The handling here differs from field_alignment. ICC aligns MMX
6124 arguments to 4 byte boundaries, while structure fields are aligned
6125 to 8 byte boundaries. */
6128 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6129 align = PARM_BOUNDARY;
6133 if (!contains_aligned_value_p (type))
6134 align = PARM_BOUNDARY;
6137 if (align > BIGGEST_ALIGNMENT)
6138 align = BIGGEST_ALIGNMENT;
6142 /* Return true if N is a possible register number of function value. */
6145 ix86_function_value_regno_p (int regno)
6152 case FIRST_FLOAT_REG:
6153 /* TODO: The function should depend on current function ABI but
6154 builtins.c would need updating then. Therefore we use the
6156 if (TARGET_64BIT && ix86_abi == MS_ABI)
6158 return TARGET_FLOAT_RETURNS_IN_80387;
6164 if (TARGET_MACHO || TARGET_64BIT)
6172 /* Define how to find the value returned by a function.
6173 VALTYPE is the data type of the value (as a tree).
6174 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6175 otherwise, FUNC is 0. */
6178 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6179 const_tree fntype, const_tree fn)
6183 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6184 we normally prevent this case when mmx is not available. However
6185 some ABIs may require the result to be returned like DImode. */
6186 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6187 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6189 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6190 we prevent this case when sse is not available. However some ABIs
6191 may require the result to be returned like integer TImode. */
6192 else if (mode == TImode
6193 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6194 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6196 /* 32-byte vector modes in %ymm0. */
6197 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6198 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6200 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6201 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6202 regno = FIRST_FLOAT_REG;
6204 /* Most things go in %eax. */
6207 /* Override FP return register with %xmm0 for local functions when
6208 SSE math is enabled or for functions with sseregparm attribute. */
6209 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6211 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6212 if ((sse_level >= 1 && mode == SFmode)
6213 || (sse_level == 2 && mode == DFmode))
6214 regno = FIRST_SSE_REG;
6217 /* OImode shouldn't be used directly. */
6218 gcc_assert (mode != OImode);
6220 return gen_rtx_REG (orig_mode, regno);
6224 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6229 /* Handle libcalls, which don't provide a type node. */
6230 if (valtype == NULL)
6242 return gen_rtx_REG (mode, FIRST_SSE_REG);
6245 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6249 return gen_rtx_REG (mode, AX_REG);
6253 ret = construct_container (mode, orig_mode, valtype, 1,
6254 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6255 x86_64_int_return_registers, 0);
6257 /* For zero sized structures, construct_container returns NULL, but we
6258 need to keep rest of compiler happy by returning meaningful value. */
6260 ret = gen_rtx_REG (orig_mode, AX_REG);
6266 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6268 unsigned int regno = AX_REG;
6272 switch (GET_MODE_SIZE (mode))
6275 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6276 && !COMPLEX_MODE_P (mode))
6277 regno = FIRST_SSE_REG;
6281 if (mode == SFmode || mode == DFmode)
6282 regno = FIRST_SSE_REG;
6288 return gen_rtx_REG (orig_mode, regno);
6292 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6293 enum machine_mode orig_mode, enum machine_mode mode)
6295 const_tree fn, fntype;
6298 if (fntype_or_decl && DECL_P (fntype_or_decl))
6299 fn = fntype_or_decl;
6300 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6302 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6303 return function_value_ms_64 (orig_mode, mode);
6304 else if (TARGET_64BIT)
6305 return function_value_64 (orig_mode, mode, valtype);
6307 return function_value_32 (orig_mode, mode, fntype, fn);
6311 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6312 bool outgoing ATTRIBUTE_UNUSED)
6314 enum machine_mode mode, orig_mode;
6316 orig_mode = TYPE_MODE (valtype);
6317 mode = type_natural_mode (valtype, NULL);
6318 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6322 ix86_libcall_value (enum machine_mode mode)
6324 return ix86_function_value_1 (NULL, NULL, mode, mode);
6327 /* Return true iff type is returned in memory. */
6329 static int ATTRIBUTE_UNUSED
6330 return_in_memory_32 (const_tree type, enum machine_mode mode)
6334 if (mode == BLKmode)
6337 size = int_size_in_bytes (type);
6339 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6342 if (VECTOR_MODE_P (mode) || mode == TImode)
6344 /* User-created vectors small enough to fit in EAX. */
6348 /* MMX/3dNow values are returned in MM0,
6349 except when it doesn't exits. */
6351 return (TARGET_MMX ? 0 : 1);
6353 /* SSE values are returned in XMM0, except when it doesn't exist. */
6355 return (TARGET_SSE ? 0 : 1);
6357 /* AVX values are returned in YMM0, except when it doesn't exist. */
6359 return TARGET_AVX ? 0 : 1;
6368 /* OImode shouldn't be used directly. */
6369 gcc_assert (mode != OImode);
6374 static int ATTRIBUTE_UNUSED
6375 return_in_memory_64 (const_tree type, enum machine_mode mode)
6377 int needed_intregs, needed_sseregs;
6378 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6381 static int ATTRIBUTE_UNUSED
6382 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6384 HOST_WIDE_INT size = int_size_in_bytes (type);
6386 /* __m128 is returned in xmm0. */
6387 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6388 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6391 /* Otherwise, the size must be exactly in [1248]. */
6392 return (size != 1 && size != 2 && size != 4 && size != 8);
6396 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6398 #ifdef SUBTARGET_RETURN_IN_MEMORY
6399 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6401 const enum machine_mode mode = type_natural_mode (type, NULL);
6405 if (ix86_function_type_abi (fntype) == MS_ABI)
6406 return return_in_memory_ms_64 (type, mode);
6408 return return_in_memory_64 (type, mode);
6411 return return_in_memory_32 (type, mode);
6415 /* Return false iff TYPE is returned in memory. This version is used
6416 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6417 but differs notably in that when MMX is available, 8-byte vectors
6418 are returned in memory, rather than in MMX registers. */
6421 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6424 enum machine_mode mode = type_natural_mode (type, NULL);
6427 return return_in_memory_64 (type, mode);
6429 if (mode == BLKmode)
6432 size = int_size_in_bytes (type);
6434 if (VECTOR_MODE_P (mode))
6436 /* Return in memory only if MMX registers *are* available. This
6437 seems backwards, but it is consistent with the existing
6444 else if (mode == TImode)
6446 else if (mode == XFmode)
6452 /* When returning SSE vector types, we have a choice of either
6453 (1) being abi incompatible with a -march switch, or
6454 (2) generating an error.
6455 Given no good solution, I think the safest thing is one warning.
6456 The user won't be able to use -Werror, but....
6458 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6459 called in response to actually generating a caller or callee that
6460 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6461 via aggregate_value_p for general type probing from tree-ssa. */
6464 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6466 static bool warnedsse, warnedmmx;
6468 if (!TARGET_64BIT && type)
6470 /* Look at the return type of the function, not the function type. */
6471 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6473 if (!TARGET_SSE && !warnedsse)
6476 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6479 warning (0, "SSE vector return without SSE enabled "
6484 if (!TARGET_MMX && !warnedmmx)
6486 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6489 warning (0, "MMX vector return without MMX enabled "
6499 /* Create the va_list data type. */
6501 /* Returns the calling convention specific va_list date type.
6502 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6505 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6507 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6509 /* For i386 we use plain pointer to argument area. */
6510 if (!TARGET_64BIT || abi == MS_ABI)
6511 return build_pointer_type (char_type_node);
6513 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6514 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6516 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6517 unsigned_type_node);
6518 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6519 unsigned_type_node);
6520 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6522 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6525 va_list_gpr_counter_field = f_gpr;
6526 va_list_fpr_counter_field = f_fpr;
6528 DECL_FIELD_CONTEXT (f_gpr) = record;
6529 DECL_FIELD_CONTEXT (f_fpr) = record;
6530 DECL_FIELD_CONTEXT (f_ovf) = record;
6531 DECL_FIELD_CONTEXT (f_sav) = record;
6533 TREE_CHAIN (record) = type_decl;
6534 TYPE_NAME (record) = type_decl;
6535 TYPE_FIELDS (record) = f_gpr;
6536 TREE_CHAIN (f_gpr) = f_fpr;
6537 TREE_CHAIN (f_fpr) = f_ovf;
6538 TREE_CHAIN (f_ovf) = f_sav;
6540 layout_type (record);
6542 /* The correct type is an array type of one element. */
6543 return build_array_type (record, build_index_type (size_zero_node));
6546 /* Setup the builtin va_list data type and for 64-bit the additional
6547 calling convention specific va_list data types. */
6550 ix86_build_builtin_va_list (void)
6552 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6554 /* Initialize abi specific va_list builtin types. */
6558 if (ix86_abi == MS_ABI)
6560 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6561 if (TREE_CODE (t) != RECORD_TYPE)
6562 t = build_variant_type_copy (t);
6563 sysv_va_list_type_node = t;
6568 if (TREE_CODE (t) != RECORD_TYPE)
6569 t = build_variant_type_copy (t);
6570 sysv_va_list_type_node = t;
6572 if (ix86_abi != MS_ABI)
6574 t = ix86_build_builtin_va_list_abi (MS_ABI);
6575 if (TREE_CODE (t) != RECORD_TYPE)
6576 t = build_variant_type_copy (t);
6577 ms_va_list_type_node = t;
6582 if (TREE_CODE (t) != RECORD_TYPE)
6583 t = build_variant_type_copy (t);
6584 ms_va_list_type_node = t;
6591 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6594 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6603 int regparm = ix86_regparm;
6605 if (cum->call_abi != ix86_abi)
6606 regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6608 /* GPR size of varargs save area. */
6609 if (cfun->va_list_gpr_size)
6610 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6612 ix86_varargs_gpr_size = 0;
6614 /* FPR size of varargs save area. We don't need it if we don't pass
6615 anything in SSE registers. */
6616 if (cum->sse_nregs && cfun->va_list_fpr_size)
6617 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6619 ix86_varargs_fpr_size = 0;
6621 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6624 save_area = frame_pointer_rtx;
6625 set = get_varargs_alias_set ();
6627 for (i = cum->regno;
6629 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6632 mem = gen_rtx_MEM (Pmode,
6633 plus_constant (save_area, i * UNITS_PER_WORD));
6634 MEM_NOTRAP_P (mem) = 1;
6635 set_mem_alias_set (mem, set);
6636 emit_move_insn (mem, gen_rtx_REG (Pmode,
6637 x86_64_int_parameter_registers[i]));
6640 if (ix86_varargs_fpr_size)
6642 /* Now emit code to save SSE registers. The AX parameter contains number
6643 of SSE parameter registers used to call this function. We use
6644 sse_prologue_save insn template that produces computed jump across
6645 SSE saves. We need some preparation work to get this working. */
6647 label = gen_label_rtx ();
6648 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6650 /* Compute address to jump to :
6651 label - eax*4 + nnamed_sse_arguments*4 Or
6652 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6653 tmp_reg = gen_reg_rtx (Pmode);
6654 nsse_reg = gen_reg_rtx (Pmode);
6655 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6656 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6657 gen_rtx_MULT (Pmode, nsse_reg,
6660 /* vmovaps is one byte longer than movaps. */
6662 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6663 gen_rtx_PLUS (Pmode, tmp_reg,
6669 gen_rtx_CONST (DImode,
6670 gen_rtx_PLUS (DImode,
6672 GEN_INT (cum->sse_regno
6673 * (TARGET_AVX ? 5 : 4)))));
6675 emit_move_insn (nsse_reg, label_ref);
6676 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6678 /* Compute address of memory block we save into. We always use pointer
6679 pointing 127 bytes after first byte to store - this is needed to keep
6680 instruction size limited by 4 bytes (5 bytes for AVX) with one
6681 byte displacement. */
6682 tmp_reg = gen_reg_rtx (Pmode);
6683 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6684 plus_constant (save_area,
6685 ix86_varargs_gpr_size + 127)));
6686 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6687 MEM_NOTRAP_P (mem) = 1;
6688 set_mem_alias_set (mem, set);
6689 set_mem_align (mem, BITS_PER_WORD);
6691 /* And finally do the dirty job! */
6692 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6693 GEN_INT (cum->sse_regno), label));
6698 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6700 alias_set_type set = get_varargs_alias_set ();
6703 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6707 mem = gen_rtx_MEM (Pmode,
6708 plus_constant (virtual_incoming_args_rtx,
6709 i * UNITS_PER_WORD));
6710 MEM_NOTRAP_P (mem) = 1;
6711 set_mem_alias_set (mem, set);
6713 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6714 emit_move_insn (mem, reg);
/* Target hook: set up the register save area for a varargs/stdarg
   function, dispatching to the MS-ABI or SysV-ABI helper based on the
   function's call ABI.  NO_RTL suppression is not supported (asserted).
   NOTE(review): gapped excerpt; some lines of the original are missing.  */
6719 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6720 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6723 CUMULATIVE_ARGS next_cum;
6726 /* This argument doesn't appear to be used anymore. Which is good,
6727 because the old code here didn't suppress rtl generation. */
6728 gcc_assert (!no_rtl);
6733 fntype = TREE_TYPE (current_function_decl);
6735 /* For varargs, we do not want to skip the dummy va_dcl argument.
6736 For stdargs, we do want to skip the last named argument. */
6738 if (stdarg_p (fntype))
6739 function_arg_advance (&next_cum, mode, type, 1);
6741 if (cum->call_abi == MS_ABI)
6742 setup_incoming_varargs_ms_64 (&next_cum);
6744 setup_incoming_varargs_64 (&next_cum);
6747 /* Checks if TYPE is of kind va_list char *. */
/* Returns nonzero when TYPE's canonical va_list form is the simple
   char-pointer variant (MS va_list, or the default va_list under the
   MS ABI), in which case the generic std_* va handling applies.  */
6750 is_va_list_char_pointer (tree type)
6754 /* For 32-bit it is always true. */
6757 canonic = ix86_canonical_va_list_type (type);
6758 return (canonic == ms_va_list_type_node
6759 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6762 /* Implement va_start. */
/* Initialize the four fields of the SysV x86-64 va_list
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area) from the
   current function's argument-passing state.  Falls back to the
   generic expander for 32-bit targets and char-pointer va_lists.
   NOTE(review): gapped excerpt — braces/returns between visible lines
   are missing; comments describe only what is visible.  */
6765 ix86_va_start (tree valist, rtx nextarg)
6767 HOST_WIDE_INT words, n_gpr, n_fpr;
6768 tree f_gpr, f_fpr, f_ovf, f_sav;
6769 tree gpr, fpr, ovf, sav, t;
6772 /* Only 64bit target needs something special. */
6773 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6775 std_expand_builtin_va_start (valist, nextarg);
/* Walk the field chain of the record-type va_list.  */
6779 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6780 f_fpr = TREE_CHAIN (f_gpr);
6781 f_ovf = TREE_CHAIN (f_fpr);
6782 f_sav = TREE_CHAIN (f_ovf);
6784 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6785 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6786 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6787 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6788 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6790 /* Count number of gp and fp argument registers used. */
6791 words = crtl->args.info.words;
6792 n_gpr = crtl->args.info.regno;
6793 n_fpr = crtl->args.info.sse_regno;
/* gp_offset = 8 bytes per integer register already consumed.  */
6795 if (cfun->va_list_gpr_size)
6797 type = TREE_TYPE (gpr);
6798 t = build2 (MODIFY_EXPR, type,
6799 gpr, build_int_cst (type, n_gpr * 8));
6800 TREE_SIDE_EFFECTS (t) = 1;
6801 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset = 16 bytes per SSE register, biased past the GPR area.  */
6804 if (TARGET_SSE && cfun->va_list_fpr_size)
6806 type = TREE_TYPE (fpr);
6807 t = build2 (MODIFY_EXPR, type, fpr,
6808 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6809 TREE_SIDE_EFFECTS (t) = 1;
6810 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6813 /* Find the overflow area. */
6814 type = TREE_TYPE (ovf);
6815 t = make_tree (type, crtl->args.internal_arg_pointer);
6817 t = build2 (POINTER_PLUS_EXPR, type, t,
6818 size_int (words * UNITS_PER_WORD));
6819 t = build2 (MODIFY_EXPR, type, ovf, t);
6820 TREE_SIDE_EFFECTS (t) = 1;
6821 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6823 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6825 /* Find the register save area.
6826 Prologue of the function save it right above stack frame. */
6827 type = TREE_TYPE (sav);
6828 t = make_tree (type, frame_pointer_rtx);
/* With no GPR save area the base must be biased back past the
   (absent) X86_64_REGPARM_MAX * 8 bytes of GPR slots.  */
6829 if (!ix86_varargs_gpr_size)
6830 t = build2 (POINTER_PLUS_EXPR, type, t,
6831 size_int (-8 * X86_64_REGPARM_MAX));
6832 t = build2 (MODIFY_EXPR, type, sav, t);
6833 TREE_SIDE_EFFECTS (t) = 1;
6834 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6838 /* Implement va_arg. */
/* Gimplify one va_arg access for the SysV x86-64 ABI: classify TYPE,
   and either load it from the register save area (with in-register /
   fits-in-registers checks and a possible temporary for non-contiguous
   pieces) or from the stack overflow area, joining both paths at
   LAB_OVER.  Returns a dereference of the computed address.
   NOTE(review): gapped excerpt — many statements (branch bodies,
   braces, the ms_va_list path) are missing between visible lines.  */
6841 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6844 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6845 tree f_gpr, f_fpr, f_ovf, f_sav;
6846 tree gpr, fpr, ovf, sav, t;
6848 tree lab_false, lab_over = NULL_TREE;
6853 enum machine_mode nat_mode;
6856 /* Only 64bit target needs something special. */
6857 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6858 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6860 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6861 f_fpr = TREE_CHAIN (f_gpr);
6862 f_ovf = TREE_CHAIN (f_fpr);
6863 f_sav = TREE_CHAIN (f_ovf);
6865 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6866 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6867 valist = build_va_arg_indirect_ref (valist);
6868 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6869 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6870 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference are fetched as a pointer instead.  */
6872 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6874 type = build_pointer_type (type);
6875 size = int_size_in_bytes (type);
6876 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6878 nat_mode = type_natural_mode (type, NULL);
6887 /* Unnamed 256bit vector mode parameters are passed on stack. */
6888 if (ix86_cfun_abi () == SYSV_ABI)
6895 container = construct_container (nat_mode, TYPE_MODE (type),
6896 type, 0, X86_64_REGPARM_MAX,
6897 X86_64_SSE_REGPARM_MAX, intreg,
6902 /* Pull the value out of the saved registers. */
6904 addr = create_tmp_var (ptr_type_node, "addr");
6905 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6909 int needed_intregs, needed_sseregs;
6911 tree int_addr, sse_addr;
6913 lab_false = create_artificial_label ();
6914 lab_over = create_artificial_label ();
6916 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is required when the value's pieces cannot be read
   directly from a contiguous run of save-area slots.  */
6918 need_temp = (!REG_P (container)
6919 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6920 || TYPE_ALIGN (type) > 128));
6922 /* In case we are passing structure, verify that it is consecutive block
6923 on the register save area. If not we need to do moves. */
6924 if (!need_temp && !REG_P (container))
6926 /* Verify that all registers are strictly consecutive */
6927 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6931 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6933 rtx slot = XVECEXP (container, 0, i);
/* SSE slots: consecutive regs at 16-byte strides.  */
6934 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6935 || INTVAL (XEXP (slot, 1)) != i * 16)
6943 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6945 rtx slot = XVECEXP (container, 0, i);
/* Integer slots: consecutive regs at 8-byte strides.  */
6946 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6947 || INTVAL (XEXP (slot, 1)) != i * 8)
6959 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6960 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6961 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6962 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6965 /* First ensure that we fit completely in registers. */
/* if (gpr >= limit) goto lab_false; -- not enough GPRs left.  */
6968 t = build_int_cst (TREE_TYPE (gpr),
6969 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6970 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6971 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6972 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6973 gimplify_and_add (t, pre_p);
/* Same check for the SSE part of the save area.  */
6977 t = build_int_cst (TREE_TYPE (fpr),
6978 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6979 + X86_64_REGPARM_MAX * 8);
6980 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6981 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6982 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6983 gimplify_and_add (t, pre_p);
6986 /* Compute index to start of area used for integer regs. */
6989 /* int_addr = gpr + sav; */
6990 t = fold_convert (sizetype, gpr);
6991 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6992 gimplify_assign (int_addr, t, pre_p);
6996 /* sse_addr = fpr + sav; */
6997 t = fold_convert (sizetype, fpr);
6998 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6999 gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: copy each register piece into a stack temporary
   and hand back the temporary's address.  */
7004 tree temp = create_tmp_var (type, "va_arg_tmp");
7007 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7008 gimplify_assign (addr, t, pre_p);
7010 for (i = 0; i < XVECLEN (container, 0); i++)
7012 rtx slot = XVECEXP (container, 0, i);
7013 rtx reg = XEXP (slot, 0);
7014 enum machine_mode mode = GET_MODE (reg);
7015 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
7016 tree addr_type = build_pointer_type (piece_type);
7017 tree daddr_type = build_pointer_type_for_mode (piece_type,
7021 tree dest_addr, dest;
7023 if (SSE_REGNO_P (REGNO (reg)))
7025 src_addr = sse_addr;
7026 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7030 src_addr = int_addr;
7031 src_offset = REGNO (reg) * 8;
7033 src_addr = fold_convert (addr_type, src_addr);
7034 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7035 size_int (src_offset));
7036 src = build_va_arg_indirect_ref (src_addr);
7038 dest_addr = fold_convert (daddr_type, addr);
7039 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7040 size_int (INTVAL (XEXP (slot, 1))));
7041 dest = build_va_arg_indirect_ref (dest_addr);
7043 gimplify_assign (dest, src, pre_p);
/* Consume the registers we used: bump gp_offset / fp_offset.  */
7049 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7050 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7051 gimplify_assign (gpr, t, pre_p);
7056 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7057 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7058 gimplify_assign (fpr, t, pre_p);
7061 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7063 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7066 /* ... otherwise out of the overflow area. */
7068 /* When we align parameter on stack for caller, if the parameter
7069 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7070 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
7071 here with caller. */
7072 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7073 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7074 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7076 /* Care for on-stack alignment if needed. */
7077 if (arg_boundary <= 64
7078 || integer_zerop (TYPE_SIZE (type)))
/* Round ovf up to the argument's boundary: (ovf + align-1) & -align.  */
7082 HOST_WIDE_INT align = arg_boundary / 8;
7083 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7084 size_int (align - 1));
7085 t = fold_convert (sizetype, t);
7086 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7088 t = fold_convert (TREE_TYPE (ovf), t);
7090 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7091 gimplify_assign (addr, t, pre_p);
/* Advance ovf past the consumed (word-rounded) argument.  */
7093 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7094 size_int (rsize * UNITS_PER_WORD));
7095 gimplify_assign (unshare_expr (ovf), t, pre_p);
7098 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7100 ptrtype = build_pointer_type (type);
7101 addr = fold_convert (ptrtype, addr);
/* By-reference arguments require one extra dereference.  */
7104 addr = build_va_arg_indirect_ref (addr);
7105 return build_va_arg_indirect_ref (addr);
7108 /* Return nonzero if OPNUM's MEM should be matched
7109 in movabs* patterns. */
/* Digs the MEM operand out of INSN's (possibly PARALLEL-wrapped) SET,
   strips SUBREGs, and rejects volatile memory unless volatile_ok.  */
7112 ix86_check_movabs (rtx insn, int opnum)
7116 set = PATTERN (insn);
7117 if (GET_CODE (set) == PARALLEL)
7118 set = XVECEXP (set, 0, 0);
7119 gcc_assert (GET_CODE (set) == SET);
7120 mem = XEXP (set, opnum);
7121 while (GET_CODE (mem) == SUBREG)
7122 mem = SUBREG_REG (mem);
7123 gcc_assert (MEM_P (mem));
7124 return (volatile_ok || !MEM_VOLATILE_P (mem));
7127 /* Initialize the table of extra 80387 mathematical constants. */
/* One-time fill of ext_80387_constants_table with the five constants
   the x87 can load directly (fldlg2, fldln2, fldl2e, fldl2t, fldpi),
   each rounded to XFmode.  Sets ext_80387_constants_init when done.  */
7130 init_ext_80387_constants (void)
7132 static const char * cst[5] =
7134 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7135 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7136 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7137 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7138 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7142 for (i = 0; i < 5; i++)
7144 real_from_string (&ext_80387_constants_table[i], cst[i]);
7145 /* Ensure each constant is rounded to XFmode precision. */
7146 real_convert (&ext_80387_constants_table[i],
7147 XFmode, &ext_80387_constants_table[i]);
7150 ext_80387_constants_init = 1;
7153 /* Return true if the constant is something that can be loaded with
7154 a special instruction. */
/* Classifies CONST_DOUBLE X for x87 loading: 0.0, 1.0, the five
   extended constants, and the fldz/fld1+fchs-splittable -0.0 / -1.0.
   NOTE(review): gapped excerpt — the concrete return values for each
   case are on lines missing from this listing.  */
7157 standard_80387_constant_p (rtx x)
7159 enum machine_mode mode = GET_MODE (x);
7163 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7166 if (x == CONST0_RTX (mode))
7168 if (x == CONST1_RTX (mode))
7171 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7173 /* For XFmode constants, try to find a special 80387 instruction when
7174 optimizing for size or on those CPUs that benefit from them. */
7176 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7180 if (! ext_80387_constants_init)
7181 init_ext_80387_constants ();
7183 for (i = 0; i < 5; i++)
7184 if (real_identical (&r, &ext_80387_constants_table[i]))
7188 /* Load of the constant -0.0 or -1.0 will be split as
7189 fldz;fchs or fld1;fchs sequence. */
7190 if (real_isnegzero (&r))
7192 if (real_identical (&r, &dconstm1))
7198 /* Return the opcode of the special instruction to be used to load
/* Maps standard_80387_constant_p's classification of X to an x87 load
   mnemonic.  NOTE(review): the switch cases are entirely missing from
   this gapped excerpt; only the dispatch skeleton is visible.  */
7202 standard_80387_constant_opcode (rtx x)
7204 switch (standard_80387_constant_p (x))
7228 /* Return the CONST_DOUBLE representing the 80387 constant that is
7229 loaded by the specified special instruction. The argument IDX
7230 matches the return value from standard_80387_constant_p. */
/* Lazily initializes the constant table, then materializes entry I as
   a CONST_DOUBLE.  NOTE(review): the IDX->I mapping (switch) is on
   lines missing from this gapped excerpt.  */
7233 standard_80387_constant_rtx (int idx)
7237 if (! ext_80387_constants_init)
7238 init_ext_80387_constants ();
7254 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7258 /* Return 1 if mode is a valid mode for sse. */
/* NOTE(review): the body of this predicate is entirely missing from
   this gapped excerpt; only the signature is visible.  */
7260 standard_sse_mode_p (enum machine_mode mode)
7277 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
7278 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7279 modes and AVX is enabled. */
/* Negative returns (-2/-3) flag an all-ones constant whose required
   ISA extension (SSE2/AVX) is not enabled.  */
7282 standard_sse_constant_p (rtx x)
7284 enum machine_mode mode = GET_MODE (x);
7286 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7288 if (vector_all_ones_operand (x, mode))
7290 if (standard_sse_mode_p (mode))
7291 return TARGET_SSE2 ? 2 : -2;
7292 else if (VALID_AVX256_REG_MODE (mode))
7293 return TARGET_AVX ? 3 : -3;
7299 /* Return the opcode of the special instruction to be used to load
/* Picks the xor/pxor (all-zeros) or pcmpeq (all-ones) idiom matching
   INSN's attribute mode, with AVX three-operand variants when enabled.
   NOTE(review): gapped excerpt — several case labels are missing.  */
7303 standard_sse_constant_opcode (rtx insn, rtx x)
7305 switch (standard_sse_constant_p (x))
7308 switch (get_attr_mode (insn))
7311 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7313 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7315 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7317 return "vxorps\t%x0, %x0, %x0";
7319 return "vxorpd\t%x0, %x0, %x0";
7321 return "vpxor\t%x0, %x0, %x0";
7327 switch (get_attr_mode (insn))
7332 return "vpcmpeqd\t%0, %0, %0";
7338 return "pcmpeqd\t%0, %0";
7343 /* Returns 1 if OP contains a symbol reference */
/* Recursively scans OP's rtx format string: direct SYMBOL_REF/LABEL_REF
   hits return 1; 'E' entries recurse over vectors, 'e' over subexprs.  */
7346 symbolic_reference_mentioned_p (rtx op)
7351 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7354 fmt = GET_RTX_FORMAT (GET_CODE (op));
7355 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7361 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7362 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7366 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7373 /* Return 1 if it is appropriate to emit `ret' instructions in the
7374 body of a function. Do this only if the epilogue is simple, needing a
7375 couple of insns. Prior to reloading, we can't tell how many registers
7376 must be saved, so return 0 then. Return 0 if there is no frame
7377 marker to de-allocate. */
7380 ix86_can_use_return_insn_p (void)
7382 struct ix86_frame frame;
7384 if (! reload_completed || frame_pointer_needed)
7387 /* Don't allow more than 32 pop, since that's all we can do
7388 with one instruction. */
/* NOTE(review): the 32768 limit presumably reflects the `ret imm16`
   pop-size encoding limit — confirm against the insn pattern.  */
7389 if (crtl->args.pops_args
7390 && crtl->args.size >= 32768)
/* A bare `ret` is only valid when nothing remains to deallocate.  */
7393 ix86_compute_frame_layout (&frame);
7394 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7397 /* Value should be nonzero if functions must have frame pointers.
7398 Zero means the frame pointer need not be set up (and parms may
7399 be accessed via the stack pointer) in functions that seem suitable. */
7402 ix86_frame_pointer_required (void)
7404 /* If we accessed previous frames, then the generated code expects
7405 to be able to access the saved ebp value in our frame. */
7406 if (cfun->machine->accesses_prev_frame)
7409 /* Several x86 os'es need a frame pointer for other reasons,
7410 usually pertaining to setjmp. */
7411 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7414 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7415 the frame pointer by default. Turn it back on now if we've not
7416 got a leaf function. */
7417 if (TARGET_OMIT_LEAF_FRAME_POINTER
7418 && (!current_function_is_leaf
7419 || ix86_current_function_calls_tls_descriptor))
7428 /* Record that the current function accesses previous call frames. */
/* Flag consumed by ix86_frame_pointer_required to force a frame pointer.  */
7431 ix86_setup_frame_addresses (void)
7433 cfun->machine->accesses_prev_frame = 1;
7436 #ifndef USE_HIDDEN_LINKONCE
7437 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7438 # define USE_HIDDEN_LINKONCE 1
7440 # define USE_HIDDEN_LINKONCE 0
/* Bitmask indexed by register number: bit N is set once a PC-thunk for
   register N has been requested (output_set_got); ix86_file_end emits
   the thunk bodies for every set bit.  */
7444 static int pic_labels_used;
7446 /* Fills in the label name that should be used for a pc thunk for
7447 the given register. */
/* 32-bit only (asserted).  Hidden-linkonce targets get the shared
   "__i686.get_pc_thunk.<reg>" name; others get an internal label.  */
7450 get_pc_thunk_name (char name[32], unsigned int regno)
7452 gcc_assert (!TARGET_64BIT);
7454 if (USE_HIDDEN_LINKONCE)
7455 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7457 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7461 /* This function generates code for -fpic that loads %ebx with
7462 the return address of the caller and then returns. */
/* End-of-file hook: emits the body of every PC-thunk recorded in
   pic_labels_used (mov (%esp),%reg; ret), choosing Mach-O coalesced,
   hidden-linkonce, or plain text-section output, then the exec-stack
   marker if required.
   NOTE(review): gapped excerpt — #ifdef/brace structure between the
   three output variants is partly missing.  */
7465 ix86_file_end (void)
7470 for (regno = 0; regno < 8; ++regno)
7474 if (! ((pic_labels_used >> regno) & 1))
7477 get_pc_thunk_name (name, regno);
7482 switch_to_section (darwin_sections[text_coal_section]);
7483 fputs ("\t.weak_definition\t", asm_out_file);
7484 assemble_name (asm_out_file, name);
7485 fputs ("\n\t.private_extern\t", asm_out_file);
7486 assemble_name (asm_out_file, name);
7487 fputs ("\n", asm_out_file);
7488 ASM_OUTPUT_LABEL (asm_out_file, name);
7492 if (USE_HIDDEN_LINKONCE)
7496 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7498 TREE_PUBLIC (decl) = 1;
7499 TREE_STATIC (decl) = 1;
7500 DECL_ONE_ONLY (decl) = 1;
7502 (*targetm.asm_out.unique_section) (decl, 0);
7503 switch_to_section (get_named_section (decl, NULL, 0));
7505 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7506 fputs ("\t.hidden\t", asm_out_file);
7507 assemble_name (asm_out_file, name);
7508 fputc ('\n', asm_out_file);
7509 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7513 switch_to_section (text_section);
7514 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address from the stack top, return.  */
7517 xops[0] = gen_rtx_REG (Pmode, regno);
7518 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7519 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7520 output_asm_insn ("ret", xops);
7523 if (NEED_INDICATE_EXEC_STACK)
7524 file_end_indicate_exec_stack ();
7527 /* Emit code for the SET_GOT patterns. */
/* Emits assembly that loads the GOT base into DEST: VxWorks RTP reads
   it from GOTT_BASE/GOTT_INDEX; without deep branch prediction a
   call/pop (or mov of a label) is used; otherwise a PC-thunk call is
   emitted and recorded in pic_labels_used.  Finally _GLOBAL_OFFSET_TABLE_
   is added in.
   NOTE(review): gapped excerpt — returns/braces between the variants
   are missing from this listing.  */
7530 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7536 if (TARGET_VXWORKS_RTP && flag_pic)
7538 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7539 xops[2] = gen_rtx_MEM (Pmode,
7540 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7541 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7543 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7544 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7545 an unadorned address. */
7546 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7547 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7548 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7552 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7554 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7556 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7559 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
/* call/pop idiom: the call pushes the PC which the pop retrieves.  */
7561 output_asm_insn ("call\t%a2", xops);
7564 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7565 is what will be referenced by the Mach-O PIC subsystem. */
7567 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7570 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7571 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7574 output_asm_insn ("pop%z0\t%0", xops);
/* Deep-branch-prediction path: call a per-register PC thunk instead
   of the call/pop pair (keeps the return-stack predictor balanced).  */
7579 get_pc_thunk_name (name, REGNO (dest));
7580 pic_labels_used |= 1 << REGNO (dest);
7582 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7583 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7584 output_asm_insn ("call\t%X2", xops);
7585 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7586 is what will be referenced by the Mach-O PIC subsystem. */
7589 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7591 targetm.asm_out.internal_label (asm_out_file, "L",
7592 CODE_LABEL_NUMBER (label));
7599 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7600 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7602 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7607 /* Generate an "push" pattern for input ARG. */
/* Also bumps the tracked CFA offset when the CFA is still %esp/%rsp,
   since the push grows the frame by one word.
   NOTE(review): the function signature line is missing from this
   gapped excerpt; the name is presumed to be gen_push.  */
7612 if (ix86_cfa_state->reg == stack_pointer_rtx)
7613 ix86_cfa_state->offset += UNITS_PER_WORD;
/* (set (mem (pre_dec sp)) arg) -- the canonical push RTL.  */
7615 return gen_rtx_SET (VOIDmode,
7617 gen_rtx_PRE_DEC (Pmode,
7618 stack_pointer_rtx)),
7622 /* Return >= 0 if there is an unused call-clobbered register available
7623 for the entire function. */
/* Only considered for leaf, non-profiled, non-TLS-descriptor functions;
   scans eax/ecx/edx (2..0) skipping the DRAP register and anything
   live.  Returns INVALID_REGNUM when nothing qualifies.  */
7626 ix86_select_alt_pic_regnum (void)
7628 if (current_function_is_leaf && !crtl->profile
7629 && !ix86_current_function_calls_tls_descriptor)
7632 /* Can't use the same register for both PIC and DRAP. */
7634 drap = REGNO (crtl->drap_reg);
7637 for (i = 2; i >= 0; --i)
7638 if (i != drap && !df_regs_ever_live_p (i))
7642 return INVALID_REGNUM;
7645 /* Return 1 if we need to save REGNO. */
/* Special cases: the PIC register (unless a spare call-clobbered reg
   can substitute), the EH return data registers when MAYBE_EH_RETURN,
   and the DRAP register.  Otherwise a register is saved iff it is
   live, call-saved, not fixed, and not the frame pointer when one is
   in use.  NOTE(review): gapped excerpt — some return statements and
   the EH loop header are missing.  */
7647 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7649 if (pic_offset_table_rtx
7650 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7651 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7653 || crtl->calls_eh_return
7654 || crtl->uses_const_pool))
7656 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7661 if (crtl->calls_eh_return && maybe_eh_return)
7666 unsigned test = EH_RETURN_DATA_REGNO (i);
7667 if (test == INVALID_REGNUM)
7674 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
7677 return (df_regs_ever_live_p (regno)
7678 && !call_used_regs[regno]
7679 && !fixed_regs[regno]
7680 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7683 /* Return number of saved general prupose registers. */
/* Counts hard registers that are non-SSE and need saving per
   ix86_save_reg (with maybe_eh_return = true).  */
7686 ix86_nsaved_regs (void)
7691 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7692 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7697 /* Return number of saved SSE registrers. */
/* SSE registers are only call-saved under the MS ABI; other ABIs
   short-circuit here.  */
7700 ix86_nsaved_sseregs (void)
7705 if (ix86_cfun_abi () != MS_ABI)
7707 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7708 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7713 /* Given FROM and TO register numbers, say whether this elimination is
7714 allowed. If stack alignment is needed, we can only replace argument
7715 pointer with hard frame pointer, or replace frame pointer with stack
7716 pointer. Otherwise, frame pointer elimination is automatically
7717 handled and all other eliminations are valid. */
7720 ix86_can_eliminate (int from, int to)
7722 if (stack_realign_fp)
7723 return ((from == ARG_POINTER_REGNUM
7724 && to == HARD_FRAME_POINTER_REGNUM)
7725 || (from == FRAME_POINTER_REGNUM
7726 && to == STACK_POINTER_REGNUM));
/* Eliminating into %esp is only valid when no frame pointer is needed.  */
7728 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7731 /* Return the offset between two registers, one to be eliminated, and the other
7732 its replacement, at the start of a routine. */
/* Computes the full frame layout, then reads off the distance for the
   requested (FROM, TO) elimination pair; only the four legal pairs are
   handled (asserted).  */
7735 ix86_initial_elimination_offset (int from, int to)
7737 struct ix86_frame frame;
7738 ix86_compute_frame_layout (&frame);
7740 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7741 return frame.hard_frame_pointer_offset;
7742 else if (from == FRAME_POINTER_REGNUM
7743 && to == HARD_FRAME_POINTER_REGNUM)
7744 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7747 gcc_assert (to == STACK_POINTER_REGNUM);
7749 if (from == ARG_POINTER_REGNUM)
7750 return frame.stack_pointer_offset;
7752 gcc_assert (from == FRAME_POINTER_REGNUM);
7753 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7757 /* In a dynamically-aligned function, we can't know the offset from
7758 stack pointer to frame pointer, so we must ensure that setjmp
7759 eliminates fp against the hard fp (%ebp) rather than trying to
7760 index from %esp up to the top of the frame across a gap that is
7761 of unknown (at compile-time) size. */
7763 ix86_builtin_setjmp_frame_value (void)
7765 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7768 /* Fill structure ix86_frame about frame of currently computed function. */
/* Computes every field of *FRAME for the current function: saved-reg
   counts, the fast-prologue (move vs. push) decision, then a single
   downward pass accumulating offsets — return address / saved %ebp,
   GPR save area, aligned SSE save area, varargs save area, locals,
   outgoing args — followed by red-zone accounting and (visible at the
   end) stderr debug dumps.
   NOTE(review): gapped excerpt — braces, the #ifdef around the debug
   dump, and a few statements are missing between visible lines.  */
7771 ix86_compute_frame_layout (struct ix86_frame *frame)
7773 HOST_WIDE_INT total_size;
7774 unsigned int stack_alignment_needed;
7775 HOST_WIDE_INT offset;
7776 unsigned int preferred_alignment;
7777 HOST_WIDE_INT size = get_frame_size ();
7779 frame->nregs = ix86_nsaved_regs ();
7780 frame->nsseregs = ix86_nsaved_sseregs ();
7783 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7784 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7786 /* MS ABI seem to require stack alignment to be always 16 except for function
7788 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7790 preferred_alignment = 16;
7791 stack_alignment_needed = 16;
7792 crtl->preferred_stack_boundary = 128;
7793 crtl->stack_alignment_needed = 128;
7796 gcc_assert (!size || stack_alignment_needed);
7797 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7798 gcc_assert (preferred_alignment <= stack_alignment_needed);
7800 /* During reload iteration the amount of registers saved can change.
7801 Recompute the value as needed. Do not recompute when amount of registers
7802 didn't change as reload does multiple calls to the function and does not
7803 expect the decision to change within single iteration. */
7804 if (!optimize_function_for_size_p (cfun)
7805 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7807 int count = frame->nregs;
7809 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7810 /* The fast prologue uses move instead of push to save registers. This
7811 is significantly longer, but also executes faster as modern hardware
7812 can execute the moves in parallel, but can't do that for push/pop.
7814 Be careful about choosing what prologue to emit: When function takes
7815 many instructions to execute we may use slow version as well as in
7816 case function is known to be outside hot spot (this is known with
7817 feedback only). Weight the size of function by number of registers
7818 to save as it is cheap to use one or two push instructions but very
7819 slow to use many of them. */
7821 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7822 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7823 || (flag_branch_probabilities
7824 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7825 cfun->machine->use_fast_prologue_epilogue = false;
7827 cfun->machine->use_fast_prologue_epilogue
7828 = !expensive_function_p (count);
7830 if (TARGET_PROLOGUE_USING_MOVE
7831 && cfun->machine->use_fast_prologue_epilogue)
7832 frame->save_regs_using_mov = true;
7834 frame->save_regs_using_mov = false;
7837 /* Skip return address and saved base pointer. */
7838 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7840 frame->hard_frame_pointer_offset = offset;
7842 /* Set offset to aligned because the realigned frame starts from
7844 if (stack_realign_fp)
7845 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7847 /* Register save area */
7848 offset += frame->nregs * UNITS_PER_WORD;
7850 /* Align SSE reg save area. */
7851 if (frame->nsseregs)
7852 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7854 frame->padding0 = 0;
7856 /* SSE register save area. */
7857 offset += frame->padding0 + frame->nsseregs * 16;
7860 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7861 offset += frame->va_arg_size;
7863 /* Align start of frame for local function. */
7864 frame->padding1 = ((offset + stack_alignment_needed - 1)
7865 & -stack_alignment_needed) - offset;
7867 offset += frame->padding1;
7869 /* Frame pointer points here. */
7870 frame->frame_pointer_offset = offset;
7874 /* Add outgoing arguments area. Can be skipped if we eliminated
7875 all the function calls as dead code.
7876 Skipping is however impossible when function calls alloca. Alloca
7877 expander assumes that last crtl->outgoing_args_size
7878 of stack frame are unused. */
7879 if (ACCUMULATE_OUTGOING_ARGS
7880 && (!current_function_is_leaf || cfun->calls_alloca
7881 || ix86_current_function_calls_tls_descriptor))
7883 offset += crtl->outgoing_args_size;
7884 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7887 frame->outgoing_arguments_size = 0;
7889 /* Align stack boundary. Only needed if we're calling another function
7891 if (!current_function_is_leaf || cfun->calls_alloca
7892 || ix86_current_function_calls_tls_descriptor)
7893 frame->padding2 = ((offset + preferred_alignment - 1)
7894 & -preferred_alignment) - offset;
7896 frame->padding2 = 0;
7898 offset += frame->padding2;
7900 /* We've reached end of stack frame. */
7901 frame->stack_pointer_offset = offset;
7903 /* Size prologue needs to allocate. */
7904 frame->to_allocate =
7905 (size + frame->padding1 + frame->padding2
7906 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Large 64-bit frames can't address saves with 32-bit displacements;
   fall back to push-based saving.  */
7908 if ((!frame->to_allocate && frame->nregs <= 1)
7909 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7910 frame->save_regs_using_mov = false;
/* Leaf functions with an unchanging %rsp may use the red zone below
   the stack pointer instead of explicit allocation.  */
7912 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7913 && current_function_is_leaf
7914 && !ix86_current_function_calls_tls_descriptor)
7916 frame->red_zone_size = frame->to_allocate;
7917 if (frame->save_regs_using_mov)
7918 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7919 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7920 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7923 frame->red_zone_size = 0;
7924 frame->to_allocate -= frame->red_zone_size;
7925 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (presumably under an #if that is
   missing from this excerpt — these are unconditional here).  */
7927 fprintf (stderr, "\n");
7928 fprintf (stderr, "size: %ld\n", (long)size);
7929 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7930 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7931 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7932 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7933 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7934 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7935 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7936 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7937 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7938 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7939 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7940 (long)frame->hard_frame_pointer_offset);
7941 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7942 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7943 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7944 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7948 /* Emit code to save registers in the prologue. */
/* Pushes each to-be-saved GPR (descending regno order) and marks the
   insns frame-related for unwind info.  */
7951 ix86_emit_save_regs (void)
7956 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7957 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7959 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7960 RTX_FRAME_RELATED_P (insn) = 1;
7964 /* Emit code to save registers using MOV insns. First register
7965 is restored from POINTER + OFFSET. */
/* Move-based variant of ix86_emit_save_regs: stores each saved GPR to
   consecutive word slots starting at POINTER + OFFSET.  */
7967 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7972 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7973 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7975 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7977 gen_rtx_REG (Pmode, regno));
7978 RTX_FRAME_RELATED_P (insn) = 1;
7979 offset += UNITS_PER_WORD;
7983 /* Emit code to save registers using MOV insns. First register
7984 is restored from POINTER + OFFSET. */
/* SSE counterpart: TImode stores at 16-byte-aligned slots from
   POINTER + OFFSET, marked frame-related.  */
7986 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7992 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7993 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7995 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7996 set_mem_align (mem, 128);
7997 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7998 RTX_FRAME_RELATED_P (insn) = 1;
8003 /* Expand prologue or epilogue stack adjustment.
8004 The pattern exists to put a dependency on all ebp-based memory accesses.
8005 STYLE should be negative if instructions should be marked as frame related,
8006 zero if %r11 register is live and cannot be freely used and positive
/* NOTE(review): several source lines are elided here; the conditional
   structure selecting among the three emit paths below is not fully
   visible in this extraction.  */
8010 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8011 int style, bool set_cfa)
/* Simple case: offset fits the plain adjust-stack pattern.  */
8016 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8017 else if (x86_64_immediate_operand (offset, DImode))
8018 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8022 /* r11 is used by indirect sibcall return as well, set before the
8023 epilogue and used after the epilogue.  ATM indirect sibcall
8024 shouldn't be used together with huge frame sizes in one
8025 function because of the frame_size check in sibcall.c.  */
/* Huge 64-bit offset: materialize the constant in %r11 first.  */
8027 r11 = gen_rtx_REG (DImode, R11_REG);
8028 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8030 RTX_FRAME_RELATED_P (insn) = 1;
8031 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
/* When this adjustment defines the CFA, update the tracked CFA state
   and attach a REG_CFA_ADJUST_CFA note describing DEST = SRC + OFFSET.  */
8039 gcc_assert (ix86_cfa_state->reg == src);
8040 ix86_cfa_state->offset += INTVAL (offset);
8041 ix86_cfa_state->reg = dest;
8043 r = gen_rtx_PLUS (Pmode, src, offset);
8044 r = gen_rtx_SET (VOIDmode, dest, r);
8045 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8046 RTX_FRAME_RELATED_P (insn) = 1;
8049 RTX_FRAME_RELATED_P (insn) = 1;
8052 /* Find an available register to be used as dynamic realign argument
8053 pointer register.  Such a register will be written in prologue and
8054 used in begin of body, so it must not be
8055 1. parameter passing register.
8057 We reuse static-chain register if it is available.  Otherwise, we
8058 use DI for i386 and R13 for x86-64.  We chose R13 since it has
8061 Return: the regno of chosen register.  */
8064 find_drap_reg (void)
8066 tree decl = cfun->decl;
/* NOTE(review): the 64-bit/32-bit branch structure is partially elided
   here; the two similar tests below appear to be the x86-64 and i386
   arms respectively — confirm against the full source.  */
8070 /* Use R13 for nested function or function need static chain.
8071 Since a function with a tail call may use any caller-saved
8072 registers in epilogue, DRAP must not use caller-saved
8073 register in such case.  */
8074 if ((decl_function_context (decl)
8075 && !DECL_NO_STATIC_CHAIN (decl))
8076 || crtl->tail_call_emit)
8083 /* Use DI for nested function or function need static chain.
8084 Since a function with a tail call may use any caller-saved
8085 registers in epilogue, DRAP must not use caller-saved
8086 register in such case.  */
8087 if ((decl_function_context (decl)
8088 && !DECL_NO_STATIC_CHAIN (decl))
8089 || crtl->tail_call_emit)
8092 /* Reuse static chain register if it isn't used for parameter
/* ECX is free unless regparm > 2 or the fastcall convention is used.  */
8094 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8095 && !lookup_attribute ("fastcall",
8096 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8103 /* Update incoming stack boundary and estimated stack alignment.  */
8106 ix86_update_stack_boundary (void)
8108 /* Prefer the one specified at command line.  */
8109 ix86_incoming_stack_boundary
8110 = (ix86_user_incoming_stack_boundary
8111 ? ix86_user_incoming_stack_boundary
8112 : ix86_default_incoming_stack_boundary);
8114 /* Incoming stack alignment can be changed on individual functions
8115 via force_align_arg_pointer attribute.  We use the smallest
8116 incoming stack boundary.  */
8117 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8118 && lookup_attribute (ix86_force_align_arg_pointer_string,
8119 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8120 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8122 /* The incoming stack frame has to be aligned at least at
8123 parm_stack_boundary.  */
8124 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8125 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8127 /* Stack at entrance of main is aligned by runtime.  We use the
8128 smallest incoming stack boundary.  */
8129 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8130 && DECL_NAME (current_function_decl)
8131 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8132 && DECL_FILE_SCOPE_P (current_function_decl))
8133 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8135 /* x86_64 vararg needs 16byte stack alignment for register save
/* NOTE(review): the condition guarding this bump is partially elided;
   only the alignment-estimate check is visible.  */
8139 && crtl->stack_alignment_estimated < 128)
8140 crtl->stack_alignment_estimated = 128;
8143 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8144 needed or an rtx for DRAP otherwise.  */
8147 ix86_get_drap_rtx (void)
8149 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8150 crtl->need_drap = true;
8152 if (stack_realign_drap)
8154 /* Assign DRAP to vDRAP and return vDRAP.  */
8155 unsigned int regno = find_drap_reg ();
/* NOTE(review): lines creating the insn sequence (start_sequence /
   end_sequence around copy_to_reg) are elided from this view.  */
8160 arg_ptr = gen_rtx_REG (Pmode, regno);
8161 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP register into a pseudo (the "virtual" DRAP).  */
8164 drap_vreg = copy_to_reg (arg_ptr);
/* Emit the vDRAP setup right after the function entry point.  */
8168 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8169 RTX_FRAME_RELATED_P (insn) = 1;
8176 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  Incoming arguments
   are always addressed relative to the virtual incoming-args pointer.  */
8179 ix86_internal_arg_pointer (void)
8181 return virtual_incoming_args_rtx;
8184 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8185 to be generated in correct form.  */
8187 ix86_finalize_stack_realign_flags (void)
8189 /* Check if stack realign is really needed after reload, and
8190 stores result in cfun */
/* The effective incoming boundary is the larger of the parameter-stack
   boundary and the incoming stack boundary.  */
8191 unsigned int incoming_stack_boundary
8192 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8193 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Realignment is required when the needed alignment (or, for a leaf
   function, the max used slot alignment) exceeds what callers provide.  */
8194 unsigned int stack_realign = (incoming_stack_boundary
8195 < (current_function_is_leaf
8196 ? crtl->max_used_stack_slot_alignment
8197 : crtl->stack_alignment_needed))
8199 if (crtl->stack_realign_finalized)
8201 /* After stack_realign_needed is finalized, we can no longer
8203 gcc_assert (crtl->stack_realign_needed == stack_realign);
8207 crtl->stack_realign_needed = stack_realign;
8208 crtl->stack_realign_finalized = true;
8212 /* Expand the prologue into a bunch of separate insns.  Handles DRAP
   setup, frame-pointer establishment, stack realignment, register
   saves (push or mov), stack allocation (with optional probing), PIC
   register setup, and CFA-state bookkeeping for unwind info.
   NOTE(review): this extraction has many elided lines (declarations,
   braces, else-arms); comments below annotate only what is visible.  */
8215 ix86_expand_prologue (void)
8219 struct ix86_frame frame;
8220 HOST_WIDE_INT allocate;
8222 ix86_finalize_stack_realign_flags ();
8224 /* DRAP should not coexist with stack_realign_fp */
8225 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8227 /* Initialize CFA state for before the prologue.  */
8228 ix86_cfa_state->reg = stack_pointer_rtx;
8229 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
8231 ix86_compute_frame_layout (&frame);
8233 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8234 DRAP is needed and stack realignment is really needed after reload */
8235 if (crtl->drap_reg && crtl->stack_realign_needed)
8238 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
/* An extra slot is consumed if the DRAP register itself must be saved.  */
8239 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8240 ? 0 : UNITS_PER_WORD);
8242 gcc_assert (stack_realign_drap);
8244 /* Grab the argument pointer.  */
8245 x = plus_constant (stack_pointer_rtx,
8246 (UNITS_PER_WORD + param_ptr_offset));
8249 /* Only need to push parameter pointer reg if it is caller
8251 if (!call_used_regs[REGNO (crtl->drap_reg)])
8253 /* Push arg pointer reg */
8254 insn = emit_insn (gen_push (y));
8255 RTX_FRAME_RELATED_P (insn) = 1;
8258 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8259 RTX_FRAME_RELATED_P (insn) = 1;
/* From here on the CFA is expressed relative to the DRAP register.  */
8260 ix86_cfa_state->reg = crtl->drap_reg;
8262 /* Align the stack.  */
8263 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8265 GEN_INT (-align_bytes)));
8266 RTX_FRAME_RELATED_P (insn) = 1;
8268 /* Replicate the return address on the stack so that return
8269 address can be reached via (argp - 1) slot.  This is needed
8270 to implement macro RETURN_ADDR_RTX and intrinsic function
8271 expand_builtin_return_addr etc.  */
8273 x = gen_frame_mem (Pmode,
8274 plus_constant (x, -UNITS_PER_WORD));
8275 insn = emit_insn (gen_push (x));
8276 RTX_FRAME_RELATED_P (insn) = 1;
8279 /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8280 slower on all targets.  Also sdb doesn't like it.  */
8282 if (frame_pointer_needed)
8284 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8285 RTX_FRAME_RELATED_P (insn) = 1;
8287 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8288 RTX_FRAME_RELATED_P (insn) = 1;
/* Switch the CFA to the frame pointer unless DRAP already owns it.  */
8290 if (ix86_cfa_state->reg == stack_pointer_rtx)
8291 ix86_cfa_state->reg = hard_frame_pointer_rtx;
8294 if (stack_realign_fp)
8296 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8297 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8299 /* Align the stack.  */
8300 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8302 GEN_INT (-align_bytes)));
8303 RTX_FRAME_RELATED_P (insn) = 1;
/* Bytes to allocate: locals plus the SSE save area and its padding.  */
8306 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8308 if (!frame.save_regs_using_mov)
8309 ix86_emit_save_regs ();
/* When saving via MOV, the GP register save area is allocated too.  */
8311 allocate += frame.nregs * UNITS_PER_WORD;
8313 /* When using red zone we may start register saving before allocating
8314 the stack frame saving one cycle of the prologue.  However I will
8315 avoid doing this if I am going to have to probe the stack since
8316 at least on x86_64 the stack probe can turn into a call that clobbers
8317 a red zone location */
8318 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8319 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8320 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8321 && !crtl->stack_realign_needed)
8322 ? hard_frame_pointer_rtx
8323 : stack_pointer_rtx,
8324 -frame.nregs * UNITS_PER_WORD);
8328 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8329 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8330 GEN_INT (-allocate), -1,
8331 ix86_cfa_state->reg == stack_pointer_rtx);
/* Large allocations that must be probed go through a helper in %eax.  */
8334 /* Only valid for Win32.  */
8335 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8339 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8341 if (cfun->machine->call_abi == MS_ABI)
/* If %eax holds an incoming argument, preserve it around the probe.  */
8344 eax_live = ix86_eax_live_at_start_p ();
8348 emit_insn (gen_push (eax));
8349 allocate -= UNITS_PER_WORD;
8352 emit_move_insn (eax, GEN_INT (allocate));
8355 insn = gen_allocate_stack_worker_64 (eax, eax);
8357 insn = gen_allocate_stack_worker_32 (eax, eax);
8358 insn = emit_insn (insn);
8360 if (ix86_cfa_state->reg == stack_pointer_rtx)
8362 ix86_cfa_state->offset += allocate;
8363 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8364 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8365 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8366 RTX_FRAME_RELATED_P (insn) = 1;
/* Reload the live %eax value that was pushed before the probe.  */
8371 if (frame_pointer_needed)
8372 t = plus_constant (hard_frame_pointer_rtx,
8375 - frame.nregs * UNITS_PER_WORD);
8377 t = plus_constant (stack_pointer_rtx, allocate);
8378 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* MOV-based register saves when not already done via the red zone.  */
8382 if (frame.save_regs_using_mov
8383 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8384 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8386 if (!frame_pointer_needed
8387 || !frame.to_allocate
8388 || crtl->stack_realign_needed)
8389 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8391 + frame.nsseregs * 16 + frame.padding0);
8393 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8394 -frame.nregs * UNITS_PER_WORD);
8396 if (!frame_pointer_needed
8397 || !frame.to_allocate
8398 || crtl->stack_realign_needed)
8399 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8402 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8403 - frame.nregs * UNITS_PER_WORD
8404 - frame.nsseregs * 16
/* Set up the PIC register when the function references the GOT.  */
8407 pic_reg_used = false;
8408 if (pic_offset_table_rtx
8409 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8412 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8414 if (alt_pic_reg_used != INVALID_REGNUM)
8415 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8417 pic_reg_used = true;
8424 if (ix86_cmodel == CM_LARGE_PIC)
8426 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8427 rtx label = gen_label_rtx ();
8429 LABEL_PRESERVE_P (label) = 1;
8430 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8431 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8432 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8433 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8434 pic_offset_table_rtx, tmp_reg));
8437 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8440 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8443 /* In the pic_reg_used case, make sure that the got load isn't deleted
8444 when mcount needs it.  Blockage to avoid call movement across mcount
8445 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8447 if (crtl->profile && pic_reg_used)
8448 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8450 if (crtl->drap_reg && !crtl->stack_realign_needed)
8452 /* vDRAP is setup but after reload it turns out stack realign
8453 isn't necessary, here we will emit prologue to setup DRAP
8454 without stack realign adjustment */
8455 int drap_bp_offset = UNITS_PER_WORD * 2;
8456 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8457 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8460 /* Prevent instructions from being scheduled into register save push
8461 sequence when access to the redzone area is done through frame pointer.
8462 The offset between the frame pointer and the stack pointer is calculated
8463 relative to the value of the stack pointer at the end of the function
8464 prologue, and moving instructions that access redzone area via frame
8465 pointer inside push sequence violates this assumption.  */
8466 if (frame_pointer_needed && frame.red_zone_size)
8467 emit_insn (gen_memory_blockage ());
8469 /* Emit cld instruction if stringops are used in the function.  */
8470 if (TARGET_CLD && ix86_current_function_needs_cld)
8471 emit_insn (gen_cld ());
8474 /* Emit code to restore REG using a POP insn, updating the tracked CFA
   state and attaching the unwind notes the pop implies.  */
8477 ix86_emit_restore_reg_using_pop (rtx reg)
8479 rtx insn = emit_insn (ix86_gen_pop1 (reg));
8481 if (ix86_cfa_state->reg == crtl->drap_reg
8482 && REGNO (reg) == REGNO (crtl->drap_reg))
8484 /* Previously we'd represented the CFA as an expression
8485 like *(%ebp - 8).  We've just popped that value from
8486 the stack, which means we need to reset the CFA to
8487 the drap register.  This will remain until we restore
8488 the stack pointer.  */
8489 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8490 RTX_FRAME_RELATED_P (insn) = 1;
/* A pop moves the stack pointer up one word; adjust the CFA offset and
   record the SP change from the pop's own pattern.  */
8494 if (ix86_cfa_state->reg == stack_pointer_rtx)
8496 ix86_cfa_state->offset -= UNITS_PER_WORD;
8497 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8498 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
8501 /* When the frame pointer is the CFA, and we pop it, we are
8502 swapping back to the stack pointer as the CFA.  This happens
8503 for stack frames that don't allocate other data, so we assume
8504 the stack pointer is now pointing at the return address, i.e.
8505 the function entry state, which makes the offset be 1 word.  */
8506 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
8507 && reg == hard_frame_pointer_rtx)
8509 ix86_cfa_state->reg = stack_pointer_rtx;
8510 ix86_cfa_state->offset = UNITS_PER_WORD;
8512 add_reg_note (insn, REG_CFA_DEF_CFA,
8513 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8514 GEN_INT (UNITS_PER_WORD)));
/* In all cases, record that REG has been restored.  */
8517 add_reg_note (insn, REG_CFA_RESTORE, reg);
8518 RTX_FRAME_RELATED_P (insn) = 1;
8521 /* Emit code to restore saved registers using POP insns.  Pops the
   non-SSE registers in ascending regno order; each pop also updates
   the CFA bookkeeping via ix86_emit_restore_reg_using_pop.  */
8524 ix86_emit_restore_regs_using_pop (void)
8528 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8529 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8530 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
8533 /* Emit code and notes for the LEAVE instruction.  LEAVE restores the
   frame pointer and deallocates the frame in one insn, so when the CFA
   is the frame pointer both a CFA adjustment and a restore note are
   needed for the unwinder.  */
8536 ix86_emit_leave (void)
8538 rtx insn = emit_insn (ix86_gen_leave ());
8540 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
8542 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8543 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
8544 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8545 RTX_FRAME_RELATED_P (insn) = 1;
8549 /* Emit code to restore saved registers using MOV insns.  First register
8550 is restored from POINTER + OFFSET.  MAYBE_EH_RETURN selects the
   register set for the eh_return path.  */
8552 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8553 int maybe_eh_return)
8556 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8559 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8560 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8562 rtx reg = gen_rtx_REG (Pmode, regno);
8564 /* Ensure that adjust_address won't be forced to produce pointer
8565 out of range allowed by x86-64 instruction set.  */
8566 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Displacement doesn't fit a signed 32-bit immediate: rebase via %r11.  */
8570 r11 = gen_rtx_REG (DImode, R11_REG);
8571 emit_move_insn (r11, GEN_INT (offset));
8572 emit_insn (gen_adddi3 (r11, r11, pointer));
8573 base_address = gen_rtx_MEM (Pmode, r11);
8576 insn = emit_move_insn (reg,
8577 adjust_address (base_address, Pmode, offset));
8578 offset += UNITS_PER_WORD;
8580 if (ix86_cfa_state->reg == crtl->drap_reg
8581 && regno == REGNO (crtl->drap_reg))
8583 /* Previously we'd represented the CFA as an expression
8584 like *(%ebp - 8).  We've just loaded that value from
8585 the stack, which means we need to reset the CFA to
8586 the drap register.  This will remain until we restore
8587 the stack pointer.  */
8588 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8591 add_reg_note (insn, REG_CFA_RESTORE, reg);
8592 RTX_FRAME_RELATED_P (insn) = 1;
8596 /* Emit code to restore saved SSE registers using MOV insns.  First
8597 register is restored from POINTER + OFFSET; loads are aligned
   TImode moves from the 128-bit-aligned save area.  */
8599 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8600 int maybe_eh_return)
8603 rtx base_address = gen_rtx_MEM (TImode, pointer);
8606 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8607 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8609 rtx reg = gen_rtx_REG (TImode, regno);
8611 /* Ensure that adjust_address won't be forced to produce pointer
8612 out of range allowed by x86-64 instruction set.  */
8613 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Displacement doesn't fit a signed 32-bit immediate: rebase via %r11.  */
8617 r11 = gen_rtx_REG (DImode, R11_REG);
8618 emit_move_insn (r11, GEN_INT (offset));
8619 emit_insn (gen_adddi3 (r11, r11, pointer));
8620 base_address = gen_rtx_MEM (TImode, r11);
8623 mem = adjust_address (base_address, TImode, offset);
8624 set_mem_align (mem, 128);
8625 insn = emit_move_insn (reg, mem);
8628 add_reg_note (insn, REG_CFA_RESTORE, reg);
8629 RTX_FRAME_RELATED_P (insn) = 1;
8633 /* Restore function stack, frame, and registers.  STYLE is 1 for a
   normal return, 0 for a sibcall epilogue, 2 for an eh_return epilogue
   (inferred from the "style == 2" eh-return tests below — confirm
   against callers).  NOTE(review): many lines are elided from this
   extraction; comments annotate only the visible statements.  */
8636 ix86_expand_epilogue (int style)
8639 struct ix86_frame frame;
8640 HOST_WIDE_INT offset;
/* Snapshot the CFA state so it can be reset for the next epilogue.  */
8641 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
8644 ix86_finalize_stack_realign_flags ();
8646 /* When stack is realigned, SP must be valid.  */
8647 sp_valid = (!frame_pointer_needed
8648 || current_function_sp_is_unchanging
8649 || stack_realign_fp);
8651 ix86_compute_frame_layout (&frame);
8653 /* See the comment about red zone and frame
8654 pointer usage in ix86_expand_prologue.  */
8655 if (frame_pointer_needed && frame.red_zone_size)
8656 emit_insn (gen_memory_blockage ());
8658 /* Calculate start of saved registers relative to ebp.  Special care
8659 must be taken for the normal return case of a function using
8660 eh_return: the eax and edx registers are marked as saved, but not
8661 restored along this path.  */
8662 offset = frame.nregs;
8663 if (crtl->calls_eh_return && style != 2)
8665 offset *= -UNITS_PER_WORD;
8666 offset -= frame.nsseregs * 16 + frame.padding0;
8668 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8669 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
8671 /* If we're only restoring one register and sp is not valid then
8672 using a move instruction to restore the register since it's
8673 less work than reloading sp and popping the register.
8675 The default code result in stack adjustment using add/lea instruction,
8676 while this code results in LEAVE instruction (or discrete equivalent),
8677 so it is profitable in some other cases as well.  Especially when there
8678 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
8679 and there is exactly one register to pop.  This heuristic may need some
8680 tuning in future.  */
8681 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8682 || (TARGET_EPILOGUE_USING_MOVE
8683 && cfun->machine->use_fast_prologue_epilogue
8684 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8685 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
8686 && frame.to_allocate)
8687 || (frame_pointer_needed && TARGET_USE_LEAVE
8688 && cfun->machine->use_fast_prologue_epilogue
8689 && (frame.nregs + frame.nsseregs) == 1)
8690 || crtl->calls_eh_return)
8692 /* Restore registers.  We can use ebp or esp to address the memory
8693 locations.  If both are available, default to ebp, since offsets
8694 are known to be small.  Only exception is esp pointing directly
8695 to the end of block of saved registers, where we may simplify
8698 If we are realigning stack with bp and sp, regs restore can't
8699 be addressed by bp.  sp must be used instead.  */
8701 if (!frame_pointer_needed
8702 || (sp_valid && !frame.to_allocate)
8703 || stack_realign_fp)
8705 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8706 frame.to_allocate, style == 2);
8707 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8709 + frame.nsseregs * 16
8710 + frame.padding0, style == 2);
8714 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8715 offset, style == 2);
8716 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8718 + frame.nsseregs * 16
8719 + frame.padding0, style == 2);
8722 /* eh_return epilogues need %ecx added to the stack pointer.  */
8725 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8727 /* Stack align doesn't work with eh_return.  */
8728 gcc_assert (!crtl->stack_realign_needed);
8730 if (frame_pointer_needed)
8732 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8733 tmp = plus_constant (tmp, UNITS_PER_WORD);
8734 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8736 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8737 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
8739 /* Note that we use SA as a temporary CFA, as the return
8740 address is at the proper place relative to it.  We
8741 pretend this happens at the FP restore insn because
8742 prior to this insn the FP would be stored at the wrong
8743 offset relative to SA, and after this insn we have no
8744 other reasonable register to use for the CFA.  We don't
8745 bother resetting the CFA to the SP for the duration of
8747 add_reg_note (tmp, REG_CFA_DEF_CFA,
8748 plus_constant (sa, UNITS_PER_WORD));
8749 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8750 RTX_FRAME_RELATED_P (tmp) = 1;
8751 ix86_cfa_state->reg = sa;
8752 ix86_cfa_state->offset = UNITS_PER_WORD;
8754 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8755 const0_rtx, style, false);
/* No frame pointer: add the whole frame size plus SA to %esp.  */
8759 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8760 tmp = plus_constant (tmp, (frame.to_allocate
8761 + frame.nregs * UNITS_PER_WORD
8762 + frame.nsseregs * 16
8764 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8766 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
8767 if (ix86_cfa_state->offset != UNITS_PER_WORD)
8769 ix86_cfa_state->offset = UNITS_PER_WORD;
8770 add_reg_note (tmp, REG_CFA_DEF_CFA,
8771 plus_constant (stack_pointer_rtx,
8773 RTX_FRAME_RELATED_P (tmp) = 1;
8777 else if (!frame_pointer_needed)
8778 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8779 GEN_INT (frame.to_allocate
8780 + frame.nregs * UNITS_PER_WORD
8781 + frame.nsseregs * 16
8783 style, !using_drap);
8784 /* If not an i386, mov & pop is faster than "leave".  */
8785 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8786 || !cfun->machine->use_fast_prologue_epilogue)
/* Discrete equivalent of LEAVE: mov %ebp,%esp then pop %ebp.  */
8790 pro_epilogue_adjust_stack (stack_pointer_rtx,
8791 hard_frame_pointer_rtx,
8792 const0_rtx, style, !using_drap);
8794 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* POP-based restore path.  */
8799 /* First step is to deallocate the stack frame so that we can
8802 If we realign stack with frame pointer, then stack pointer
8803 won't be able to recover via lea $offset(%bp), %sp, because
8804 there is a padding area between bp and sp for realign.
8805 "add $to_allocate, %sp" must be used instead.  */
8808 gcc_assert (frame_pointer_needed);
8809 gcc_assert (!stack_realign_fp);
8810 pro_epilogue_adjust_stack (stack_pointer_rtx,
8811 hard_frame_pointer_rtx,
8812 GEN_INT (offset), style, false);
8813 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8814 frame.to_allocate, style == 2);
8815 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8816 GEN_INT (frame.nsseregs * 16),
8819 else if (frame.to_allocate || frame.nsseregs)
8821 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8824 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8825 GEN_INT (frame.to_allocate
8826 + frame.nsseregs * 16
8827 + frame.padding0), style,
8828 !using_drap && !frame_pointer_needed);
8831 ix86_emit_restore_regs_using_pop ();
8833 if (frame_pointer_needed)
8835 /* Leave results in shorter dependency chains on CPUs that are
8836 able to grok it fast.  */
8837 if (TARGET_USE_LEAVE)
8841 /* When stack realignment really happened, recovering the stack
8842 pointer from the hard frame pointer is a must, if not using
8844 if (stack_realign_fp)
8845 pro_epilogue_adjust_stack (stack_pointer_rtx,
8846 hard_frame_pointer_rtx,
8847 const0_rtx, style, !using_drap);
8848 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* DRAP teardown: pop back to the caller's stack pointer.  */
8855 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8856 ? 0 : UNITS_PER_WORD);
8859 gcc_assert (stack_realign_drap);
8861 insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8863 GEN_INT (-(UNITS_PER_WORD
8864 + param_ptr_offset))));
8866 ix86_cfa_state->reg = stack_pointer_rtx;
8867 ix86_cfa_state->offset = UNITS_PER_WORD + param_ptr_offset;
8869 add_reg_note (insn, REG_CFA_DEF_CFA,
8870 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
8871 GEN_INT (ix86_cfa_state->offset)));
8872 RTX_FRAME_RELATED_P (insn) = 1;
/* If the DRAP register was saved (call-clobbered case excluded),
   restore it with a pop.  */
8874 if (param_ptr_offset)
8875 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
8878 /* Sibcall epilogues don't want a return instruction.  */
8881 *ix86_cfa_state = cfa_state_save;
/* Emit the return, popping callee-popped arguments if needed.  */
8885 if (crtl->args.pops_args && crtl->args.size)
8887 rtx popc = GEN_INT (crtl->args.pops_args);
8889 /* i386 can only pop 64K bytes.  If asked to pop more, pop return
8890 address, do explicit add, and jump indirectly to the caller.  */
8892 if (crtl->args.pops_args >= 65536)
8894 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8897 /* There is no "pascal" calling convention in any 64bit ABI.  */
8898 gcc_assert (!TARGET_64BIT);
8900 insn = emit_insn (gen_popsi1 (ecx));
8901 ix86_cfa_state->offset -= UNITS_PER_WORD;
8903 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8904 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
8905 add_reg_note (insn, REG_CFA_REGISTER,
8906 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
8907 RTX_FRAME_RELATED_P (insn) = 1;
8909 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8911 emit_jump_insn (gen_return_indirect_internal (ecx));
8914 emit_jump_insn (gen_return_pop_internal (popc));
8917 emit_jump_insn (gen_return_internal ());
8919 /* Restore the state back to the state from the prologue,
8920 so that it's correct for the next epilogue.  */
8921 *ix86_cfa_state = cfa_state_save;
8924 /* Reset from the function's potential modifications.  Restores the PIC
   register's regno (the prologue may have switched it to an alternate
   register) and, on Mach-O, pads the function end with a NOP.  */
8927 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8928 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8930 if (pic_offset_table_rtx)
8931 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8933 /* Mach-O doesn't support labels at the end of objects, so if
8934 it looks like we might want one, insert a NOP.  */
/* NOTE(review): the loop/condition structure scanning backwards over
   trailing notes is partially elided here.  */
8936 rtx insn = get_last_insn ();
8939 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8940 insn = PREV_INSN (insn);
8944 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8945 fputs ("\tnop\n", file);
8951 /* Extract the parts of an RTL expression that is a valid memory address
8952 for an instruction.  Return 0 if the structure of the address is
8953 grossly off.  Return -1 if the address contains ASHIFT, so it is not
8954 strictly valid, but still used for computing length of lea instruction.
   On success, fills *OUT with base, index, scale, displacement and
   segment.  NOTE(review): many interior lines are elided in this
   extraction.  */
8957 ix86_decompose_address (rtx addr, struct ix86_address *out)
8959 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8960 rtx base_reg, index_reg;
8961 HOST_WIDE_INT scale = 1;
8962 rtx scale_rtx = NULL_RTX;
8964 enum ix86_address_seg seg = SEG_DEFAULT;
/* Bare register (or SUBREG of one) is the simplest address form.  */
8966 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
/* PLUS: flatten the addend tree, then classify each operand.  */
8968 else if (GET_CODE (addr) == PLUS)
8978 addends[n++] = XEXP (op, 1);
8981 while (GET_CODE (op) == PLUS);
8986 for (i = n; i >= 0; --i)
8989 switch (GET_CODE (op))
8994 index = XEXP (op, 0);
8995 scale_rtx = XEXP (op, 1);
/* UNSPEC_TP marks a thread-pointer reference; map it to the %fs (64-bit)
   or %gs (32-bit) segment when direct TLS segment refs are enabled.  */
8999 if (XINT (op, 1) == UNSPEC_TP
9000 && TARGET_TLS_DIRECT_SEG_REFS
9001 && seg == SEG_DEFAULT)
9002 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
9031 else if (GET_CODE (addr) == MULT)
9033 index = XEXP (addr, 0);			/* index*scale */
9034 scale_rtx = XEXP (addr, 1);
9036 else if (GET_CODE (addr) == ASHIFT)
9040 /* We're called for lea too, which implements ashift on occasion.  */
9041 index = XEXP (addr, 0);
9042 tmp = XEXP (addr, 1);
9043 if (!CONST_INT_P (tmp))
/* Shift count maps to scale 1/2/4/8, i.e. a count of at most 3.  */
9045 scale = INTVAL (tmp);
9046 if ((unsigned HOST_WIDE_INT) scale > 3)
9052 disp = addr;				/* displacement */
9054 /* Extract the integral value of scale.  */
9057 if (!CONST_INT_P (scale_rtx))
9059 scale = INTVAL (scale_rtx);
9062 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
9063 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
9065 /* Avoid useless 0 displacement.  */
9066 if (disp == const0_rtx && (base || index))
9068 /* Allow arg pointer and stack pointer as index if there is no scaling.  */
9070 if (base_reg && index_reg && scale == 1
9071 && (index_reg == arg_pointer_rtx
9072 || index_reg == frame_pointer_rtx
9073 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* Swap base and index so the special register becomes the base.  */
9076 tmp = base, base = index, index = tmp;
9077 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
9080 /* Special case: %ebp cannot be encoded as a base without a displacement.
9084 && (base_reg == hard_frame_pointer_rtx
9085 || base_reg == frame_pointer_rtx
9086 || base_reg == arg_pointer_rtx
9087 || (REG_P (base_reg)
9088 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
9089 || REGNO (base_reg) == R13_REG))))
9092 /* Special case: on K6, [%esi] makes the instruction vector decoded.
9093 Avoid this by transforming to [%esi+0].
9094 Reload calls address legitimization without cfun defined, so we need
9095 to test cfun for being non-NULL.  */
9096 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9097 && base_reg && !index_reg && !disp
9099 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
9102 /* Special case: encode reg+reg instead of reg*2.  */
9103 if (!base && index && scale == 2)
9104 base = index, base_reg = index_reg, scale = 1;
9106 /* Special case: scaling cannot be encoded without base or displacement.  */
9107 if (!base && !disp && index && scale != 1)
9119 /* Return cost of the memory address x.
9120 For i386, it is better to use a complex address than let gcc copy
9121 the address into a reg and make a new pseudo.  But not if the address
9122 requires two regs - that would mean more pseudos with longer
9125 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
9127 struct ix86_address parts;
9129 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the hard/pseudo register tests below see the regs.  */
9133 if (parts.base && GET_CODE (parts.base) == SUBREG)
9134 parts.base = SUBREG_REG (parts.base);
9135 if (parts.index && GET_CODE (parts.index) == SUBREG)
9136 parts.index = SUBREG_REG (parts.index);
9138 /* Attempt to minimize number of registers in the address.  */
9140 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9142 && (!REG_P (parts.index)
9143 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
/* Two distinct pseudo registers in one address costs extra.  */
9147 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9149 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9150 && parts.base != parts.index)
9153 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
9154 since its predecode logic can't detect the length of instructions
9155 and it degenerates to vector decoded.  Increase cost of such
9156 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
9157 to split such addresses or even refuse such addresses at all.
9159 Following addressing modes are affected:
9164 The first and last case may be avoidable by explicitly coding the zero in
9165 memory address, but I don't have AMD-K6 machine handy to check this
9169 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9170 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9171 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9177 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9178 this is used to form addresses to local data when -fPIC is in
/* Return true iff DISP is the Mach-O pic-base offset unspec
   (UNSPEC_MACHOPIC_OFFSET). */
9182 darwin_local_data_pic (rtx disp)
9184 return (GET_CODE (disp) == UNSPEC
9185 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9188 /* Determine if a given RTX is a valid constant. We already know this
9189 satisfies CONSTANT_P. */
9192 legitimate_constant_p (rtx x)
9194 switch (GET_CODE (x))
/* For CONST, drill through an optional (plus sym const_int) wrapper. */
9199 if (GET_CODE (x) == PLUS)
9201 if (!CONST_INT_P (XEXP (x, 1)))
9206 if (TARGET_MACHO && darwin_local_data_pic (x))
9209 /* Only some unspecs are valid as "constants". */
9210 if (GET_CODE (x) == UNSPEC)
9211 switch (XINT (x, 1))
9216 return TARGET_64BIT;
/* TLS offset unspecs are only constant for the matching TLS model of
   the wrapped symbol. */
9219 x = XVECEXP (x, 0, 0);
9220 return (GET_CODE (x) == SYMBOL_REF
9221 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9223 x = XVECEXP (x, 0, 0);
9224 return (GET_CODE (x) == SYMBOL_REF
9225 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9230 /* We must have drilled down to a symbol. */
9231 if (GET_CODE (x) == LABEL_REF)
9233 if (GET_CODE (x) != SYMBOL_REF)
9238 /* TLS symbols are never valid. */
9239 if (SYMBOL_REF_TLS_MODEL (x))
9242 /* DLLIMPORT symbols are never valid. */
9243 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9244 && SYMBOL_REF_DLLIMPORT_P (x))
/* NOTE(review): the TImode check below presumably rejects non-zero
   TImode constants that are not standard SSE constants -- confirm
   against the full source. */
9249 if (GET_MODE (x) == TImode
9250 && x != CONST0_RTX (TImode)
9256 if (!standard_sse_constant_p (x))
9263 /* Otherwise we handle everything else in the move patterns. */
9267 /* Determine if it's legal to put X into the constant pool. This
9268 is not possible for the address of thread-local symbols, which
9269 is checked above. */
9272 ix86_cannot_force_const_mem (rtx x)
9274 /* We can always put integral constants and vectors in memory. */
9275 switch (GET_CODE (x))
/* Anything else can be forced to memory exactly when it is a
   legitimate constant. */
9285 return !legitimate_constant_p (x);
9289 /* Nonzero if the constant value X is a legitimate general operand
9290 when generating PIC code. It is given that flag_pic is on and
9291 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9294 legitimate_pic_operand_p (rtx x)
9298 switch (GET_CODE (x))
/* For CONST, strip an optional constant-integer offset before
   inspecting the inner expression. */
9301 inner = XEXP (x, 0);
9302 if (GET_CODE (inner) == PLUS
9303 && CONST_INT_P (XEXP (inner, 1)))
9304 inner = XEXP (inner, 0);
9306 /* Only some unspecs are valid as "constants". */
9307 if (GET_CODE (inner) == UNSPEC)
9308 switch (XINT (inner, 1))
9313 return TARGET_64BIT;
9315 x = XVECEXP (inner, 0, 0);
9316 return (GET_CODE (x) == SYMBOL_REF
9317 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9318 case UNSPEC_MACHOPIC_OFFSET:
9319 return legitimate_pic_address_disp_p (x);
/* SYMBOL_REF / LABEL_REF fall through to the displacement check. */
9327 return legitimate_pic_address_disp_p (x);
9334 /* Determine if a given CONST RTX is a valid memory displacement
9338 legitimate_pic_address_disp_p (rtx disp)
9342 /* In 64bit mode we can allow direct addresses of symbols and labels
9343 when they are not dynamic symbols. */
9346 rtx op0 = disp, op1;
9348 switch (GET_CODE (disp))
/* CONST: accept symbol+offset when the offset fits in +/-16MB (the
   RIP-relative small-model range). */
9354 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9356 op0 = XEXP (XEXP (disp, 0), 0);
9357 op1 = XEXP (XEXP (disp, 0), 1);
9358 if (!CONST_INT_P (op1)
9359 || INTVAL (op1) >= 16*1024*1024
9360 || INTVAL (op1) < -16*1024*1024)
9362 if (GET_CODE (op0) == LABEL_REF)
9364 if (GET_CODE (op0) != SYMBOL_REF)
9369 /* TLS references should always be enclosed in UNSPEC. */
9370 if (SYMBOL_REF_TLS_MODEL (op0))
9372 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9373 && ix86_cmodel != CM_LARGE_PIC)
9381 if (GET_CODE (disp) != CONST)
9383 disp = XEXP (disp, 0);
9387 /* It is unsafe to allow PLUS expressions. This limit allowed distance
9388 of GOT tables. We should not need these anyway. */
9389 if (GET_CODE (disp) != UNSPEC
9390 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9391 && XINT (disp, 1) != UNSPEC_GOTOFF
9392 && XINT (disp, 1) != UNSPEC_PLTOFF))
9395 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9396 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip a CONST wrapper and optional constant offset. */
9402 if (GET_CODE (disp) == PLUS)
9404 if (!CONST_INT_P (XEXP (disp, 1)))
9406 disp = XEXP (disp, 0);
9410 if (TARGET_MACHO && darwin_local_data_pic (disp))
9413 if (GET_CODE (disp) != UNSPEC)
9416 switch (XINT (disp, 1))
9421 /* We need to check for both symbols and labels because VxWorks loads
9422 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9424 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9425 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9427 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9428 While the ABI also specifies a 32bit relocation, we don't produce it in
9429 small PIC model at all. */
9430 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9431 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9433 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9435 case UNSPEC_GOTTPOFF:
9436 case UNSPEC_GOTNTPOFF:
9437 case UNSPEC_INDNTPOFF:
/* The TLS unspecs are valid only when the wrapped symbol really has
   the corresponding TLS model. */
9440 disp = XVECEXP (disp, 0, 0);
9441 return (GET_CODE (disp) == SYMBOL_REF
9442 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9444 disp = XVECEXP (disp, 0, 0);
9445 return (GET_CODE (disp) == SYMBOL_REF
9446 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9448 disp = XVECEXP (disp, 0, 0);
9449 return (GET_CODE (disp) == SYMBOL_REF
9450 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9456 /* Recognizes RTL expressions that are valid memory addresses for an
9457 instruction. The MODE argument is the machine mode for the MEM
9458 expression that wants to use this address.
9460 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9461 convert common non-canonical forms to canonical form so that they will
9465 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9466 rtx addr, bool strict)
9468 struct ix86_address parts;
9469 rtx base, index, disp;
9470 HOST_WIDE_INT scale;
/* REASON/REASON_RTX record why an address was rejected; presumably
   used for debug output in elided code -- confirm against full source. */
9471 const char *reason = NULL;
9472 rtx reason_rtx = NULL_RTX;
9474 if (ix86_decompose_address (addr, &parts) <= 0)
9476 reason = "decomposition failed";
9481 index = parts.index;
9483 scale = parts.scale;
9485 /* Validate base register.
9487 Don't allow SUBREG's that span more than a word here. It can lead to spill
9488 failures when the base is one word out of a two word structure, which is
9489 represented internally as a DImode int. */
9498 else if (GET_CODE (base) == SUBREG
9499 && REG_P (SUBREG_REG (base))
9500 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9502 reg = SUBREG_REG (base);
9505 reason = "base is not a register";
9509 if (GET_MODE (base) != Pmode)
9511 reason = "base is not in Pmode";
/* STRICT means hard-register constraints must hold (post-reload);
   non-strict also accepts pseudos. */
9515 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9516 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9518 reason = "base is not valid";
9523 /* Validate index register.
9525 Don't allow SUBREG's that span more than a word here -- same as above. */
9534 else if (GET_CODE (index) == SUBREG
9535 && REG_P (SUBREG_REG (index))
9536 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9538 reg = SUBREG_REG (index);
9541 reason = "index is not a register";
9545 if (GET_MODE (index) != Pmode)
9547 reason = "index is not in Pmode";
9551 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9552 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9554 reason = "index is not valid";
9559 /* Validate scale factor. */
9562 reason_rtx = GEN_INT (scale);
9565 reason = "scale without index";
/* Hardware SIB encoding only supports scales 1, 2, 4, 8. */
9569 if (scale != 2 && scale != 4 && scale != 8)
9571 reason = "scale is not a valid multiplier";
9576 /* Validate displacement. */
9581 if (GET_CODE (disp) == CONST
9582 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9583 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9584 switch (XINT (XEXP (disp, 0), 1))
9586 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9587 used. While ABI specify also 32bit relocations, we don't produce
9588 them at all and use IP relative instead. */
9591 gcc_assert (flag_pic);
9593 goto is_legitimate_pic;
9594 reason = "64bit address unspec"
9597 case UNSPEC_GOTPCREL:
9598 gcc_assert (flag_pic);
9599 goto is_legitimate_pic;
9601 case UNSPEC_GOTTPOFF:
9602 case UNSPEC_GOTNTPOFF:
9603 case UNSPEC_INDNTPOFF:
9609 reason = "invalid address unspec";
9613 else if (SYMBOLIC_CONST (disp)
9617 && MACHOPIC_INDIRECT
9618 && !machopic_operand_p (disp)
9624 if (TARGET_64BIT && (index || base))
9626 /* foo@dtpoff(%rX) is ok. */
9627 if (GET_CODE (disp) != CONST
9628 || GET_CODE (XEXP (disp, 0)) != PLUS
9629 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9630 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9631 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9632 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9634 reason = "non-constant pic memory reference";
9638 else if (! legitimate_pic_address_disp_p (disp))
9640 reason = "displacement is an invalid pic construct";
9644 /* This code used to verify that a symbolic pic displacement
9645 includes the pic_offset_table_rtx register.
9647 While this is good idea, unfortunately these constructs may
9648 be created by "adds using lea" optimization for incorrect
9657 This code is nonsensical, but results in addressing
9658 GOT table with pic_offset_table_rtx base. We can't
9659 just refuse it easily, since it gets matched by
9660 "addsi3" pattern, that later gets split to lea in the
9661 case output register differs from input. While this
9662 can be handled by separate addsi pattern for this case
9663 that never results in lea, this seems to be easier and
9664 correct fix for crash to disable this test. */
9666 else if (GET_CODE (disp) != LABEL_REF
9667 && !CONST_INT_P (disp)
9668 && (GET_CODE (disp) != CONST
9669 || !legitimate_constant_p (disp))
9670 && (GET_CODE (disp) != SYMBOL_REF
9671 || !legitimate_constant_p (disp)))
9673 reason = "displacement is not constant";
9676 else if (TARGET_64BIT
9677 && !x86_64_immediate_operand (disp, VOIDmode))
9679 reason = "displacement is out of range";
9684 /* Everything looks valid. */
9691 /* Determine if a given RTX is a valid constant address. */
9694 constant_address_p (rtx x)
/* An address is a constant address iff it is CONSTANT_P and passes the
   strict (hard-register) address check. */
9696 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
9699 /* Return a unique alias set for the GOT. */
9701 static alias_set_type
9702 ix86_GOT_alias_set (void)
/* Lazily create the alias set on first use; -1 marks "not yet made". */
9704 static alias_set_type set = -1;
9706 set = new_alias_set ();
9710 /* Return a legitimate reference for ORIG (an address) using the
9711 register REG. If REG is 0, a new pseudo is generated.
9713 There are two types of references that must be handled:
9715 1. Global data references must load the address from the GOT, via
9716 the PIC reg. An insn is emitted to do this load, and the reg is
9719 2. Static data references, constant pool addresses, and code labels
9720 compute the address as an offset from the GOT, whose base is in
9721 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9722 differentiate them from global data objects. The returned
9723 address is the PIC reg + an unspec constant.
9725 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
9726 reg also appears in the address. */
9729 legitimize_pic_address (rtx orig, rtx reg)
9736 if (TARGET_MACHO && !TARGET_64BIT)
9739 reg = gen_reg_rtx (Pmode);
9740 /* Use the generic Mach-O PIC machinery. */
9741 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9745 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9747 else if (TARGET_64BIT
9748 && ix86_cmodel != CM_SMALL_PIC
9749 && gotoff_operand (addr, Pmode))
9752 /* This symbol may be referenced via a displacement from the PIC
9753 base address (@GOTOFF). */
/* Mark the PIC register live during reload, since reload does not
   track it by itself. */
9755 if (reload_in_progress)
9756 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9757 if (GET_CODE (addr) == CONST)
9758 addr = XEXP (addr, 0);
9759 if (GET_CODE (addr) == PLUS)
9761 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9763 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9766 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9767 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9769 tmpreg = gen_reg_rtx (Pmode);
9772 emit_move_insn (tmpreg, new_rtx);
9776 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9777 tmpreg, 1, OPTAB_DIRECT);
9780 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9782 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9784 /* This symbol may be referenced via a displacement from the PIC
9785 base address (@GOTOFF). */
9787 if (reload_in_progress)
9788 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9789 if (GET_CODE (addr) == CONST)
9790 addr = XEXP (addr, 0);
9791 if (GET_CODE (addr) == PLUS)
9793 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9795 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9798 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9799 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9800 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9804 emit_move_insn (reg, new_rtx);
9808 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9809 /* We can't use @GOTOFF for text labels on VxWorks;
9810 see gotoff_operand. */
9811 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9813 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9815 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9816 return legitimize_dllimport_symbol (addr, true);
9817 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9818 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9819 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9821 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9822 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium PIC: use a RIP-relative @GOTPCREL load. */
9826 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9828 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9829 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9830 new_rtx = gen_const_mem (Pmode, new_rtx);
9831 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9834 reg = gen_reg_rtx (Pmode);
9835 /* Use directly gen_movsi, otherwise the address is loaded
9836 into register for CSE. We don't want to CSE these addresses,
9837 instead we CSE addresses from the GOT table, so skip this. */
9838 emit_insn (gen_movsi (reg, new_rtx))
9843 /* This symbol must be referenced via a load from the
9844 Global Offset Table (@GOT). */
9846 if (reload_in_progress)
9847 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9848 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9849 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9851 new_rtx = force_reg (Pmode, new_rtx);
9852 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9853 new_rtx = gen_const_mem (Pmode, new_rtx);
9854 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9857 reg = gen_reg_rtx (Pmode);
9858 emit_move_insn (reg, new_rtx);
/* Remaining cases: constants and composite addresses. */
9864 if (CONST_INT_P (addr)
9865 && !x86_64_immediate_operand (addr, VOIDmode))
9869 emit_move_insn (reg, addr);
9873 new_rtx = force_reg (Pmode, addr);
9875 else if (GET_CODE (addr) == CONST)
9877 addr = XEXP (addr, 0);
9879 /* We must match stuff we generate before. Assume the only
9880 unspecs that can get here are ours. Not that we could do
9881 anything with them anyway.... */
9882 if (GET_CODE (addr) == UNSPEC
9883 || (GET_CODE (addr) == PLUS
9884 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9886 gcc_assert (GET_CODE (addr) == PLUS);
9888 if (GET_CODE (addr) == PLUS)
9890 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9892 /* Check first to see if this is a constant offset from a @GOTOFF
9893 symbol reference. */
9894 if (gotoff_operand (op0, Pmode)
9895 && CONST_INT_P (op1))
9899 if (reload_in_progress)
9900 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9901 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9903 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9904 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9905 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9909 emit_move_insn (reg, new_rtx);
/* Offsets outside +/-16MB don't fit the 64-bit small-model
   relocation; force them into registers instead. */
9915 if (INTVAL (op1) < -16*1024*1024
9916 || INTVAL (op1) >= 16*1024*1024)
9918 if (!x86_64_immediate_operand (op1, Pmode))
9919 op1 = force_reg (Pmode, op1);
9920 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* Otherwise legitimize the two halves separately and recombine. */
9926 base = legitimize_pic_address (XEXP (addr, 0), reg);
9927 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9928 base == reg ? NULL_RTX : reg);
9930 if (CONST_INT_P (new_rtx))
9931 new_rtx = plus_constant (base, INTVAL (new_rtx));
9934 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9936 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9937 new_rtx = XEXP (new_rtx, 1);
9939 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9947 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9950 get_thread_pointer (int to_reg)
/* The thread pointer is represented as an UNSPEC_TP of const0_rtx. */
9954 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9958 reg = gen_reg_rtx (Pmode);
9959 insn = gen_rtx_SET (VOIDmode, reg, tp);
9960 insn = emit_insn (insn);
9965 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
9966 false if we expect this to be used for a memory address and true if
9967 we expect to load the address into a register. */
9970 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9972 rtx dest, base, off, pic, tp;
9977 case TLS_MODEL_GLOBAL_DYNAMIC:
9978 dest = gen_reg_rtx (Pmode);
9979 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
/* Classic 64-bit GD: a __tls_get_addr call returning in %rax, wrapped
   in a libcall block so it can be CSEd. */
9981 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9983 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9986 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9987 insns = get_insns ();
9990 RTL_CONST_CALL_P (insns) = 1;
9991 emit_libcall_block (insns, dest, rax, x);
9993 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9994 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9996 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9998 if (TARGET_GNU2_TLS)
10000 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
10002 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10006 case TLS_MODEL_LOCAL_DYNAMIC:
10007 base = gen_reg_rtx (Pmode);
10008 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10010 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10012 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
10015 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
10016 insns = get_insns ();
10019 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
10020 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
10021 RTL_CONST_CALL_P (insns) = 1;
10022 emit_libcall_block (insns, base, rax, note);
10024 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10025 emit_insn (gen_tls_local_dynamic_base_64 (base));
10027 emit_insn (gen_tls_local_dynamic_base_32 (base));
10029 if (TARGET_GNU2_TLS)
10031 rtx x = ix86_tls_module_base ();
10033 set_unique_reg_note (get_last_insn (), REG_EQUIV,
10034 gen_rtx_MINUS (Pmode, x, tp));
/* Add the symbol's @DTPOFF offset to the module base. */
10037 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
10038 off = gen_rtx_CONST (Pmode, off);
10040 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
10042 if (TARGET_GNU2_TLS)
10044 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
10046 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10051 case TLS_MODEL_INITIAL_EXEC:
/* Pick the GOT-entry relocation type depending on target, picness,
   and GNU vs. Sun TLS dialect. */
10055 type = UNSPEC_GOTNTPOFF;
10059 if (reload_in_progress)
10060 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10061 pic = pic_offset_table_rtx;
10062 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
10064 else if (!TARGET_ANY_GNU_TLS)
10066 pic = gen_reg_rtx (Pmode);
10067 emit_insn (gen_set_got (pic));
10068 type = UNSPEC_GOTTPOFF;
10073 type = UNSPEC_INDNTPOFF;
10076 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
10077 off = gen_rtx_CONST (Pmode, off);
10079 off = gen_rtx_PLUS (Pmode, pic, off);
10080 off = gen_const_mem (Pmode, off);
10081 set_mem_alias_set (off, ix86_GOT_alias_set ());
10083 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10085 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10086 off = force_reg (Pmode, off);
10087 return gen_rtx_PLUS (Pmode, base, off);
/* Sun TLS: the offset is subtracted from the thread pointer. */
10091 base = get_thread_pointer (true);
10092 dest = gen_reg_rtx (Pmode);
10093 emit_insn (gen_subsi3 (dest, base, off));
10097 case TLS_MODEL_LOCAL_EXEC:
10098 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
10099 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10100 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
10101 off = gen_rtx_CONST (Pmode, off);
10103 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10105 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10106 return gen_rtx_PLUS (Pmode, base, off);
10110 base = get_thread_pointer (true);
10111 dest = gen_reg_rtx (Pmode);
10112 emit_insn (gen_subsi3 (dest, base, off));
10117 gcc_unreachable ();
10123 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* Cache mapping original decl -> synthetic __imp_ VAR_DECL, kept alive
   across GC via the GTY machinery. */
10126 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10127 htab_t dllimport_map;
10130 get_dllimport_decl (tree decl)
10132 struct tree_map *h, in;
10135 const char *prefix;
10136 size_t namelen, prefixlen;
10141 if (!dllimport_map)
10142 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
/* Look up (or insert) the decl in the cache; return the cached entry
   if one already exists. */
10144 in.hash = htab_hash_pointer (decl);
10145 in.base.from = decl;
10146 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10147 h = (struct tree_map *) *loc;
10151 *loc = h = GGC_NEW (struct tree_map);
10153 h->base.from = decl;
10154 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
10155 DECL_ARTIFICIAL (to) = 1;
10156 DECL_IGNORED_P (to) = 1;
10157 DECL_EXTERNAL (to) = 1;
10158 TREE_READONLY (to) = 1;
/* Build the "__imp_" / "__imp__" prefixed assembler name.  Fastcall
   symbols and targets without a user label prefix use the single
   underscore form. */
10160 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10161 name = targetm.strip_name_encoding (name);
10162 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10163 ? "*__imp_" : "*__imp__";
10164 namelen = strlen (name);
10165 prefixlen = strlen (prefix);
10166 imp_name = (char *) alloca (namelen + prefixlen + 1);
10167 memcpy (imp_name, prefix, prefixlen);
10168 memcpy (imp_name + prefixlen, name, namelen + 1);
10170 name = ggc_alloc_string (imp_name, namelen + prefixlen);
10171 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10172 SET_SYMBOL_REF_DECL (rtl, to);
10173 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
/* The decl's RTL is a load through the import pointer, aliased with
   the GOT set. */
10175 rtl = gen_const_mem (Pmode, rtl);
10176 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10178 SET_DECL_RTL (to, rtl);
10179 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10184 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10185 true if we require the result be a register. */
10188 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10193 gcc_assert (SYMBOL_REF_DECL (symbol));
10194 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
/* The __imp_ decl's RTL is the MEM load built by get_dllimport_decl. */
10196 x = DECL_RTL (imp_decl);
10198 x = force_reg (Pmode, x);
10202 /* Try machine-dependent ways of modifying an illegitimate address
10203 to be legitimate. If we find one, return the new, valid address.
10204 This macro is used in only one place: `memory_address' in explow.c.
10206 OLDX is the address as it was before break_out_memory_refs was called.
10207 In some cases it is useful to look at this to decide what needs to be done.
10209 It is always safe for this macro to do nothing. It exists to recognize
10210 opportunities to optimize the output.
10212 For the 80386, we handle X+REG by loading X into a register R and
10213 using R+REG. R will go in a general reg and indexing will be used.
10214 However, if REG is a broken-out memory address or multiplication,
10215 nothing needs to be done because REG can certainly go in a general reg.
10217 When -fpic is used, special handling is needed for symbolic references.
10218 See comments by legitimize_pic_address in i386.c for details. */
10221 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10222 enum machine_mode mode)
/* TLS symbols (plain or symbol+offset inside a CONST) are handled by
   the dedicated TLS legitimizer. */
10227 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10229 return legitimize_tls_address (x, (enum tls_model) log, false);
10230 if (GET_CODE (x) == CONST
10231 && GET_CODE (XEXP (x, 0)) == PLUS
10232 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10233 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10235 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10236 (enum tls_model) log, false);
10237 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10240 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10242 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10243 return legitimize_dllimport_symbol (x, true);
10244 if (GET_CODE (x) == CONST
10245 && GET_CODE (XEXP (x, 0)) == PLUS
10246 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10247 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10249 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10250 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10254 if (flag_pic && SYMBOLIC_CONST (x))
10255 return legitimize_pic_address (x, 0);
10257 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10258 if (GET_CODE (x) == ASHIFT
10259 && CONST_INT_P (XEXP (x, 1))
10260 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10263 log = INTVAL (XEXP (x, 1));
10264 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10265 GEN_INT (1 << log));
10268 if (GET_CODE (x) == PLUS)
10270 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10272 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10273 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10274 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10277 log = INTVAL (XEXP (XEXP (x, 0), 1));
10278 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10279 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10280 GEN_INT (1 << log));
10283 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10284 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10285 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10288 log = INTVAL (XEXP (XEXP (x, 1), 1));
10289 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10290 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10291 GEN_INT (1 << log));
10294 /* Put multiply first if it isn't already. */
10295 if (GET_CODE (XEXP (x, 1)) == MULT)
10297 rtx tmp = XEXP (x, 0);
10298 XEXP (x, 0) = XEXP (x, 1);
10303 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10304 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10305 created by virtual register instantiation, register elimination, and
10306 similar optimizations. */
10307 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10310 x = gen_rtx_PLUS (Pmode,
10311 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10312 XEXP (XEXP (x, 1), 0)),
10313 XEXP (XEXP (x, 1), 1));
10317 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10318 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10319 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10320 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10321 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10322 && CONSTANT_P (XEXP (x, 1)))
10325 rtx other = NULL_RTX;
/* One of the two constants must be a CONST_INT; fold it into the
   other via plus_constant below. */
10327 if (CONST_INT_P (XEXP (x, 1)))
10329 constant = XEXP (x, 1);
10330 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10332 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10334 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10335 other = XEXP (x, 1);
10343 x = gen_rtx_PLUS (Pmode,
10344 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10345 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10346 plus_constant (other, INTVAL (constant)));
10350 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
/* Force non-address-form multiplies into registers. */
10353 if (GET_CODE (XEXP (x, 0)) == MULT)
10356 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10359 if (GET_CODE (XEXP (x, 1)) == MULT)
10362 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10366 && REG_P (XEXP (x, 1))
10367 && REG_P (XEXP (x, 0)))
10370 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10373 x = legitimize_pic_address (x, 0);
10376 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
/* Finally, load whichever operand is not already a register into a
   fresh pseudo so the result has the reg+reg shape. */
10379 if (REG_P (XEXP (x, 0)))
10381 rtx temp = gen_reg_rtx (Pmode);
10382 rtx val = force_operand (XEXP (x, 1), temp);
10384 emit_move_insn (temp, val);
10386 XEXP (x, 1) = temp;
10390 else if (REG_P (XEXP (x, 1)))
10392 rtx temp = gen_reg_rtx (Pmode);
10393 rtx val = force_operand (XEXP (x, 0), temp);
10395 emit_move_insn (temp, val);
10397 XEXP (x, 0) = temp;
10405 /* Print an integer constant expression in assembler syntax. Addition
10406 and subtraction are the only arithmetic that may appear in these
10407 expressions. FILE is the stdio stream to write to, X is the rtx, and
10408 CODE is the operand print code from the output string. */
10411 output_pic_addr_const (FILE *file, rtx x, int code)
10415 switch (GET_CODE (x))
10418 gcc_assert (flag_pic);
10423 if (! TARGET_MACHO || TARGET_64BIT)
10424 output_addr_const (file, x);
10427 const char *name = XSTR (x, 0);
10429 /* Mark the decl as referenced so that cgraph will
10430 output the function. */
10431 if (SYMBOL_REF_DECL (x))
10432 mark_decl_referenced (SYMBOL_REF_DECL (x));
10435 if (MACHOPIC_INDIRECT
10436 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10437 name = machopic_indirection_name (x, /*stub_p=*/true);
10439 assemble_name (file, name);
/* 'P' requests a PLT reference for non-local symbols (not on Mach-O
   or the 64-bit MS ABI). */
10441 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10442 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10443 fputs ("@PLT", file);
10450 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10451 assemble_name (asm_out_file, buf);
10455 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10459 /* This used to output parentheses around the expression,
10460 but that does not work on the 386 (either ATT or BSD assembler). */
10461 output_pic_addr_const (file, XEXP (x, 0), code);
10465 if (GET_MODE (x) == VOIDmode)
10467 /* We can use %d if the number is <32 bits and positive. */
10468 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10469 fprintf (file, "0x%lx%08lx",
10470 (unsigned long) CONST_DOUBLE_HIGH (x),
10471 (unsigned long) CONST_DOUBLE_LOW (x));
10473 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10476 /* We can't handle floating point constants;
10477 PRINT_OPERAND must handle them. */
10478 output_operand_lossage ("floating constant misused");
10482 /* Some assemblers need integer constants to appear first. */
10483 if (CONST_INT_P (XEXP (x, 0)))
10485 output_pic_addr_const (file, XEXP (x, 0), code);
10487 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: the second operand must be a CONST_INT. */
10491 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10492 output_pic_addr_const (file, XEXP (x, 1), code);
10494 output_pic_addr_const (file, XEXP (x, 0), code);
10500 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10501 output_pic_addr_const (file, XEXP (x, 0), code);
10503 output_pic_addr_const (file, XEXP (x, 1), code);
10505 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand followed by its relocation
   suffix. */
10509 gcc_assert (XVECLEN (x, 0) == 1);
10510 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10511 switch (XINT (x, 1))
10514 fputs ("@GOT", file);
10516 case UNSPEC_GOTOFF:
10517 fputs ("@GOTOFF", file);
10519 case UNSPEC_PLTOFF:
10520 fputs ("@PLTOFF", file);
10522 case UNSPEC_GOTPCREL:
10523 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10524 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10526 case UNSPEC_GOTTPOFF:
10527 /* FIXME: This might be @TPOFF in Sun ld too. */
10528 fputs ("@GOTTPOFF", file);
10531 fputs ("@TPOFF", file);
10533 case UNSPEC_NTPOFF:
10535 fputs ("@TPOFF", file);
10537 fputs ("@NTPOFF", file);
10539 case UNSPEC_DTPOFF:
10540 fputs ("@DTPOFF", file);
10542 case UNSPEC_GOTNTPOFF:
10544 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10545 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10547 fputs ("@GOTNTPOFF", file);
10549 case UNSPEC_INDNTPOFF:
10550 fputs ("@INDNTPOFF", file);
10553 case UNSPEC_MACHOPIC_OFFSET:
10555 machopic_output_function_base_name (file);
10559 output_operand_lossage ("invalid UNSPEC as operand");
10565 output_operand_lossage ("invalid expression as operand");
/* NOTE(review): this extract is incomplete — embedded original line numbers
   jump (10577 -> 10583 -> 10586), so intermediate lines (presumably a switch
   on SIZE emitting ASM_LONG vs. ASM_QUAD forms) are elided.  Do not edit
   without the full function text.  */
10569 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10570 We need to emit DTP-relative relocations. */
10572 static void ATTRIBUTE_UNUSED
10573 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emit the address constant followed by a @DTPOFF relocation suffix.  */
10575 fputs (ASM_LONG, file);
10576 output_addr_const (file, x);
10577 fputs ("@DTPOFF", file);
10583 fputs (", 0", file);
/* Any SIZE not handled above is a front-end bug.  */
10586 gcc_unreachable ();
/* NOTE(review): extraction gaps (10592 -> 10595, 10599 -> 10601) — the
   return type line and braces are elided in this view.  */
10590 /* Return true if X is a representation of the PIC register. This copes
10591 with calls from ix86_find_base_term, where the register might have
10592 been replaced by a cselib value. */
10595 ix86_pic_register_p (rtx x)
/* A cselib VALUE compares against pic_offset_table_rtx by cselib
   equivalence; a plain REG compares by hard register number.  */
10597 if (GET_CODE (x) == VALUE)
10598 return (pic_offset_table_rtx
10599 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx))
10601 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
/* NOTE(review): incomplete extract — several lines elided (e.g. 10623 ->
   10630, 10635 -> 10638, 10678 -> 10684); the initial assignment of X from
   ORIG_X and some early-return paths are not visible here.  */
10604 /* In the name of slightly smaller debug output, and to cater to
10605 general assembler lossage, recognize PIC+GOTOFF and turn it back
10606 into a direct symbol reference.
10608 On Darwin, this is necessary to avoid a crash, because Darwin
10609 has a different PIC label for each routine but the DWARF debugging
10610 information is not associated with any particular routine, so it's
10611 necessary to remove references to the PIC label from RTL stored by
10612 the DWARF output code. */
10615 ix86_delegitimize_address (rtx orig_x)
10618 /* reg_addend is NULL or a multiple of some register. */
10619 rtx reg_addend = NULL_RTX;
10620 /* const_addend is NULL or a const_int. */
10621 rtx const_addend = NULL_RTX;
10622 /* This is the result, or NULL. */
10623 rtx result = NULL_RTX;
/* Recognize (const (unspec [sym] UNSPEC_GOTPCREL)) inside a MEM: a
   64-bit RIP-relative GOT load — return the bare symbol.  */
10630 if (GET_CODE (x) != CONST
10631 || GET_CODE (XEXP (x, 0)) != UNSPEC
10632 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10633 || !MEM_P (orig_x))
10635 return XVECEXP (XEXP (x, 0), 0, 0);
10638 if (GET_CODE (x) != PLUS
10639 || GET_CODE (XEXP (x, 1)) != CONST)
/* Peel off the PIC register (and any scaled index) from the address.  */
10642 if (ix86_pic_register_p (XEXP (x, 0)))
10643 /* %ebx + GOT/GOTOFF */
10645 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10647 /* %ebx + %reg * scale + GOT/GOTOFF */
10648 reg_addend = XEXP (x, 0);
10649 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10650 reg_addend = XEXP (reg_addend, 1);
10651 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10652 reg_addend = XEXP (reg_addend, 0);
10655 if (!REG_P (reg_addend)
10656 && GET_CODE (reg_addend) != MULT
10657 && GET_CODE (reg_addend) != ASHIFT)
10663 x = XEXP (XEXP (x, 1), 0);
10664 if (GET_CODE (x) == PLUS
10665 && CONST_INT_P (XEXP (x, 1)))
10667 const_addend = XEXP (x, 1);
/* GOT references must come from a MEM; GOTOFF references must not.  */
10671 if (GET_CODE (x) == UNSPEC
10672 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10673 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10674 result = XVECEXP (x, 0, 0);
10676 if (TARGET_MACHO && darwin_local_data_pic (x)
10677 && !MEM_P (orig_x))
10678 result = XVECEXP (x, 0, 0);
/* Re-attach any constant and register addends stripped above.  */
10684 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10686 result = gen_rtx_PLUS (Pmode, reg_addend, result);
/* NOTE(review): gaps at 10695 -> 10701 and 10709 -> 10712 — the TARGET_64BIT
   guard and local declarations are presumably in the elided lines.  */
10690 /* If X is a machine specific address (i.e. a symbol or label being
10691 referenced as a displacement from the GOT implemented using an
10692 UNSPEC), then return the base term. Otherwise return X. */
10695 ix86_find_base_term (rtx x)
10701 if (GET_CODE (x) != CONST)
10703 term = XEXP (x, 0);
/* Strip an outer (plus term const) wrapper before checking the UNSPEC.  */
10704 if (GET_CODE (term) == PLUS
10705 && (CONST_INT_P (XEXP (term, 1))
10706 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10707 term = XEXP (term, 0);
10708 if (GET_CODE (term) != UNSPEC
10709 || XINT (term, 1) != UNSPEC_GOTPCREL)
10712 return XVECEXP (term, 0, 0);
/* Fall back to full delegitimization for the non-GOTPCREL cases.  */
10715 return ix86_delegitimize_address (x);
/* NOTE(review): heavily elided (10733 -> 10784, and most of the switch on
   CODE between the visible assertions) — the case labels that select each
   SUFFIX are missing from this extract.  Emit the condition-code suffix for
   CODE in MODE to FILE; REVERSE inverts the condition, FP selects the
   fcmov-style spelling.  */
10719 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10720 int fp, FILE *file)
10722 const char *suffix;
/* For x87/SSE FP compare modes, map to the equivalent integer condition
   first; the split into bypass/second codes must be trivial here.  */
10724 if (mode == CCFPmode || mode == CCFPUmode)
10726 enum rtx_code second_code, bypass_code;
10727 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10728 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10729 code = ix86_fp_compare_code_to_integer (code);
10733 code = reverse_condition (code);
10784 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10788 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10789 Those same assemblers have the same but opposite lossage on cmov. */
10790 if (mode == CCmode)
10791 suffix = fp ? "nbe" : "a";
10792 else if (mode == CCCmode)
10795 gcc_unreachable ();
10811 gcc_unreachable ();
10815 gcc_assert (mode == CCmode || mode == CCCmode);
10832 gcc_unreachable ();
10836 /* ??? As above. */
10837 gcc_assert (mode == CCmode || mode == CCCmode);
10838 suffix = fp ? "nb" : "ae";
10841 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10845 /* ??? As above. */
10846 if (mode == CCmode)
10848 else if (mode == CCCmode)
10849 suffix = fp ? "nb" : "ae";
10851 gcc_unreachable ();
10854 suffix = fp ? "u" : "p";
10857 suffix = fp ? "nu" : "np";
10860 gcc_unreachable ();
10862 fputs (suffix, file);
/* NOTE(review): elided lines throughout (e.g. 10888 -> 10890, the mode-size
   cases 10900-10917, and the switch cases around 10952-10989); the visible
   fragments are documented below but the control structure is incomplete.  */
10865 /* Print the name of register X to FILE based on its machine mode and number.
10866 If CODE is 'w', pretend the mode is HImode.
10867 If CODE is 'b', pretend the mode is QImode.
10868 If CODE is 'k', pretend the mode is SImode.
10869 If CODE is 'q', pretend the mode is DImode.
10870 If CODE is 'x', pretend the mode is V4SFmode.
10871 If CODE is 't', pretend the mode is V8SFmode.
10872 If CODE is 'h', pretend the reg is the 'high' byte register.
10873 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10874 If CODE is 'd', duplicate the operand for AVX instruction.
10878 print_reg (rtx x, int code, FILE *file)
10881 bool duplicated = code == 'd' && TARGET_AVX;
/* Internal/virtual registers must never reach assembly output.  */
10883 gcc_assert (x == pc_rtx
10884 || (REGNO (x) != ARG_POINTER_REGNUM
10885 && REGNO (x) != FRAME_POINTER_REGNUM
10886 && REGNO (x) != FLAGS_REG
10887 && REGNO (x) != FPSR_REG
10888 && REGNO (x) != FPCR_REG));
10890 if (ASSEMBLER_DIALECT == ASM_ATT)
10895 gcc_assert (TARGET_64BIT);
10896 fputs ("rip", file);
/* Translate the CODE letter into a byte size; MMX regs always print
   their full (word-sized) name.  */
10900 if (code == 'w' || MMX_REG_P (x))
10902 else if (code == 'b')
10904 else if (code == 'k')
10906 else if (code == 'q')
10908 else if (code == 'y')
10910 else if (code == 'h')
10912 else if (code == 'x')
10914 else if (code == 't')
10917 code = GET_MODE_SIZE (GET_MODE (x));
10919 /* Irritatingly, AMD extended registers use different naming convention
10920 from the normal registers. */
10921 if (REX_INT_REG_P (x))
10923 gcc_assert (TARGET_64BIT)
/* r8..r15 take b/w/d suffixes instead of the al/ax/eax-style names.  */
10927 error ("extended registers have no high halves");
10930 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10933 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10936 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10939 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10942 error ("unsupported operand size for extended register");
10952 if (STACK_TOP_P (x))
10961 if (! ANY_FP_REG_P (x))
10962 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10967 reg = hi_reg_name[REGNO (x)];
10970 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10972 reg = qi_reg_name[REGNO (x)];
10975 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10977 reg = qi_high_reg_name[REGNO (x)];
10982 gcc_assert (!duplicated);
10984 fputs (hi_reg_name[REGNO (x)] + 1, file);
10989 gcc_unreachable ();
/* AVX 'd' duplication: print the register twice, comma-separated.  */
10995 if (ASSEMBLER_DIALECT == ASM_ATT)
10996 fprintf (file, ", %%%s", reg);
10998 fprintf (file, ", %s", reg);
/* NOTE(review): gaps (11007 -> 11011, return statements elided).  This is a
   for_each_rtx callback: on finding a local-dynamic TLS SYMBOL_REF it caches
   the name in cfun->machine->some_ld_name (and presumably returns nonzero
   to stop the walk — elided here).  */
11002 /* Locate some local-dynamic symbol still in use by this function
11003 so that we can print its name in some tls_local_dynamic_base
11007 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11011 if (GET_CODE (x) == SYMBOL_REF
11012 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11014 cfun->machine->some_ld_name = XSTR (x, 0);
/* NOTE(review): gaps at 11022 -> 11026 and 11029 -> 11031 (the INSN_P
   guard on the loop body is partially elided).  Returns a cached
   local-dynamic TLS symbol name, scanning the insn stream on first use;
   asserts if the function contains none.  */
11021 static const char *
11022 get_some_local_dynamic_name (void)
11026 if (cfun->machine->some_ld_name)
11027 return cfun->machine->some_ld_name;
11029 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11031 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11032 return cfun->machine->some_ld_name;
/* Caller only asks when a local-dynamic reference exists.  */
11034 gcc_unreachable ();
/* NOTE(review): this is the largest and most heavily elided block in the
   extract — the switch's case labels, many break statements, and whole
   branches are missing (line numbers jump repeatedly, e.g. 11074 -> 11081,
   11153 -> 11172, 11448 -> 11456).  The visible fragments follow the
   documented CODE-letter contract below.  Do not modify without the full
   source.  */
11037 /* Meaning of CODE:
11038 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
11039 C -- print opcode suffix for set/cmov insn.
11040 c -- like C, but print reversed condition
11041 E,e -- likewise, but for compare-and-branch fused insn.
11042 F,f -- likewise, but for floating-point.
11043 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
11045 R -- print the prefix for register names.
11046 z -- print the opcode suffix for the size of the current operand.
11047 Z -- likewise, with special suffixes for x87 instructions.
11048 * -- print a star (in certain assembler syntax)
11049 A -- print an absolute memory reference.
11050 w -- print the operand as if it's a "word" (HImode) even if it isn't.
11051 s -- print a shift double count, followed by the assemblers argument
11053 b -- print the QImode name of the register for the indicated operand.
11054 %b0 would print %al if operands[0] is reg 0.
11055 w -- likewise, print the HImode name of the register.
11056 k -- likewise, print the SImode name of the register.
11057 q -- likewise, print the DImode name of the register.
11058 x -- likewise, print the V4SFmode name of the register.
11059 t -- likewise, print the V8SFmode name of the register.
11060 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
11061 y -- print "st(0)" instead of "st" as a register.
11062 d -- print duplicated register operand for AVX instruction.
11063 D -- print condition for SSE cmp instruction.
11064 P -- if PIC, print an @PLT suffix.
11065 X -- don't print any sort of PIC '@' suffix for a symbol.
11066 & -- print some in-use local-dynamic symbol name.
11067 H -- print a memory address offset by 8; used for sse high-parts
11068 Y -- print condition for SSE5 com* instruction.
11069 + -- print a branch hint as 'cs' or 'ds' prefix
11070 ; -- print a semicolon (after prefixes due to bug in older gas).
11074 print_operand (FILE *file, rtx x, int code)
/* '*' — star prefix, AT&T syntax only.  */
11081 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&' — emit an in-use local-dynamic TLS symbol name.  */
11086 assemble_name (file, get_some_local_dynamic_name ());
/* 'A' — absolute memory reference; dialect-dependent bracketing.  */
11090 switch (ASSEMBLER_DIALECT)
11097 /* Intel syntax. For absolute addresses, registers should not
11098 be surrounded by braces. */
11102 PRINT_OPERAND (file, x, 0);
11109 gcc_unreachable ();
11112 PRINT_OPERAND (file, x, 0);
/* 'L'/'W'/'B'/'Q'/'S'/'T' — explicit size suffixes (AT&T only).  */
11117 if (ASSEMBLER_DIALECT == ASM_ATT)
11122 if (ASSEMBLER_DIALECT == ASM_ATT)
11127 if (ASSEMBLER_DIALECT == ASM_ATT)
11132 if (ASSEMBLER_DIALECT == ASM_ATT)
11137 if (ASSEMBLER_DIALECT == ASM_ATT)
11142 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z' — pick the suffix from the operand's own mode size.  */
11147 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11149 /* Opcodes don't get size suffixes if using Intel opcodes. */
11150 if (ASSEMBLER_DIALECT == ASM_INTEL)
11153 switch (GET_MODE_SIZE (GET_MODE (x)))
11172 output_operand_lossage
11173 ("invalid operand size for operand code '%c'", code);
11178 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11180 (0, "non-integer operand used with operand code '%c'", code);
11184 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
11185 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* 'Z' — x87 fild/fist-style suffixes ('s', 'l', 'll', …).  */
11188 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11190 switch (GET_MODE_SIZE (GET_MODE (x)))
11193 #ifdef HAVE_AS_IX86_FILDS
11203 #ifdef HAVE_AS_IX86_FILDQ
11206 fputs ("ll", file);
11214 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11216 /* 387 opcodes don't get size suffixes
11217 if the operands are registers. */
11218 if (STACK_REG_P (x))
11221 switch (GET_MODE_SIZE (GET_MODE (x)))
11242 output_operand_lossage
11243 ("invalid operand type used with operand code '%c'", code);
11247 output_operand_lossage
11248 ("invalid operand size for operand code '%c'", code);
/* 's' — shift-double count, printed with a trailing separator.  */
11265 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11267 PRINT_OPERAND (file, x, 0);
11268 fputs (", ", file);
11273 /* Little bit of braindamage here. The SSE compare instructions
11274 does use completely different names for the comparisons that the
11275 fp conditional moves. */
/* 'D' — SSE cmp condition name; AVX variant uses the extended names
   (eq_us, neq_oq, …) below, legacy SSE the shorter set after it.  */
11278 switch (GET_CODE (x))
11281 fputs ("eq", file);
11284 fputs ("eq_us", file);
11287 fputs ("lt", file);
11290 fputs ("nge", file);
11293 fputs ("le", file);
11296 fputs ("ngt", file);
11299 fputs ("unord", file);
11302 fputs ("neq", file);
11305 fputs ("neq_oq", file);
11308 fputs ("ge", file);
11311 fputs ("nlt", file);
11314 fputs ("gt", file);
11317 fputs ("nle", file);
11320 fputs ("ord", file);
11323 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11329 switch (GET_CODE (x))
11333 fputs ("eq", file);
11337 fputs ("lt", file);
11341 fputs ("le", file);
11344 fputs ("unord", file);
11348 fputs ("neq", file);
11352 fputs ("nlt", file);
11356 fputs ("nle", file);
11359 fputs ("ord", file);
11362 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'O' — Sun assembler cmov size letter, AT&T dialect only.  */
11368 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11369 if (ASSEMBLER_DIALECT == ASM_ATT)
11371 switch (GET_MODE (x))
11373 case HImode: putc ('w', file); break;
11375 case SFmode: putc ('l', file); break;
11377 case DFmode: putc ('q', file); break;
11378 default: gcc_unreachable ();
/* 'C' — set/cmov condition suffix; operand must be a comparison.  */
11385 if (!COMPARISON_P (x))
11387 output_operand_lossage ("operand is neither a constant nor a "
11388 "condition code, invalid operand code "
11392 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
/* 'F' — like 'C' but floating-point (fcmov) spelling.  */
11395 if (!COMPARISON_P (x))
11397 output_operand_lossage ("operand is neither a constant nor a "
11398 "condition code, invalid operand code "
11402 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11403 if (ASSEMBLER_DIALECT == ASM_ATT)
11406 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11409 /* Like above, but reverse condition */
11411 /* Check to see if argument to %c is really a constant
11412 and not a condition code which needs to be reversed. */
11413 if (!COMPARISON_P (x))
11415 output_operand_lossage ("operand is neither a constant nor a "
11416 "condition code, invalid operand "
11420 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11423 if (!COMPARISON_P (x))
11425 output_operand_lossage ("operand is neither a constant nor a "
11426 "condition code, invalid operand "
11430 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11431 if (ASSEMBLER_DIALECT == ASM_ATT)
11434 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e' — compare-and-branch fused conditions, always CCmode.  */
11438 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11442 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11446 /* It doesn't actually matter what mode we use here, as we're
11447 only going to use this for printing. */
11448 x = adjust_address_nv (x, DImode, 8);
/* '+' — branch-prediction hint prefix (ds = taken, cs = not taken).  */
11456 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11459 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11462 int pred_val = INTVAL (XEXP (x, 0));
/* Hint only when the prediction is decisive (outside 45%-55%).  */
11464 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11465 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11467 int taken = pred_val > REG_BR_PROB_BASE / 2;
11468 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11470 /* Emit hints only in the case default branch prediction
11471 heuristics would fail. */
11472 if (taken != cputaken)
11474 /* We use 3e (DS) prefix for taken branches and
11475 2e (CS) prefix for not taken branches. */
11477 fputs ("ds ; ", file);
11479 fputs ("cs ; ", file);
/* 'Y' — SSE5 com* condition names.  */
11487 switch (GET_CODE (x))
11490 fputs ("neq", file);
11493 fputs ("eq", file);
11497 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11501 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11505 fputs ("le", file);
11509 fputs ("lt", file);
11512 fputs ("unord", file);
11515 fputs ("ord", file);
11518 fputs ("ueq", file);
11521 fputs ("nlt", file);
11524 fputs ("nle", file);
11527 fputs ("ule", file);
11530 fputs ("ult", file);
11533 fputs ("une", file);
11536 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* ';' — separator working around a prefix bug in older gas.  */
11543 fputs (" ; ", file);
11550 output_operand_lossage ("invalid operand code '%c'", code);
/* Non-letter path: dispatch on the operand's rtx class.  */
11555 print_reg (x, code, file);
11557 else if (MEM_P (x))
11559 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11560 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11561 && GET_MODE (x) != BLKmode)
11564 switch (GET_MODE_SIZE (GET_MODE (x)))
11566 case 1: size = "BYTE"; break;
11567 case 2: size = "WORD"; break;
11568 case 4: size = "DWORD"; break;
11569 case 8: size = "QWORD"; break;
11570 case 12: size = "XWORD"; break;
11572 if (GET_MODE (x) == XFmode)
11578 gcc_unreachable ();
11581 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11584 else if (code == 'w')
11586 else if (code == 'k')
11589 fputs (size, file);
11590 fputs (" PTR ", file);
11594 /* Avoid (%rip) for call operands. */
11595 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11596 && !CONST_INT_P (x))
11597 output_addr_const (file, x);
11598 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11599 output_operand_lossage ("invalid constraints for operand");
11601 output_address (x);
/* SFmode immediates print as a hex bit pattern.  */
11604 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11609 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11610 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11612 if (ASSEMBLER_DIALECT == ASM_ATT)
11614 fprintf (file, "0x%08lx", (long unsigned int) l);
11617 /* These float cases don't actually occur as immediate operands. */
11618 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11622 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11623 fprintf (file, "%s", dstr);
11626 else if (GET_CODE (x) == CONST_DOUBLE
11627 && GET_MODE (x) == XFmode)
11631 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11632 fprintf (file, "%s", dstr);
11637 /* We have patterns that allow zero sets of memory, for instance.
11638 In 64-bit mode, we should probably support all 8-byte vectors,
11639 since we can in fact encode that into an immediate. */
11640 if (GET_CODE (x) == CONST_VECTOR)
11642 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediate-prefix handling ('$' in AT&T, OFFSET FLAT: in Intel).  */
11648 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11650 if (ASSEMBLER_DIALECT == ASM_ATT)
11653 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11654 || GET_CODE (x) == LABEL_REF)
11656 if (ASSEMBLER_DIALECT == ASM_ATT)
11659 fputs ("OFFSET FLAT:", file);
11662 if (CONST_INT_P (x))
11663 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11665 output_pic_addr_const (file, x, code);
11667 output_addr_const (file, x);
/* NOTE(review): elided lines throughout (e.g. 11679 -> 11684, 11754 ->
   11760); the Intel-dialect branch structure and several gcc_assert/brace
   lines are missing from this extract.  */
11671 /* Print a memory operand whose address is ADDR. */
11674 print_operand_address (FILE *file, rtx addr)
11676 struct ix86_address parts;
11677 rtx base, index, disp;
11679 int ok = ix86_decompose_address (addr, &parts);
11684 index = parts.index;
11686 scale = parts.scale;
/* Segment override (fs:/gs:) when the address carries one.  */
11694 if (ASSEMBLER_DIALECT == ASM_ATT)
11696 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11699 gcc_unreachable ();
11702 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11703 if (TARGET_64BIT && !base && !index)
11707 if (GET_CODE (disp) == CONST
11708 && GET_CODE (XEXP (disp, 0)) == PLUS
11709 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11710 symbol = XEXP (XEXP (disp, 0), 0);
11712 if (GET_CODE (symbol) == LABEL_REF
11713 || (GET_CODE (symbol) == SYMBOL_REF
11714 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11717 if (!base && !index)
11719 /* Displacement only requires special attention. */
11721 if (CONST_INT_P (disp))
11723 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11724 fputs ("ds:", file);
11725 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11728 output_pic_addr_const (file, disp, 0);
11730 output_addr_const (file, disp);
/* AT&T form: disp(base,index,scale).  */
11734 if (ASSEMBLER_DIALECT == ASM_ATT)
11739 output_pic_addr_const (file, disp, 0);
11740 else if (GET_CODE (disp) == LABEL_REF)
11741 output_asm_label (disp);
11743 output_addr_const (file, disp);
11748 print_reg (base, 0, file);
11752 print_reg (index, 0, file);
11754 fprintf (file, ",%d", scale);
/* Intel form: [base+index*scale+disp], with the symbol printed first
   and its integer offset pulled out separately.  */
11760 rtx offset = NULL_RTX;
11764 /* Pull out the offset of a symbol; print any symbol itself. */
11765 if (GET_CODE (disp) == CONST
11766 && GET_CODE (XEXP (disp, 0)) == PLUS
11767 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11769 offset = XEXP (XEXP (disp, 0), 1);
11770 disp = gen_rtx_CONST (VOIDmode,
11771 XEXP (XEXP (disp, 0), 0));
11775 output_pic_addr_const (file, disp, 0);
11776 else if (GET_CODE (disp) == LABEL_REF)
11777 output_asm_label (disp);
11778 else if (CONST_INT_P (disp))
11781 output_addr_const (file, disp);
11787 print_reg (base, 0, file);
11790 if (INTVAL (offset) >= 0)
11792 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11796 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11803 print_reg (index, 0, file);
11805 fprintf (file, "*%d", scale);
/* NOTE(review): gaps (11813 -> 11817, returns and some case labels elided).
   Target hook: print the TLS-relocation UNSPEC wrappers that generic
   output_addr_const cannot handle.  */
11813 output_addr_const_extra (FILE *file, rtx x)
11817 if (GET_CODE (x) != UNSPEC)
11820 op = XVECEXP (x, 0, 0);
11821 switch (XINT (x, 1))
11823 case UNSPEC_GOTTPOFF:
11824 output_addr_const (file, op);
11825 /* FIXME: This might be @TPOFF in Sun ld. */
11826 fputs ("@GOTTPOFF", file);
11829 output_addr_const (file, op);
11830 fputs ("@TPOFF", file);
/* NTPOFF prints as @TPOFF on one path, @NTPOFF on the other — the
   guarding condition (presumably TARGET_64BIT) is elided here.  */
11832 case UNSPEC_NTPOFF:
11833 output_addr_const (file, op);
11835 fputs ("@TPOFF", file);
11837 fputs ("@NTPOFF", file);
11839 case UNSPEC_DTPOFF:
11840 output_addr_const (file, op);
11841 fputs ("@DTPOFF", file);
11843 case UNSPEC_GOTNTPOFF:
11844 output_addr_const (file, op);
11846 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11847 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11849 fputs ("@GOTNTPOFF", file);
11851 case UNSPEC_INDNTPOFF:
11852 output_addr_const (file, op);
11853 fputs ("@INDNTPOFF", file);
11856 case UNSPEC_MACHOPIC_OFFSET:
11857 output_addr_const (file, op);
11859 machopic_output_function_base_name (file);
/* NOTE(review): gaps (11877 -> 11881, the loop header and the MEM_P
   condition line are elided).  */
11870 /* Split one or more DImode RTL references into pairs of SImode
11871 references. The RTL can be REG, offsettable MEM, integer constant, or
11872 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11873 split and "num" is its length. lo_half and hi_half are output arrays
11874 that parallel "operands". */
11877 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11881 rtx op = operands[num];
11883 /* simplify_subreg refuse to split volatile memory addresses,
11884 but we still have to handle it. */
/* MEM path: low word at offset 0, high word at offset 4.  */
11887 lo_half[num] = adjust_address (op, SImode, 0);
11888 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM path: subreg split; VOIDmode constants are treated as DImode.  */
11892 lo_half[num] = simplify_gen_subreg (SImode, op,
11893 GET_MODE (op) == VOIDmode
11894 ? DImode : GET_MODE (op), 0);
11895 hi_half[num] = simplify_gen_subreg (SImode, op,
11896 GET_MODE (op) == VOIDmode
11897 ? DImode : GET_MODE (op), 4);
/* NOTE(review): gaps as in split_di above; TImode variant splitting into
   DImode halves at byte offsets 0 and 8.  */
11901 /* Split one or more TImode RTL references into pairs of DImode
11902 references. The RTL can be REG, offsettable MEM, integer constant, or
11903 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11904 split and "num" is its length. lo_half and hi_half are output arrays
11905 that parallel "operands". */
11908 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11912 rtx op = operands[num];
11914 /* simplify_subreg refuse to split volatile memory addresses, but we
11915 still have to handle it. */
11918 lo_half[num] = adjust_address (op, DImode, 0);
11919 hi_half[num] = adjust_address (op, DImode, 8);
11923 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11924 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
/* NOTE(review): very heavily elided — the case labels of both switches on
   GET_CODE (operands[3]) and the per-opcode p/ssep string assignments are
   missing (e.g. 11973 -> 11982 -> 11991 -> 12000).  The visible template
   strings follow the {AT&T|Intel} dual-syntax convention used throughout
   this file.  Do not restructure without the complete function.  */
11929 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11930 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11931 is the expression of the binary operation. The output may either be
11932 emitted here, or returned to the caller, like all output_* functions.
11934 There is no guarantee that the operands are the same mode, as they
11935 might be within FLOAT or FLOAT_EXTEND expressions. */
11937 #ifndef SYSV386_COMPAT
11938 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11939 wants to fix the assemblers because that causes incompatibility
11940 with gcc. No-one wants to fix gcc because that causes
11941 incompatibility with assemblers... You can use the option of
11942 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11943 #define SYSV386_COMPAT 1
11947 output_387_binary_op (rtx insn, rtx *operands)
11949 static char buf[40];
11952 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11954 #ifdef ENABLE_CHECKING
11955 /* Even if we do not want to check the inputs, this documents input
11956 constraints. Which helps in understanding the following code. */
11957 if (STACK_REG_P (operands[0])
11958 && ((REG_P (operands[1])
11959 && REGNO (operands[0]) == REGNO (operands[1])
11960 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11961 || (REG_P (operands[2])
11962 && REGNO (operands[0]) == REGNO (operands[2])
11963 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11964 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11967 gcc_assert (is_sse);
/* Select the mnemonic stem from the operation code; MODE_INT operands
   indicate the fi* (integer-operand) x87 forms.  */
11970 switch (GET_CODE (operands[3]))
11973 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11974 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11982 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11983 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11991 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11992 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12000 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12001 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12009 gcc_unreachable ();
/* SSE path: three-operand (AVX) vs. two-operand scalar forms, with the
   ss/sd suffix chosen by the destination mode.  */
12016 strcpy (buf, ssep);
12017 if (GET_MODE (operands[0]) == SFmode)
12018 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
12020 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
12024 strcpy (buf, ssep + 1);
12025 if (GET_MODE (operands[0]) == SFmode)
12026 strcat (buf, "ss\t{%2, %0|%0, %2}");
12028 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: pick the operand template P per operation shape.  */
12034 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
12038 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
12040 rtx temp = operands[2];
12041 operands[2] = operands[1];
12042 operands[1] = temp;
12045 /* know operands[0] == operands[1]. */
12047 if (MEM_P (operands[2]))
12053 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12055 if (STACK_TOP_P (operands[0]))
12056 /* How is it that we are storing to a dead operand[2]?
12057 Well, presumably operands[1] is dead too. We can't
12058 store the result to st(0) as st(0) gets popped on this
12059 instruction. Instead store to operands[2] (which I
12060 think has to be st(1)). st(1) will be popped later.
12061 gcc <= 2.8.1 didn't have this check and generated
12062 assembly code that the Unixware assembler rejected. */
12063 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12065 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12069 if (STACK_TOP_P (operands[0]))
12070 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12072 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (fsub/fdiv): direction matters.  */
12077 if (MEM_P (operands[1]))
12083 if (MEM_P (operands[2]))
12089 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12092 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
12093 derived assemblers, confusingly reverse the direction of
12094 the operation for fsub{r} and fdiv{r} when the
12095 destination register is not st(0). The Intel assembler
12096 doesn't have this brain damage. Read !SYSV386_COMPAT to
12097 figure out what the hardware really does. */
12098 if (STACK_TOP_P (operands[0]))
12099 p = "{p\t%0, %2|rp\t%2, %0}";
12101 p = "{rp\t%2, %0|p\t%0, %2}";
12103 if (STACK_TOP_P (operands[0]))
12104 /* As above for fmul/fadd, we can't store to st(0). */
12105 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12107 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12112 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
12115 if (STACK_TOP_P (operands[0]))
12116 p = "{rp\t%0, %1|p\t%1, %0}";
12118 p = "{p\t%1, %0|rp\t%0, %1}";
12120 if (STACK_TOP_P (operands[0]))
12121 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
12123 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
12128 if (STACK_TOP_P (operands[0]))
12130 if (STACK_TOP_P (operands[1]))
12131 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12133 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
12136 else if (STACK_TOP_P (operands[1]))
12139 p = "{\t%1, %0|r\t%0, %1}";
12141 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
12147 p = "{r\t%2, %0|\t%0, %2}";
12149 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12155 gcc_unreachable ();
/* NOTE(review): gaps (12165 -> 12167 -> 12169; 12183 -> 12188 and the
   switch/return structure between the mode tests is elided).  */
12162 /* Return needed mode for entity in optimize_mode_switching pass. */
12165 ix86_mode_needed (int entity, rtx insn)
12167 enum attr_i387_cw mode;
12169 /* The mode UNINITIALIZED is used to store control word after a
12170 function call or ASM pattern. The mode ANY specify that function
12171 has no requirements on the control word and make no changes in the
12172 bits we are interested in. */
12175 || (NONJUMP_INSN_P (insn)
12176 && (asm_noperands (PATTERN (insn)) >= 0
12177 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12178 return I387_CW_UNINITIALIZED;
12180 if (recog_memoized (insn) < 0)
12181 return I387_CW_ANY;
/* Use the insn's declared i387_cw attribute to pick the rounding mode.  */
12183 mode = get_attr_i387_cw (insn);
12188 if (mode == I387_CW_TRUNC)
12193 if (mode == I387_CW_FLOOR)
12198 if (mode == I387_CW_CEIL)
12203 if (mode == I387_CW_MASK_PM)
12208 gcc_unreachable ();
12211 return I387_CW_ANY;
/* NOTE(review): gaps (e.g. 12221 -> 12224 -> 12226; the switch/else
   structure separating the two strategies is partly elided).  Emits code
   that stores the current x87 control word, derives a modified copy for
   MODE, and saves it to a dedicated stack slot for later fldcw.  */
12214 /* Output code to initialize control word copies used by trunc?f?i and
12215 rounding patterns. CURRENT_MODE is set to current control word,
12216 while NEW_MODE is set to new control word. */
12219 emit_i387_cw_initialization (int mode)
12221 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12224 enum ix86_stack_slot slot;
12226 rtx reg = gen_reg_rtx (HImode);
12228 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12229 emit_move_insn (reg, copy_rtx (stored_mode));
/* Slow path: explicit and/or on the HImode copy (partial-reg-stall or
   size-optimized targets, and 64-bit).  */
12231 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12232 || optimize_function_for_size_p (cfun))
12236 case I387_CW_TRUNC:
12237 /* round toward zero (truncate) */
12238 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12239 slot = SLOT_CW_TRUNC;
12242 case I387_CW_FLOOR:
12243 /* round down toward -oo */
12244 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12245 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12246 slot = SLOT_CW_FLOOR;
12250 /* round up toward +oo */
12251 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12252 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12253 slot = SLOT_CW_CEIL;
12256 case I387_CW_MASK_PM:
12257 /* mask precision exception for nearbyint() */
12258 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12259 slot = SLOT_CW_MASK_PM;
12263 gcc_unreachable ();
/* Fast path: insert the 2-bit rounding-control field directly.  */
12270 case I387_CW_TRUNC:
12271 /* round toward zero (truncate) */
12272 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12273 slot = SLOT_CW_TRUNC;
12276 case I387_CW_FLOOR:
12277 /* round down toward -oo */
12278 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12279 slot = SLOT_CW_FLOOR;
12283 /* round up toward +oo */
12284 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12285 slot = SLOT_CW_CEIL;
12288 case I387_CW_MASK_PM:
12289 /* mask precision exception for nearbyint() */
12290 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12291 slot = SLOT_CW_MASK_PM;
12295 gcc_unreachable ();
12299 gcc_assert (slot < MAX_386_STACK_LOCALS);
12301 new_mode = assign_386_stack_local (HImode, slot);
12302 emit_move_insn (new_mode, reg);
/* NOTE(review): gaps (12324 -> 12327 -> 12330; the if/else around the
   fisttp vs. fistp choice is partially elided).  */
12305 /* Output code for INSN to convert a float to a signed int. OPERANDS
12306 are the insn operands. The output may be [HSD]Imode and the input
12307 operand may be [SDX]Fmode. */
12310 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12312 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12313 int dimode_p = GET_MODE (operands[0]) == DImode;
12314 int round_mode = get_attr_i387_cw (insn);
12316 /* Jump through a hoop or two for DImode, since the hardware has no
12317 non-popping instruction. We used to do this a different way, but
12318 that was somewhat fragile and broke with post-reload splitters. */
12319 if ((dimode_p || fisttp) && !stack_top_dies)
12320 output_asm_insn ("fld\t%y1", operands);
12322 gcc_assert (STACK_TOP_P (operands[1]));
12323 gcc_assert (MEM_P (operands[0]));
12324 gcc_assert (GET_MODE (operands[1]) != TFmode);
12327 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Non-fisttp path: bracket the store with fldcw swaps when a specific
   rounding mode is required.  */
12330 if (round_mode != I387_CW_ANY)
12331 output_asm_insn ("fldcw\t%3", operands);
12332 if (stack_top_dies || dimode_p)
12333 output_asm_insn ("fistp%Z0\t%0", operands);
12335 output_asm_insn ("fist%Z0\t%0", operands);
12336 if (round_mode != I387_CW_ANY)
12337 output_asm_insn ("fldcw\t%2", operands);
12343 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12344 have the values zero or one, indicates the ffreep insn's operand
12345 from the OPERANDS array. */
/* NOTE(review): lines are elided in this extract (the #else/#endif of
   the HAVE_AS_IX86_FFREEP conditional, braces, returns).  Code kept
   byte-identical.  */
12347 static const char *
12348 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12350 if (TARGET_USE_FFREEP)
12351 #if HAVE_AS_IX86_FFREEP
12352 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit its raw encoding (0xDF 0xC0+i) as a
   .word directive.  The '_' placeholder at index 9 of the template is
   patched below with the stack-register digit.  */
12355 static char retval[] = ".word\t0xc_df";
12356 int regno = REGNO (operands[opno]);
12358 gcc_assert (FP_REGNO_P (regno));
12360 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not wanted: a popping store to the same
   stack slot.  */
12365 return opno ? "fstp\t%y1" : "fstp\t%y0";
12369 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12370 should be used. UNORDERED_P is true when fucom should be used. */
/* NOTE(review): many lines are elided in this extract (the if/else
   selecting cmp_op0/cmp_op1, SSE-vs-x87 dispatch, braces, the final
   return of alt[mask]).  Code kept byte-identical; comments only.  */
12373 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12375 int stack_top_dies;
12376 rtx cmp_op0, cmp_op1;
12377 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Pick the pair actually being compared; which branch is taken here
   is not visible in this extract - presumably keyed on eflags_p.  */
12381 cmp_op0 = operands[0];
12382 cmp_op1 = operands[1];
12386 cmp_op0 = operands[1];
12387 cmp_op1 = operands[2];
/* SSE scalar compares.  Each template carries a leading 'v';
   &name[TARGET_AVX ? 0 : 1] keeps it for AVX and skips it otherwise,
   yielding the legacy SSE mnemonic.  */
12392 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12393 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12394 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12395 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12397 if (GET_MODE (operands[0]) == SFmode)
12399 return &ucomiss[TARGET_AVX ? 0 : 1];
12401 return &comiss[TARGET_AVX ? 0 : 1];
12404 return &ucomisd[TARGET_AVX ? 0 : 1];
12406 return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path: operand 0 must sit at the top of the register stack.  */
12409 gcc_assert (STACK_TOP_P (cmp_op0));
12411 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against 0.0: use ftst, freeing the stack top afterwards
   if it dies here.  */
12413 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12415 if (stack_top_dies)
12417 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12418 return output_387_ffreep (operands, 1);
12421 return "ftst\n\tfnstsw\t%0";
12424 if (STACK_REG_P (cmp_op1)
12426 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12427 && REGNO (cmp_op1) != FIRST_STACK_REG)
12429 /* If both the top of the 387 stack dies, and the other operand
12430 is also a stack register that dies, then this must be a
12431 `fcompp' float compare */
12435 /* There is no double popping fcomi variant. Fortunately,
12436 eflags is immune from the fstp's cc clobbering. */
12438 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12440 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12441 return output_387_ffreep (operands, 0);
12446 return "fucompp\n\tfnstsw\t%0";
12448 return "fcompp\n\tfnstsw\t%0";
12453 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
/* General case: index a 16-entry template table by the 4-bit mask
   built below.  Several table rows are elided in this extract.  */
12455 static const char * const alt[16] =
12457 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12458 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12459 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12460 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12462 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12463 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12467 "fcomi\t{%y1, %0|%0, %y1}",
12468 "fcomip\t{%y1, %0|%0, %y1}",
12469 "fucomi\t{%y1, %0|%0, %y1}",
12470 "fucomip\t{%y1, %0|%0, %y1}",
12481 mask = eflags_p << 3;
12482 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12483 mask |= unordered_p << 1;
12484 mask |= stack_top_dies;
12486 gcc_assert (mask < 16);
/* Emit one element of an absolute address vector (jump table) to FILE:
   an ASM_LONG (or, presumably under 64-bit - the guarding #if is elided
   in this extract - ASM_QUAD) directive naming local label VALUE.  */
12495 ix86_output_addr_vec_elt (FILE *file, int value)
12497 const char *directive = ASM_LONG;
12501 directive = ASM_QUAD;
12503 gcc_assert (!TARGET_64BIT);
12506 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a relative (PIC) address vector to FILE: the
   difference between local label VALUE and either label REL, a @GOTOFF
   reference, the Mach-O function base, or the GOT symbol, depending on
   target.  NOTE(review): a few lines (braces, #ifdef boundaries) are
   elided in this extract; code kept byte-identical.  */
12510 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12512 const char *directive = ASM_LONG;
12515 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12516 directive = ASM_QUAD;
12518 gcc_assert (!TARGET_64BIT);
12520 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12521 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12522 fprintf (file, "%s%s%d-%s%d\n",
12523 directive, LPREFIX, value, LPREFIX, rel);
12524 else if (HAVE_AS_GOTOFF_IN_DATA)
12525 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12527 else if (TARGET_MACHO)
/* Mach-O: emit "Lnn-" followed by the picbase symbol name.  */
12529 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12530 machopic_output_function_base_name (file);
12531 fprintf(file, "\n");
/* Fallback: express the entry relative to the GOT symbol.  */
12535 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12536 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12539 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* NOTE(review): the tail of the comment above, the declaration of TMP,
   and the final emit are elided in this extract; code kept
   byte-identical.  */
12543 ix86_expand_clear (rtx dest)
12547 /* We play register width games, which are only valid after reload. */
12548 gcc_assert (reload_completed);
12550 /* Avoid HImode and its attendant prefix byte. */
12551 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12552 dest = gen_rtx_REG (SImode, REGNO (dest));
12553 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12555 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12556 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
/* The xor form clobbers the flags, so wrap the SET in a PARALLEL with
   an explicit FLAGS_REG clobber.  */
12558 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12559 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12565 /* X is an unchanging MEM. If it is a constant pool reference, return
12566 the constant pool rtx, else NULL. */
12569 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT wrapping from the address first so a pool SYMBOL_REF
   becomes visible.  */
12571 x = ix86_delegitimize_address (XEXP (x, 0));
12573 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12574 return get_pool_constant (x);
/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS, DLLIMPORT and PIC references as needed.
   NOTE(review): numerous lines are elided in this extract (op0/op1
   initialization, several if/else and brace lines); code kept
   byte-identical, comments only added.  */
12580 ix86_expand_move (enum machine_mode mode, rtx operands[])
12583 enum tls_model model;
/* Bare SYMBOL_REF source: rewrite TLS and dllimport references into
   legitimate address forms.  */
12588 if (GET_CODE (op1) == SYMBOL_REF)
12590 model = SYMBOL_REF_TLS_MODEL (op1);
12593 op1 = legitimize_tls_address (op1, model, true);
12594 op1 = force_operand (op1, op0);
12598 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12599 && SYMBOL_REF_DLLIMPORT_P (op1))
12600 op1 = legitimize_dllimport_symbol (op1, false);
/* (const (plus SYMBOL_REF addend)): legitimize the symbol part, then
   re-add the addend.  */
12602 else if (GET_CODE (op1) == CONST
12603 && GET_CODE (XEXP (op1, 0)) == PLUS
12604 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12606 rtx addend = XEXP (XEXP (op1, 0), 1);
12607 rtx symbol = XEXP (XEXP (op1, 0), 0);
12610 model = SYMBOL_REF_TLS_MODEL (symbol);
12612 tmp = legitimize_tls_address (symbol, model, true);
12613 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12614 && SYMBOL_REF_DLLIMPORT_P (symbol))
12615 tmp = legitimize_dllimport_symbol (symbol, true);
12619 tmp = force_operand (tmp, NULL);
12620 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12621 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic addresses must go through the PIC machinery.  */
12627 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12629 if (TARGET_MACHO && !TARGET_64BIT)
/* Reuse op0 as the scratch when it is already a register (or during
   reload, when no pseudo may be created).  */
12634 rtx temp = ((reload_in_progress
12635 || ((op0 && REG_P (op0))
12637 ? op0 : gen_reg_rtx (Pmode));
12638 op1 = machopic_indirect_data_reference (op1, temp);
12639 op1 = machopic_legitimize_pic_address (op1, mode,
12640 temp == op1 ? 0 : temp);
12642 else if (MACHOPIC_INDIRECT)
12643 op1 = machopic_indirect_data_reference (op1, 0);
12651 op1 = force_reg (Pmode, op1);
12652 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12654 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12655 op1 = legitimize_pic_address (op1, reg);
/* Non-PIC: memory-to-memory moves (except trivial pushes) need the
   source in a register first.  */
12664 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12665 || !push_operand (op0, mode))
12667 op1 = force_reg (mode, op1);
12669 if (push_operand (op0, mode)
12670 && ! general_no_elim_operand (op1, mode))
12671 op1 = copy_to_mode_reg (mode, op1);
12673 /* Force large constants in 64bit compilation into register
12674 to get them CSEed. */
12675 if (can_create_pseudo_p ()
12676 && (mode == DImode) && TARGET_64BIT
12677 && immediate_operand (op1, mode)
12678 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12679 && !register_operand (op0, mode)
12681 op1 = copy_to_mode_reg (mode, op1);
12683 if (can_create_pseudo_p ()
12684 && FLOAT_MODE_P (mode)
12685 && GET_CODE (op1) == CONST_DOUBLE)
12687 /* If we are loading a floating point constant to a register,
12688 force the value to memory now, since we'll get better code
12689 out the back end. */
12691 op1 = validize_mem (force_const_mem (mode, op1));
12692 if (!register_operand (op0, mode))
12694 rtx temp = gen_reg_rtx (mode);
12695 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12696 emit_move_insn (op0, temp);
/* Finally emit the (possibly rewritten) move.  */
12702 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move, forcing constants to the pool and
   dispatching under-aligned SSE accesses to the misaligned expander.
   NOTE(review): some braces and the declaration of TMP are elided in
   this extract; code kept byte-identical.  */
12706 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12708 rtx op0 = operands[0], op1 = operands[1];
12709 unsigned int align = GET_MODE_ALIGNMENT (mode);
12711 /* Force constants other than zero into memory. We do not know how
12712 the instructions used to build constants modify the upper 64 bits
12713 of the register, once we have that information we may be able
12714 to handle some of them more efficiently. */
12715 if (can_create_pseudo_p ()
12716 && register_operand (op0, mode)
12717 && (CONSTANT_P (op1)
12718 || (GET_CODE (op1) == SUBREG
12719 && CONSTANT_P (SUBREG_REG (op1))))
12720 && standard_sse_constant_p (op1) <= 0)
12721 op1 = validize_mem (force_const_mem (mode, op1));
12723 /* We need to check memory alignment for SSE mode since attribute
12724 can make operands unaligned. */
12725 if (can_create_pseudo_p ()
12726 && SSE_REG_MODE_P (mode)
12727 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12728 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12732 /* ix86_expand_vector_move_misalign() does not like constants ... */
12733 if (CONSTANT_P (op1)
12734 || (GET_CODE (op1) == SUBREG
12735 && CONSTANT_P (SUBREG_REG (op1))))
12736 op1 = validize_mem (force_const_mem (mode, op1));
12738 /* ... nor both arguments in memory. */
12739 if (!register_operand (op0, mode)
12740 && !register_operand (op1, mode))
12741 op1 = force_reg (mode, op1);
12743 tmp[0] = op0; tmp[1] = op1;
12744 ix86_expand_vector_move_misalign (mode, tmp);
12748 /* Make operand1 a register if it isn't already. */
12749 if (can_create_pseudo_p ()
12750 && !register_operand (op0, mode)
12751 && !register_operand (op1, mode))
12753 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12757 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12760 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12761 straight to ix86_expand_vector_move. */
12762 /* Code generation for scalar reg-reg moves of single and double precision data:
12763 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12767 if (x86_sse_partial_reg_dependency == true)
12772 Code generation for scalar loads of double precision data:
12773 if (x86_sse_split_regs == true)
12774 movlpd mem, reg (gas syntax)
12778 Code generation for unaligned packed loads of single precision data
12779 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12780 if (x86_sse_unaligned_move_optimal)
12783 if (x86_sse_partial_reg_dependency == true)
12795 Code generation for unaligned packed loads of double precision data
12796 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12797 if (x86_sse_unaligned_move_optimal)
12800 if (x86_sse_split_regs == true)
/* NOTE(review): this extract is missing many lines of the function body
   (op0/op1 setup, the TARGET_AVX guard, several case labels, braces and
   breaks).  Code below is kept byte-identical; comments only added.  */
12813 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path (the guard itself is elided here): vmovdqu / vmovup[sd]
   handle any alignment, in 128- or 256-bit width.  */
12822 switch (GET_MODE_CLASS (mode))
12824 case MODE_VECTOR_INT:
12826 switch (GET_MODE_SIZE (mode))
12829 op0 = gen_lowpart (V16QImode, op0);
12830 op1 = gen_lowpart (V16QImode, op1);
12831 emit_insn (gen_avx_movdqu (op0, op1));
12834 op0 = gen_lowpart (V32QImode, op0);
12835 op1 = gen_lowpart (V32QImode, op1);
12836 emit_insn (gen_avx_movdqu256 (op0, op1));
12839 gcc_unreachable ();
12842 case MODE_VECTOR_FLOAT:
12843 op0 = gen_lowpart (mode, op0);
12844 op1 = gen_lowpart (mode, op1);
12849 emit_insn (gen_avx_movups (op0, op1));
12852 emit_insn (gen_avx_movups256 (op0, op1));
12855 emit_insn (gen_avx_movupd (op0, op1));
12858 emit_insn (gen_avx_movupd256 (op0, op1));
12861 gcc_unreachable ();
12866 gcc_unreachable ();
/* Non-AVX unaligned LOAD (op1 in memory - the MEM_P test is elided
   in this extract).  */
12874 /* If we're optimizing for size, movups is the smallest. */
12875 if (optimize_insn_for_size_p ())
12877 op0 = gen_lowpart (V4SFmode, op0);
12878 op1 = gen_lowpart (V4SFmode, op1);
12879 emit_insn (gen_sse_movups (op0, op1));
12883 /* ??? If we have typed data, then it would appear that using
12884 movdqu is the only way to get unaligned data loaded with
12886 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12888 op0 = gen_lowpart (V16QImode, op0);
12889 op1 = gen_lowpart (V16QImode, op1);
12890 emit_insn (gen_sse2_movdqu (op0, op1));
12894 if (TARGET_SSE2 && mode == V2DFmode)
12898 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12900 op0 = gen_lowpart (V2DFmode, op0);
12901 op1 = gen_lowpart (V2DFmode, op1);
12902 emit_insn (gen_sse2_movupd (op0, op1));
12906 /* When SSE registers are split into halves, we can avoid
12907 writing to the top half twice. */
12908 if (TARGET_SSE_SPLIT_REGS)
12910 emit_clobber (op0);
12915 /* ??? Not sure about the best option for the Intel chips.
12916 The following would seem to satisfy; the register is
12917 entirely cleared, breaking the dependency chain. We
12918 then store to the upper half, with a dependency depth
12919 of one. A rumor has it that Intel recommends two movsd
12920 followed by an unpacklpd, but this is unconfirmed. And
12921 given that the dependency depth of the unpacklpd would
12922 still be one, I'm not sure why this would be better. */
12923 zero = CONST0_RTX (V2DFmode);
/* Load the two DF halves separately (loadlpd/loadhpd).  */
12926 m = adjust_address (op1, DFmode, 0);
12927 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12928 m = adjust_address (op1, DFmode, 8);
12929 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12933 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12935 op0 = gen_lowpart (V4SFmode, op0);
12936 op1 = gen_lowpart (V4SFmode, op1);
12937 emit_insn (gen_sse_movups (op0, op1));
/* Split-SF load: clear (or clobber) the destination, then load the
   low and high 64-bit halves with loadlps/loadhps.  */
12941 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12942 emit_move_insn (op0, CONST0_RTX (mode));
12944 emit_clobber (op0);
12946 if (mode != V4SFmode)
12947 op0 = gen_lowpart (V4SFmode, op0);
12948 m = adjust_address (op1, V2SFmode, 0);
12949 emit_insn (gen_sse_loadlps (op0, op0, m));
12950 m = adjust_address (op1, V2SFmode, 8);
12951 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Non-AVX unaligned STORE (op0 in memory).  */
12954 else if (MEM_P (op0))
12956 /* If we're optimizing for size, movups is the smallest. */
12957 if (optimize_insn_for_size_p ())
12959 op0 = gen_lowpart (V4SFmode, op0);
12960 op1 = gen_lowpart (V4SFmode, op1);
12961 emit_insn (gen_sse_movups (op0, op1));
12965 /* ??? Similar to above, only less clear because of quote
12966 typeless stores unquote. */
12967 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12968 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12970 op0 = gen_lowpart (V16QImode, op0);
12971 op1 = gen_lowpart (V16QImode, op1);
12972 emit_insn (gen_sse2_movdqu (op0, op1));
12976 if (TARGET_SSE2 && mode == V2DFmode)
12978 m = adjust_address (op0, DFmode, 0);
12979 emit_insn (gen_sse2_storelpd (m, op1));
12980 m = adjust_address (op0, DFmode, 8);
12981 emit_insn (gen_sse2_storehpd (m, op1));
/* Otherwise store the two 64-bit halves via storelps/storehps.  */
12985 if (mode != V4SFmode)
12986 op1 = gen_lowpart (V4SFmode, op1);
12987 m = adjust_address (op0, V2SFmode, 0);
12988 emit_insn (gen_sse_storelps (m, op1));
12989 m = adjust_address (op0, V2SFmode, 8);
12990 emit_insn (gen_sse_storehps (m, op1));
12994 gcc_unreachable ();
12997 /* Expand a push in MODE. This is some mode for which we do not support
12998 proper push instructions, at least from the registers that we expect
12999 the value to live in. */
13002 ix86_expand_push (enum machine_mode mode, rtx x)
/* Manually decrement the stack pointer by the size of MODE ...  */
13006 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
13007 GEN_INT (-GET_MODE_SIZE (mode)),
13008 stack_pointer_rtx, 1, OPTAB_DIRECT);
13009 if (tmp != stack_pointer_rtx)
13010 emit_move_insn (stack_pointer_rtx, tmp);
/* ... then store X into the newly reserved slot at (sp).  */
13012 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
13014 /* When we push an operand onto stack, it has to be aligned at least
13015 at the function argument boundary. However since we don't have
13016 the argument type, we can't determine the actual argument
13018 emit_move_insn (tmp, x);
13021 /* Helper function of ix86_fixup_binary_operands to canonicalize
13022 operand order. Returns true if the operands should be swapped. */
/* NOTE(review): the return statements for each priority rule are
   elided in this extract; only the condition lines remain.  Code kept
   byte-identical.  */
13025 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
13028 rtx dst = operands[0];
13029 rtx src1 = operands[1];
13030 rtx src2 = operands[2];
13032 /* If the operation is not commutative, we can't do anything. */
13033 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
13036 /* Highest priority is that src1 should match dst. */
13037 if (rtx_equal_p (dst, src1))
13039 if (rtx_equal_p (dst, src2))
13042 /* Next highest priority is that immediate constants come second. */
13043 if (immediate_operand (src2, mode))
13045 if (immediate_operand (src1, mode))
13048 /* Lowest priority is that memory references should come second. */
13058 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
13059 destination to use for the operation. If different from the true
13060 destination in operands[0], a copy operation will be required. */
13063 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
13066 rtx dst = operands[0];
13067 rtx src1 = operands[1];
13068 rtx src2 = operands[2];
13070 /* Canonicalize operand order. */
13071 if (ix86_swap_binary_operands_p (code, mode, operands))
/* NOTE(review): the actual swap of src1/src2 is among the lines elided
   from this extract.  */
13075 /* It is invalid to swap operands of different modes. */
13076 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
13083 /* Both source operands cannot be in memory. */
13084 if (MEM_P (src1) && MEM_P (src2))
13086 /* Optimization: Only read from memory once. */
13087 if (rtx_equal_p (src1, src2))
13089 src2 = force_reg (mode, src2);
13093 src2 = force_reg (mode, src2);
13096 /* If the destination is memory, and we do not have matching source
13097 operands, do things in registers. */
13098 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13099 dst = gen_reg_rtx (mode);
13101 /* Source 1 cannot be a constant. */
13102 if (CONSTANT_P (src1))
13103 src1 = force_reg (mode, src1);
13105 /* Source 1 cannot be a non-matching memory. */
13106 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13107 src1 = force_reg (mode, src1);
/* Write the (possibly replaced) sources back for the caller.  */
13109 operands[1] = src1;
13110 operands[2] = src2;
13114 /* Similarly, but assume that the destination has already been
13115 set up properly. */
13118 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
13119 enum machine_mode mode, rtx operands[])
/* The fixup must not have needed a scratch destination: the returned
   dst must be operands[0] itself.  */
13121 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
13122 gcc_assert (dst == operands[0]);
13125 /* Attempt to expand a binary operator. Make the expansion closer to the
13126 actual machine, then just general_operand, which will allow 3 separate
13127 memory references (one output, two input) in a single insn. */
13130 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
13133 rtx src1, src2, dst, op, clob;
13135 dst = ix86_fixup_binary_operands (code, mode, operands);
13136 src1 = operands[1];
13137 src2 = operands[2];
13139 /* Emit the instruction. */
13141 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
13142 if (reload_in_progress)
13144 /* Reload doesn't know about the flags register, and doesn't know that
13145 it doesn't want to clobber it. We can only do this with PLUS. */
13146 gcc_assert (code == PLUS);
/* Normal case: attach an explicit FLAGS_REG clobber, since x86
   arithmetic sets the flags.  */
13151 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13152 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13155 /* Fix up the destination if needed. */
13156 if (dst != operands[0])
13157 emit_move_insn (operands[0], dst);
13160 /* Return TRUE or FALSE depending on whether the binary operator meets the
13161 appropriate constraints. */
/* NOTE(review): the individual return statements are elided in this
   extract; only the tested conditions remain.  Code kept byte-identical.
   This predicate mirrors the fixups in ix86_fixup_binary_operands.  */
13164 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13167 rtx dst = operands[0];
13168 rtx src1 = operands[1];
13169 rtx src2 = operands[2];
13171 /* Both source operands cannot be in memory. */
13172 if (MEM_P (src1) && MEM_P (src2))
13175 /* Canonicalize operand order for commutative operators. */
13176 if (ix86_swap_binary_operands_p (code, mode, operands))
13183 /* If the destination is memory, we must have a matching source operand. */
13184 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13187 /* Source 1 cannot be a constant. */
13188 if (CONSTANT_P (src1))
13191 /* Source 1 cannot be a non-matching memory. */
13192 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13198 /* Attempt to expand a unary operator. Make the expansion closer to the
13199 actual machine, then just general_operand, which will allow 2 separate
13200 memory references (one output, one input) in a single insn. */
13203 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13206 int matching_memory;
13207 rtx src, dst, op, clob;
13212 /* If the destination is memory, and we do not have matching source
13213 operands, do things in registers. */
13214 matching_memory = 0;
13217 if (rtx_equal_p (dst, src))
13218 matching_memory = 1;
13220 dst = gen_reg_rtx (mode);
13223 /* When source operand is memory, destination must match. */
13224 if (MEM_P (src) && !matching_memory)
13225 src = force_reg (mode, src);
13227 /* Emit the instruction. */
13229 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13230 if (reload_in_progress || code == NOT)
13232 /* Reload doesn't know about the flags register, and doesn't know that
13233 it doesn't want to clobber it. */
13234 gcc_assert (code == NOT);
/* Other unary ops (e.g. NEG) clobber the flags; say so explicitly.  */
13239 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13240 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13243 /* Fix up the destination if needed. */
13244 if (dst != operands[0])
13245 emit_move_insn (operands[0], dst);
/* Maximum number of instructions to walk when searching for AGU/non-AGU
   register definitions and uses around an insn.  */
13248 #define LEA_SEARCH_THRESHOLD 12
13250 /* Search backward for non-agu definition of register number REGNO1
13251 or register number REGNO2 in INSN's basic block until
13252 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13253 2. Reach BB boundary, or
13254 3. Reach agu definition.
13255 Returns the distance between the non-agu definition point and INSN.
13256 If no definition point, returns -1. */
/* NOTE(review): several lines are elided (distance init/increment,
   return statements, the simple-loop edge test).  Code kept
   byte-identical.  */
13259 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13262 basic_block bb = BLOCK_FOR_INSN (insn);
13265 enum attr_type insn_type;
/* First scan backward inside INSN's own basic block.  */
13267 if (insn != BB_HEAD (bb))
13269 rtx prev = PREV_INSN (insn);
13270 while (prev && distance < LEA_SEARCH_THRESHOLD)
13275 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13276 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13277 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13278 && (regno1 == DF_REF_REGNO (*def_rec)
13279 || regno2 == DF_REF_REGNO (*def_rec)))
13281 insn_type = get_attr_type (prev);
13282 if (insn_type != TYPE_LEA)
13286 if (prev == BB_HEAD (bb))
13288 prev = PREV_INSN (prev);
/* Budget not exhausted: if the block is a self-loop (an edge from bb
   back to itself - the test is elided here), continue the backward
   scan from the block's end.  */
13292 if (distance < LEA_SEARCH_THRESHOLD)
13296 bool simple_loop = false;
13298 FOR_EACH_EDGE (e, ei, bb->preds)
13301 simple_loop = true;
13307 rtx prev = BB_END (bb);
13310 && distance < LEA_SEARCH_THRESHOLD)
13315 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13316 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13317 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13318 && (regno1 == DF_REF_REGNO (*def_rec)
13319 || regno2 == DF_REF_REGNO (*def_rec)))
13321 insn_type = get_attr_type (prev);
13322 if (insn_type != TYPE_LEA)
13326 prev = PREV_INSN (prev);
13334 /* get_attr_type may modify recog data. We want to make sure
13335 that recog data is valid for instruction INSN, on which
13336 distance_non_agu_define is called. INSN is unchanged here. */
13337 extract_insn_cached (insn);
13341 /* Return the distance between INSN and the next insn that uses
13342 register number REGNO0 in memory address. Return -1 if no such
13343 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
/* NOTE(review): mirror image of distance_non_agu_define, scanning
   forward.  Several lines (distance bookkeeping, returns, the self-loop
   edge test) are elided in this extract.  Code kept byte-identical.  */
13346 distance_agu_use (unsigned int regno0, rtx insn)
13348 basic_block bb = BLOCK_FOR_INSN (insn);
/* First scan forward inside INSN's own basic block.  */
13353 if (insn != BB_END (bb))
13355 rtx next = NEXT_INSN (insn);
13356 while (next && distance < LEA_SEARCH_THRESHOLD)
13362 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13363 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13364 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13365 && regno0 == DF_REF_REGNO (*use_rec))
13367 /* Return DISTANCE if OP0 is used in memory
13368 address in NEXT. */
13372 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13373 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13374 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13375 && regno0 == DF_REF_REGNO (*def_rec))
13377 /* Return -1 if OP0 is set in NEXT. */
13381 if (next == BB_END (bb))
13383 next = NEXT_INSN (next);
/* Budget not exhausted: if bb loops back to itself, continue scanning
   from the block head.  */
13387 if (distance < LEA_SEARCH_THRESHOLD)
13391 bool simple_loop = false;
13393 FOR_EACH_EDGE (e, ei, bb->succs)
13396 simple_loop = true;
13402 rtx next = BB_HEAD (bb);
13405 && distance < LEA_SEARCH_THRESHOLD)
13411 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13412 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13413 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13414 && regno0 == DF_REF_REGNO (*use_rec))
13416 /* Return DISTANCE if OP0 is used in memory
13417 address in NEXT. */
13421 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13422 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13423 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13424 && regno0 == DF_REF_REGNO (*def_rec))
13426 /* Return -1 if OP0 is set in NEXT. */
13431 next = NEXT_INSN (next);
13439 /* Define this macro to tune LEA priority vs ADD, it take effect when
13440 there is a dilemma of choicing LEA or ADD
13441 Negative value: ADD is more preferred than LEA
13443 Positive value: LEA is more preferred than ADD*/
13444 #define IX86_LEA_PRIORITY 2
13446 /* Return true if it is ok to optimize an ADD operation to LEA
13447 operation to avoid flag register consumation. For the processors
13448 like ATOM, if the destination register of LEA holds an actual
13449 address which will be used soon, LEA is better and otherwise ADD
/* NOTE(review): a few lines (a return and closing braces) are elided
   in this extract; code kept byte-identical.  */
13453 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13454 rtx insn, rtx operands[])
13456 unsigned int regno0 = true_regnum (operands[0]);
13457 unsigned int regno1 = true_regnum (operands[1]);
13458 unsigned int regno2;
/* Without AGU tuning (or when optimizing for size) use LEA only when
   the destination differs from the source, i.e. a plain ADD cannot
   express the operation in two operands.  */
13460 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13461 return regno0 != regno1;
13463 regno2 = true_regnum (operands[2]);
13465 /* If a = b + c, (a!=b && a!=c), must use lea form. */
13466 if (regno0 != regno1 && regno0 != regno2)
13470 int dist_define, dist_use;
13471 dist_define = distance_non_agu_define (regno1, regno2, insn);
13472 if (dist_define <= 0)
13475 /* If this insn has both backward non-agu dependence and forward
13476 agu dependence, the one with short distance take effect. */
13477 dist_use = distance_agu_use (regno0, insn);
13479 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13486 /* Return true if destination reg of SET_BODY is shift count of
/* NOTE(review): the comment tail, several case/default/return lines and
   braces are elided in this extract; code kept byte-identical.
   Recursive helper: walks PARALLEL bodies on both sides.  */
13490 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13496 /* Retrieve destination of SET_BODY. */
13497 switch (GET_CODE (set_body))
13500 set_dest = SET_DEST (set_body);
13501 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: recurse over each element of SET_BODY.  */
13505 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13506 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13514 /* Retrieve shift count of USE_BODY. */
13515 switch (GET_CODE (use_body))
13518 shift_rtx = XEXP (use_body, 1);
13521 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13522 if (ix86_dep_by_shift_count_body (set_body,
13523 XVECEXP (use_body, 0, i)))
/* Only shift/rotate codes have a count operand to check.  */
13531 && (GET_CODE (shift_rtx) == ASHIFT
13532 || GET_CODE (shift_rtx) == LSHIFTRT
13533 || GET_CODE (shift_rtx) == ASHIFTRT
13534 || GET_CODE (shift_rtx) == ROTATE
13535 || GET_CODE (shift_rtx) == ROTATERT))
13537 rtx shift_count = XEXP (shift_rtx, 1);
13539 /* Return true if shift count is dest of SET_BODY. */
13540 if (REG_P (shift_count)
13541 && true_regnum (set_dest) == true_regnum (shift_count))
13548 /* Return true if destination reg of SET_INSN is shift count of
/* Thin wrapper: compare the PATTERNs of the two insns via the
   recursive body helper above (comment tail elided in this extract).  */
13552 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13554 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13555 PATTERN (use_insn));
13558 /* Return TRUE or FALSE depending on whether the unary operator meets the
13559 appropriate constraints. */
/* NOTE(review): the two return statements are elided in this extract;
   code kept byte-identical.  */
13562 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13563 enum machine_mode mode ATTRIBUTE_UNUSED,
13564 rtx operands[2] ATTRIBUTE_UNUSED)
13566 /* If one of operands is memory, source and destination must match. */
13567 if ((MEM_P (operands[0])
13568 || MEM_P (operands[1]))
13569 && ! rtx_equal_p (operands[0], operands[1]))
13574 /* Post-reload splitter for converting an SF or DFmode value in an
13575 SSE register into an unsigned SImode. */
/* NOTE(review): a handful of lines (braces, the MEM_P branch selector)
   are elided in this extract; code kept byte-identical.
   Strategy: values >= 2^31 have 2^31 subtracted before the signed
   cvtt conversion, and the sign bit is xor-ed back in afterwards.  */
13578 ix86_split_convert_uns_si_sse (rtx operands[])
13580 enum machine_mode vecmode;
13581 rtx value, large, zero_or_two31, input, two31, x;
13583 large = operands[1];
13584 zero_or_two31 = operands[2];
13585 input = operands[3];
13586 two31 = operands[4];
13587 vecmode = GET_MODE (large);
13588 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13590 /* Load up the value into the low element. We must ensure that the other
13591 elements are valid floats -- zero is the easiest such value. */
/* Memory input: insert into a zeroed vector's element 0.  */
13594 if (vecmode == V4SFmode)
13595 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13597 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Register input: zero VALUE and move the scalar in with movss/movsd.  */
13601 input = gen_rtx_REG (vecmode, REGNO (input));
13602 emit_move_insn (value, CONST0_RTX (vecmode));
13603 if (vecmode == V4SFmode)
13604 emit_insn (gen_sse_movss (value, value, input));
13606 emit_insn (gen_sse2_movsd (value, value, input));
13609 emit_move_insn (large, two31);
13610 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2^31 <= value) ? all-ones : 0  (vector compare mask).  */
13612 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13613 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* zero_or_two31 = mask ? 2^31 : 0; subtract it from value.  */
13615 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13616 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13618 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13619 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Shift the mask into the integer sign-bit position ...  */
13621 large = gen_rtx_REG (V4SImode, REGNO (large));
13622 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
/* ... convert (truncating) to signed int ...  */
13624 x = gen_rtx_REG (V4SImode, REGNO (value));
13625 if (vecmode == V4SFmode)
13626 emit_insn (gen_sse2_cvttps2dq (x, value));
13628 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* ... and restore the high bit for inputs that were >= 2^31.  */
13631 emit_insn (gen_xorv4si3 (value, value, large));
13634 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13635 Expects the 64-bit DImode to be supplied in a pair of integral
13636 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13637 -mfpmath=sse, !optimize_size only. */
/* NOTE(review): a few lines (braces, the SSE3 test around haddv2df)
   are elided in this extract; code kept byte-identical.
   Technique: pack each 32-bit half with a chosen exponent word so the
   pair reads as (2^52 + lo) and (2^84 + hi) as doubles, subtract the
   biases, then add the two halves.  */
13640 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13642 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13643 rtx int_xmm, fp_xmm;
13644 rtx biases, exponents;
/* Get the 64-bit input into the low half of an XMM register by
   whichever path the tuning flags permit.  */
13647 int_xmm = gen_reg_rtx (V4SImode);
13648 if (TARGET_INTER_UNIT_MOVES)
13649 emit_insn (gen_movdi_to_sse (int_xmm, input));
13650 else if (TARGET_SSE_SPLIT_REGS)
13652 emit_clobber (int_xmm);
13653 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13657 x = gen_reg_rtx (V2DImode);
13658 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13659 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words: 0x43300000 = high word of 2^52, 0x45300000 = high
   word of 2^84 (as IEEE double).  */
13662 x = gen_rtx_CONST_VECTOR (V4SImode,
13663 gen_rtvec (4, GEN_INT (0x43300000UL),
13664 GEN_INT (0x45300000UL),
13665 const0_rtx, const0_rtx));
13666 exponents = validize_mem (force_const_mem (V4SImode, x));
13668 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13669 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13671 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13672 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13673 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13674 (0x1.0p84 + double(fp_value_hi_xmm)).
13675 Note these exponents differ by 32. */
13677 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13679 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13680 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13681 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13682 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13683 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13684 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13685 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13686 biases = validize_mem (force_const_mem (V2DFmode, biases));
13687 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13689 /* Add the upper and lower DFmode values together. */
/* SSE3: one horizontal add does it (the guard is elided here).  */
13691 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
/* SSE2 fallback: copy, move the high lane down, and add.  */
13694 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13695 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13696 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13699 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13702 /* Not used, but eases macroization of patterns. */
/* Stub so machine-description macro iterators can reference a uniform
   expander name for the XFmode case; must never actually be reached.  */
13704 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13705 rtx input ATTRIBUTE_UNUSED)
13707 gcc_unreachable ();
13710 /* Convert an unsigned SImode value into a DFmode. Only currently used
13711 for SSE, but applicable anywhere. */
/* Strategy: bias the unsigned input into signed range by adding
   INT_MIN, convert with the signed SImode->DFmode insn, then add 2^31
   back in DFmode to undo the bias.  */
13714 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13716 REAL_VALUE_TYPE TWO31r;
/* input + (-2147483648), computed as unsigned wraparound.  */
13719 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13720 NULL, 1, OPTAB_DIRECT);
13722 fp = gen_reg_rtx (DFmode);
13723 emit_insn (gen_floatsidf2 (fp, x));
/* Re-add 2^31 in the FP domain; exact, since DFmode has 53 bits.  */
13725 real_ldexp (&TWO31r, &dconst1, 31);
13726 x = const_double_from_real_value (TWO31r, DFmode);
13728 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13730 emit_move_insn (target, x);
13733 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13734 32-bit mode; otherwise we have a direct convert instruction. */
/* Computes double(hi) * 2^32 + double(unsigned lo): the high word is
   converted signed, the low word unsigned via
   ix86_expand_convert_uns_sidf_sse.  */
13737 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13739 REAL_VALUE_TYPE TWO32r;
13740 rtx fp_lo, fp_hi, x;
13742 fp_lo = gen_reg_rtx (DFmode);
13743 fp_hi = gen_reg_rtx (DFmode);
13745 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
/* Scale the converted high word by 2^32.  */
13747 real_ldexp (&TWO32r, &dconst1, 32);
13748 x = const_double_from_real_value (TWO32r, DFmode);
13749 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13751 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13753 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13756 emit_move_insn (target, x);
13759 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13760 For x86_32, -mfpmath=sse, !optimize_size only. */
/* Split the 32-bit input into 16-bit halves so each half converts
   exactly via the signed SImode->SFmode insn, then recombine as
   float(hi) * 2^16 + float(lo).  */
13762 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13764 REAL_VALUE_TYPE ONE16r;
13765 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13767 real_ldexp (&ONE16r, &dconst1, 16);
13768 x = const_double_from_real_value (ONE16r, SFmode);
13769 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13770 NULL, 0, OPTAB_DIRECT);
13771 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13772 NULL, 0, OPTAB_DIRECT);
13773 fp_hi = gen_reg_rtx (SFmode);
13774 fp_lo = gen_reg_rtx (SFmode);
13775 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13776 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13777 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13779 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
/* expand_simple_binop may have placed the result elsewhere; copy into
   TARGET only when needed.  */
13781 if (!rtx_equal_p (target, fp_hi))
13782 emit_move_insn (target, fp_hi);
13785 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13786 then replicate the value for all elements of the vector
/* Builds a CONST_VECTOR in MODE holding VALUE.  If VECT, VALUE is
   broadcast to every lane; otherwise only lane 0 is VALUE and the
   remaining FP lanes are zero.  NOTE(review): the switch statement and
   some case labels are missing from this extract.  */
13790 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
/* V4SImode: integer lanes are always replicated.  */
13797 v = gen_rtvec (4, value, value, value, value);
13798 return gen_rtx_CONST_VECTOR (V4SImode, v);
/* V2DImode: likewise.  */
13802 v = gen_rtvec (2, value, value);
13803 return gen_rtx_CONST_VECTOR (V2DImode, v);
/* V4SFmode: broadcast or value-in-lane-0 with zero fill.  */
13807 v = gen_rtvec (4, value, value, value, value);
13809 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13810 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13811 return gen_rtx_CONST_VECTOR (V4SFmode, v);
/* V2DFmode: same scheme for doubles.  */
13815 v = gen_rtvec (2, value, value);
13817 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13818 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13821 gcc_unreachable ();
13825 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13826 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13827 for an SSE register. If VECT is true, then replicate the mask for
13828 all elements of the vector register. If INVERT is true, then create
13829 a mask excluding the sign bit. */
/* NOTE(review): the mode switch and several case/brace lines are
   missing from this extract; code kept verbatim.  */
13832 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13834 enum machine_mode vec_mode, imode;
13835 HOST_WIDE_INT hi, lo;
13840 /* Find the sign bit, sign extended to 2*HWI. */
/* 32-bit elements: sign bit is bit 31.  */
13846 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13847 lo = 0x80000000, hi = lo < 0;
/* 64-bit elements: sign bit is bit 63; split across lo/hi when the
   host wide int is only 32 bits.  */
13853 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13854 if (HOST_BITS_PER_WIDE_INT >= 64)
13855 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13857 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* Wider (TF/XF-style) case: no vector mode available.  */
13862 vec_mode = VOIDmode;
13863 if (HOST_BITS_PER_WIDE_INT >= 64)
13866 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13873 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* INVERT turns the sign-bit mask into an all-bits-but-sign mask.  */
13877 lo = ~lo, hi = ~hi;
13883 mask = immed_double_const (lo, hi, imode);
13885 vec = gen_rtvec (2, v, mask);
13886 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13887 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13894 gcc_unreachable ();
13898 lo = ~lo, hi = ~hi;
13900 /* Force this value into the low part of a fp vector constant. */
13901 mask = immed_double_const (lo, hi, imode);
13902 mask = gen_lowpart (mode, mask);
/* Scalar fallback when no vector mode applies.  */
13904 if (vec_mode == VOIDmode)
13905 return force_reg (mode, mask);
13907 v = ix86_build_const_vector (mode, vect, mask);
13908 return force_reg (vec_mode, v);
13911 /* Generate code for floating point ABS or NEG. */
/* Emits either a bitwise SSE mask operation (XOR for NEG, AND for ABS)
   or an x87-style unary with a USE of the mask and a flags clobber.
   NOTE(review): some branch/brace lines are missing from this extract.  */
13914 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13917 rtx mask, set, use, clob, dst, src;
13918 bool use_sse = false;
13919 bool vector_mode = VECTOR_MODE_P (mode);
13920 enum machine_mode elt_mode = mode;
13924 elt_mode = GET_MODE_INNER (mode);
13927 else if (mode == TFmode)
13929 else if (TARGET_SSE_MATH)
13930 use_sse = SSE_FLOAT_MODE_P (mode);
13932 /* NEG and ABS performed with SSE use bitwise mask operations.
13933 Create the appropriate mask now. */
/* ABS masks the sign bit off (inverted mask); NEG flips it (XOR).  */
13935 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13944 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13945 set = gen_rtx_SET (VOIDmode, dst, set);
/* Non-SSE path: plain unary rtx, with USE/CLOBBER so reg-stack and
   flags handling stay correct.  */
13950 set = gen_rtx_fmt_e (code, mode, src);
13951 set = gen_rtx_SET (VOIDmode, dst, set);
13954 use = gen_rtx_USE (VOIDmode, mask);
13955 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13956 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13957 gen_rtvec (3, set, use, clob)));
13964 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* operands[0] = dest, operands[1]/[2] = magnitude/sign sources (exact
   indices for op0/op1 assignment are missing from this extract).
   Constant op0 uses the single-mask _const pattern; otherwise the
   two-mask _var pattern.  */
13967 ix86_expand_copysign (rtx operands[])
13969 enum machine_mode mode;
13970 rtx dest, op0, op1, mask, nmask;
13972 dest = operands[0];
13976 mode = GET_MODE (dest);
13978 if (GET_CODE (op0) == CONST_DOUBLE)
13980 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* Strip the sign from a negative constant; the sign comes from op1.  */
13982 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13983 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13985 if (mode == SFmode || mode == DFmode)
13987 enum machine_mode vmode;
13989 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13991 if (op0 == CONST0_RTX (mode))
13992 op0 = CONST0_RTX (vmode);
/* Embed the nonzero scalar constant in lane 0 of a vector.  */
13997 if (mode == SFmode)
13998 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13999 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14001 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
14003 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
14006 else if (op0 != CONST0_RTX (mode))
14007 op0 = force_reg (mode, op0);
14009 mask = ix86_build_signbit_mask (mode, 0, 0);
14011 if (mode == SFmode)
14012 copysign_insn = gen_copysignsf3_const;
14013 else if (mode == DFmode)
14014 copysign_insn = gen_copysigndf3_const;
14016 copysign_insn = gen_copysigntf3_const;
14018 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable-magnitude path: needs both the sign mask and its
   complement (nmask) for the two AND/IOR steps.  */
14022 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
14024 nmask = ix86_build_signbit_mask (mode, 0, 1);
14025 mask = ix86_build_signbit_mask (mode, 0, 0);
14027 if (mode == SFmode)
14028 copysign_insn = gen_copysignsf3_var;
14029 else if (mode == DFmode)
14030 copysign_insn = gen_copysigndf3_var;
14032 copysign_insn = gen_copysigntf3_var;
14034 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
14038 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
14039 be a constant, and so has already been expanded into a vector constant. */
/* dest = (op1 & signmask) | op0 — isolate op1's sign bit, then OR in
   the (sign-stripped) constant magnitude unless it is zero.  */
14042 ix86_split_copysign_const (rtx operands[])
14044 enum machine_mode mode, vmode;
14045 rtx dest, op0, op1, mask, x;
14047 dest = operands[0];
14050 mask = operands[3];
14052 mode = GET_MODE (dest);
14053 vmode = GET_MODE (mask);
/* Operate on dest viewed in the vector (mask) mode.  */
14055 dest = simplify_gen_subreg (vmode, dest, mode, 0);
14056 x = gen_rtx_AND (vmode, dest, mask);
14057 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* ORing in a zero magnitude would be a no-op; skip it.  */
14059 if (op0 != CONST0_RTX (vmode))
14061 x = gen_rtx_IOR (vmode, dest, op0);
14062 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14066 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
14067 so we have to do two masks. */
/* dest = (op0 & ~signmask) | (op1 & signmask), with register
   allocation alternatives distinguished by which operands share hard
   registers with dest/scratch/mask.  */
14070 ix86_split_copysign_var (rtx operands[])
14072 enum machine_mode mode, vmode;
14073 rtx dest, scratch, op0, op1, mask, nmask, x;
14075 dest = operands[0];
14076 scratch = operands[1];
14079 nmask = operands[4];
14080 mask = operands[5];
14082 mode = GET_MODE (dest);
14083 vmode = GET_MODE (mask);
14085 if (rtx_equal_p (op0, op1))
14087 /* Shouldn't happen often (it's useless, obviously), but when it does
14088 we'd generate incorrect code if we continue below. */
14089 emit_move_insn (dest, op0);
14093 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
14095 gcc_assert (REGNO (op1) == REGNO (scratch));
/* scratch = op1 & mask: extract the sign bit of op1.  */
14097 x = gen_rtx_AND (vmode, scratch, mask);
14098 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest(=mask) is complemented then ANDed with op0 to clear op0's
   sign bit via ANDN-style sequence.  */
14101 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14102 x = gen_rtx_NOT (vmode, dest);
14103 x = gen_rtx_AND (vmode, x, op0);
14104 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14108 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
14110 x = gen_rtx_AND (vmode, scratch, mask);
14112 else /* alternative 2,4 */
14114 gcc_assert (REGNO (mask) == REGNO (scratch));
14115 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
14116 x = gen_rtx_AND (vmode, scratch, op1);
14118 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14120 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
14122 dest = simplify_gen_subreg (vmode, op0, mode, 0);
14123 x = gen_rtx_AND (vmode, dest, nmask);
14125 else /* alternative 3,4 */
14127 gcc_assert (REGNO (nmask) == REGNO (dest));
14129 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14130 x = gen_rtx_AND (vmode, dest, op0);
14132 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine the masked magnitude and masked sign.  */
14135 x = gen_rtx_IOR (vmode, dest, scratch);
14136 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14139 /* Return TRUE or FALSE depending on whether the first SET in INSN
14140 has source and destination with matching CC modes, and that the
14141 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): the switch over set_mode and its case labels are
   missing from this extract; code kept verbatim.  */
14144 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
14147 enum machine_mode set_mode;
14149 set = PATTERN (insn);
14150 if (GET_CODE (set) == PARALLEL)
14151 set = XVECEXP (set, 0, 0);
14152 gcc_assert (GET_CODE (set) == SET);
14153 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
14155 set_mode = GET_MODE (SET_DEST (set));
/* CCmode only satisfies the weaker modes when comparing against 0.  */
14159 if (req_mode != CCNOmode
14160 && (req_mode != CCmode
14161 || XEXP (SET_SRC (set), 1) != const0_rtx))
14165 if (req_mode == CCGCmode)
14169 if (req_mode == CCGOCmode || req_mode == CCNOmode)
14173 if (req_mode == CCZmode)
14184 gcc_unreachable ();
14187 return (GET_MODE (SET_SRC (set)) == set_mode);
14190 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emits a flags-setting COMPARE and returns the comparison rtx the
   consumer (bcc/scc/cmov) should test.  */
14193 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14195 enum machine_mode cmpmode;
/* Pick the least-constrained CC mode adequate for CODE.  */
14198 cmpmode = SELECT_CC_MODE (code, op0, op1);
14199 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14201 /* This is very simple, but making the interface the same as in the
14202 FP case makes the rest of the code easier. */
14203 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14204 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14206 /* Return the test that should be put into the flags user, i.e.
14207 the bcc, scc, or cmov instruction. */
14208 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14211 /* Figure out whether to use ordered or unordered fp comparisons.
14212 Return the appropriate mode to use. */
14215 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14217 /* ??? In order to make all comparisons reversible, we do all comparisons
14218 non-trapping when compiling for IEEE. Once gcc is able to distinguish
14219 all forms trapping and nontrapping comparisons, we can make inequality
14220 comparisons trapping again, since it results in better code when using
14221 FCOM based compares. */
/* CCFPUmode = unordered (non-trapping on NaN) flavor.  */
14222 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode needed to represent comparison CODE of OP0/OP1,
   choosing the least-constrained mode so more flag-setting insns can
   satisfy it.  NOTE(review): the switch statement, return lines, and
   the default case are missing from this extract.  */
14226 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14228 enum machine_mode mode = GET_MODE (op0);
14230 if (SCALAR_FLOAT_MODE_P (mode))
14232 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14233 return ix86_fp_compare_mode (code);
14238 /* Only zero flag is needed. */
14239 case EQ: /* ZF=0 */
14240 case NE: /* ZF!=0 */
14242 /* Codes needing carry flag. */
14243 case GEU: /* CF=0 */
14244 case LTU: /* CF=1 */
14245 /* Detect overflow checks. They need just the carry flag. */
14246 if (GET_CODE (op0) == PLUS
14247 && rtx_equal_p (op1, XEXP (op0, 0)))
14251 case GTU: /* CF=0 & ZF=0 */
14252 case LEU: /* CF=1 | ZF=1 */
14253 /* Detect overflow checks. They need just the carry flag. */
14254 if (GET_CODE (op0) == MINUS
14255 && rtx_equal_p (op1, XEXP (op0, 0)))
14259 /* Codes possibly doable only with sign flag when
14260 comparing against zero. */
14261 case GE: /* SF=OF or SF=0 */
14262 case LT: /* SF<>OF or SF=1 */
14263 if (op1 == const0_rtx)
14266 /* For other cases Carry flag is not required. */
14268 /* Codes doable only with sign flag when comparing
14269 against zero, but we miss jump instruction for it
14270 so we need to use relational tests against overflow
14271 that thus needs to be zero. */
14272 case GT: /* ZF=0 & SF=OF */
14273 case LE: /* ZF=1 | SF<>OF */
14274 if (op1 == const0_rtx)
14278 /* strcmp pattern do (use flags) and combine may ask us for proper
14283 gcc_unreachable ();
14287 /* Return the fixed registers used for condition codes. */
/* NOTE(review): the body (assignments to *p1/*p2 and return) is
   missing from this extract.  */
14290 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14297 /* If two condition code modes are compatible, return a condition code
14298 mode which is compatible with both. Otherwise, return
/* NOTE(review): the switch over m1/m2 and most case/return lines are
   missing from this extract.  */
14301 static enum machine_mode
14302 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Non-CC modes must match exactly (handled before this point).  */
14307 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC merge to their common stricter mode.  */
14310 if ((m1 == CCGCmode && m2 == CCGOCmode)
14311 || (m1 == CCGOCmode && m2 == CCGCmode))
14317 gcc_unreachable ();
14347 /* These are only compatible with themselves, which we already
14353 /* Split comparison code CODE into comparisons we can do using branch
14354 instructions. BYPASS_CODE is comparison code for branch that will
14355 branch around FIRST_CODE and SECOND_CODE. If some of branches
14356 is not required, set value to UNKNOWN.
14357 We never require more than two branches. */
/* NOTE(review): the switch keyword, some flag-description comment
   lines, and several break statements are missing from this extract.  */
14360 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
14361 enum rtx_code *first_code,
14362 enum rtx_code *second_code)
14364 *first_code = code;
14365 *bypass_code = UNKNOWN;
14366 *second_code = UNKNOWN;
14368 /* The fcomi comparison sets flags as follows:
/* Codes directly expressible with a single unsigned-style branch.  */
14378 case GT: /* GTU - CF=0 & ZF=0 */
14379 case GE: /* GEU - CF=0 */
14380 case ORDERED: /* PF=0 */
14381 case UNORDERED: /* PF=1 */
14382 case UNEQ: /* EQ - ZF=1 */
14383 case UNLT: /* LTU - CF=1 */
14384 case UNLE: /* LEU - CF=1 | ZF=1 */
14385 case LTGT: /* EQ - ZF=0 */
/* Ordered codes that misbehave on NaN: guard with an UNORDERED
   bypass branch (or follow-up branch) under IEEE.  */
14387 case LT: /* LTU - CF=1 - fails on unordered */
14388 *first_code = UNLT;
14389 *bypass_code = UNORDERED;
14391 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
14392 *first_code = UNLE;
14393 *bypass_code = UNORDERED;
14395 case EQ: /* EQ - ZF=1 - fails on unordered */
14396 *first_code = UNEQ;
14397 *bypass_code = UNORDERED;
14399 case NE: /* NE - ZF=0 - fails on unordered */
14400 *first_code = LTGT;
14401 *second_code = UNORDERED;
14403 case UNGE: /* GEU - CF=0 - fails on unordered */
14405 *second_code = UNORDERED;
14407 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
14409 *second_code = UNORDERED;
14412 gcc_unreachable ();
/* Without strict IEEE, NaNs need not be honored: drop the extra
   branches.  */
14414 if (!TARGET_IEEE_FP)
14416 *second_code = UNKNOWN;
14417 *bypass_code = UNKNOWN;
14421 /* Return cost of comparison done fcom + arithmetics operations on AX.
14422 All following functions do use number of instructions as a cost metrics.
14423 In future this should be tweaked to compute bytes for optimize_size and
14424 take into account performance of various instructions on various CPUs. */
/* NOTE(review): the per-code switch and its return values are missing
   from this extract.  */
14426 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
14428 if (!TARGET_IEEE_FP)
14430 /* The cost of code output by ix86_expand_fp_compare. */
14454 gcc_unreachable ();
14458 /* Return cost of comparison done using fcomi operation.
14459 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14461 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
14463 enum rtx_code bypass_code, first_code, second_code;
14464 /* Return arbitrarily high cost when instruction is not supported - this
14465 prevents gcc from using it. */
/* Base cost 2 (fcomi + branch), +1 if an extra branch is needed.  */
14468 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14469 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14472 /* Return cost of comparison done using sahf operation.
14473 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14475 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14477 enum rtx_code bypass_code, first_code, second_code;
14478 /* Return arbitrarily high cost when instruction is not preferred - this
14479 avoids gcc from using it. */
14480 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
/* Base cost 3 (fnstsw + sahf + branch), +1 for a second branch.  */
14482 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14483 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14486 /* Compute cost of the comparison done using any method.
14487 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Minimum over the fcomi, sahf, and fcom+arith strategies.
   NOTE(review): the min-update assignments and return are missing
   from this extract.  */
14489 ix86_fp_comparison_cost (enum rtx_code code)
14491 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14494 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14495 sahf_cost = ix86_fp_comparison_sahf_cost (code);
14497 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14498 if (min > sahf_cost)
14500 if (min > fcomi_cost)
14505 /* Return true if we should use an FCOMI instruction for this
/* True when fcomi is the cheapest strategy for CODE or its swapped
   form (the expander may swap operands).  */
14509 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14511 enum rtx_code swapped_code = swap_condition (code);
14513 return ((ix86_fp_comparison_cost (code)
14514 == ix86_fp_comparison_fcomi_cost (code))
14515 || (ix86_fp_comparison_cost (swapped_code)
14516 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14519 /* Swap, force into registers, or otherwise massage the two operands
14520 to a fp comparison. The operands are updated in place; the new
14521 comparison code is returned. */
/* NOTE(review): several condition and brace lines are missing from
   this extract; code kept verbatim.  */
14523 static enum rtx_code
14524 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14526 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14527 rtx op0 = *pop0, op1 = *pop1;
14528 enum machine_mode op_mode = GET_MODE (op0);
14529 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14531 /* All of the unordered compare instructions only work on registers.
14532 The same is true of the fcomi compare instructions. The XFmode
14533 compare instructions require registers except when comparing
14534 against zero or when converting operand 1 from fixed point to
14538 && (fpcmp_mode == CCFPUmode
14539 || (op_mode == XFmode
14540 && ! (standard_80387_constant_p (op0) == 1
14541 || standard_80387_constant_p (op1) == 1)
14542 && GET_CODE (op1) != FLOAT)
14543 || ix86_use_fcomi_compare (code)))
14545 op0 = force_reg (op_mode, op0);
14546 op1 = force_reg (op_mode, op1);
14550 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14551 things around if they appear profitable, otherwise force op0
14552 into a register. */
14554 if (standard_80387_constant_p (op0) == 0
14556 && ! (standard_80387_constant_p (op1) == 0
14560 tmp = op0, op0 = op1, op1 = tmp;
14561 code = swap_condition (code);
14565 op0 = force_reg (op_mode, op0);
14567 if (CONSTANT_P (op1))
/* Loadable x87 constants (fldz/fld1 etc.) may stay; others are
   spilled to the constant pool.  */
14569 int tmp = standard_80387_constant_p (op1);
14571 op1 = validize_mem (force_const_mem (op_mode, op1));
14575 op1 = force_reg (op_mode, op1);
14578 op1 = force_reg (op_mode, op1);
14582 /* Try to rearrange the comparison to make it cheaper. */
14583 if (ix86_fp_comparison_cost (code)
14584 > ix86_fp_comparison_cost (swap_condition (code))
14585 && (REG_P (op1) || can_create_pseudo_p ()))
14588 tmp = op0, op0 = op1, op1 = tmp;
14589 code = swap_condition (code);
14591 op0 = force_reg (op_mode, op0);
14599 /* Convert comparison codes we use to represent FP comparison to integer
14600 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the entire switch body of this mapping function is
   missing from this extract.  */
14604 ix86_fp_compare_code_to_integer (enum rtx_code code)
14633 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emits either an fcomi/sahf flags-setting compare (cheap path) or an
   fnstsw + AH bit-twiddling sequence, returning the rtx the flags
   consumer should test.  SECOND_TEST/BYPASS_TEST receive extra tests
   when two branches are needed.  NOTE(review): many case labels,
   braces, and else lines are missing from this extract.  */
14636 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14637 rtx *second_test, rtx *bypass_test)
14639 enum machine_mode fpcmp_mode, intcmp_mode;
14641 int cost = ix86_fp_comparison_cost (code);
14642 enum rtx_code bypass_code, first_code, second_code;
14644 fpcmp_mode = ix86_fp_compare_mode (code);
14645 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14648 *second_test = NULL_RTX;
14650 *bypass_test = NULL_RTX;
14652 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14654 /* Do fcomi/sahf based test when profitable. */
14655 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14656 && (bypass_code == UNKNOWN || bypass_test)
14657 && (second_code == UNKNOWN || second_test))
14659 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14660 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14666 gcc_assert (TARGET_SAHF);
/* sahf variant clobbers a scratch HI register for fnstsw.  */
14669 scratch = gen_reg_rtx (HImode);
14670 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14672 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14675 /* The FP codes work out to act like unsigned. */
14676 intcmp_mode = fpcmp_mode;
14678 if (bypass_code != UNKNOWN)
14679 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14680 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14682 if (second_code != UNKNOWN)
14683 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14684 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14689 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14690 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14691 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14693 scratch = gen_reg_rtx (HImode);
14694 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14696 /* In the unordered case, we have to check C2 for NaN's, which
14697 doesn't happen to work out to anything nice combination-wise.
14698 So do some bit twiddling on the value we've got in AH to come
14699 up with an appropriate set of condition codes. */
14701 intcmp_mode = CCNOmode;
/* Per-code AH-bit tests: 0x45 = C3|C2|C0, 0x44 = C3|C2, 0x40 = C3,
   0x05 = C2|C0, 0x04 = C2, 0x01 = C0.  */
14706 if (code == GT || !TARGET_IEEE_FP)
14708 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14713 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14714 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14715 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14716 intcmp_mode = CCmode;
14722 if (code == LT && TARGET_IEEE_FP)
14724 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14725 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14726 intcmp_mode = CCmode;
14731 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14737 if (code == GE || !TARGET_IEEE_FP)
14739 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14744 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14745 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14752 if (code == LE && TARGET_IEEE_FP)
14754 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14755 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14756 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14757 intcmp_mode = CCmode;
14762 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14768 if (code == EQ && TARGET_IEEE_FP)
14770 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14771 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14772 intcmp_mode = CCmode;
14777 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14784 if (code == NE && TARGET_IEEE_FP)
14786 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14787 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14793 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14799 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14803 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14808 gcc_unreachable ();
14812 /* Return the test that should be put into the flags user, i.e.
14813 the bcc, scc, or cmov instruction. */
14814 return gen_rtx_fmt_ee (code, VOIDmode,
14815 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the global ix86_compare_op0/op1 pair,
   dispatching to the CC-mode passthrough, FP, or integer expander.  */
14820 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14823 op0 = ix86_compare_op0;
14824 op1 = ix86_compare_op1;
14827 *second_test = NULL_RTX;
14829 *bypass_test = NULL_RTX;
/* Already a flags value: just wrap it in the requested code.  */
14831 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14832 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14834 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14836 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14837 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14838 second_test, bypass_test);
14841 ret = ix86_expand_int_compare (code, op0, op1);
14846 /* Return true if the CODE will result in nontrivial jump sequence. */
/* "Nontrivial" = expanding CODE requires a bypass or second branch.  */
14848 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14850 enum rtx_code bypass_code, first_code, second_code;
14853 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14854 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Expand a conditional branch on CODE over ix86_compare_op0/op1 to
   LABEL.  Handles scalar int modes directly, FP via the compare
   machinery, and DImode/TImode by splitting into word compares.
   NOTE(review): the switch's case labels, many braces, and several
   lines are missing from this extract; code kept verbatim.  */
14858 ix86_expand_branch (enum rtx_code code, rtx label)
14862 switch (GET_MODE (ix86_compare_op0))
/* Simple scalar case: one compare, one conditional jump.  */
14868 tmp = ix86_expand_compare (code, NULL, NULL);
14869 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14870 gen_rtx_LABEL_REF (VOIDmode, label),
14872 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14881 enum rtx_code bypass_code, first_code, second_code;
14883 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14884 &ix86_compare_op1);
14886 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14888 /* Check whether we will use the natural sequence with one jump. If
14889 so, we can expand jump early. Otherwise delay expansion by
14890 creating compound insn to not confuse optimizers. */
14891 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14893 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14894 gen_rtx_LABEL_REF (VOIDmode, label),
14895 pc_rtx, NULL_RTX, NULL_RTX);
/* Compound case: emit one PARALLEL with clobbers so later passes
   can split it (reg-stack needs the FPSR/flags/scratch clobbers).  */
14899 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14900 ix86_compare_op0, ix86_compare_op1);
14901 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14902 gen_rtx_LABEL_REF (VOIDmode, label),
14904 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14906 use_fcomi = ix86_use_fcomi_compare (code);
14907 vec = rtvec_alloc (3 + !use_fcomi);
14908 RTVEC_ELT (vec, 0) = tmp;
14910 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14912 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14915 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14917 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14926 /* Expand DImode branch into multiple compare+branch. */
14928 rtx lo[2], hi[2], label2;
14929 enum rtx_code code1, code2, code3;
14930 enum machine_mode submode;
/* Canonicalize: constant goes on the right.  */
14932 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14934 tmp = ix86_compare_op0;
14935 ix86_compare_op0 = ix86_compare_op1;
14936 ix86_compare_op1 = tmp;
14937 code = swap_condition (code);
14939 if (GET_MODE (ix86_compare_op0) == DImode)
14941 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14942 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14947 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14948 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14952 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14953 avoid two branches. This costs one extra insn, so disable when
14954 optimizing for size. */
14956 if ((code == EQ || code == NE)
14957 && (!optimize_insn_for_size_p ()
14958 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14963 if (hi[1] != const0_rtx)
14964 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14965 NULL_RTX, 0, OPTAB_WIDEN);
14968 if (lo[1] != const0_rtx)
14969 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14970 NULL_RTX, 0, OPTAB_WIDEN);
14972 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14973 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: the OR result is zero iff the wide operands are equal.  */
14975 ix86_compare_op0 = tmp;
14976 ix86_compare_op1 = const0_rtx;
14977 ix86_expand_branch (code, label);
14981 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14982 op1 is a constant and the low word is zero, then we can just
14983 examine the high word. Similarly for low word -1 and
14984 less-or-equal-than or greater-than. */
14986 if (CONST_INT_P (hi[1]))
14989 case LT: case LTU: case GE: case GEU:
14990 if (lo[1] == const0_rtx)
14992 ix86_compare_op0 = hi[0];
14993 ix86_compare_op1 = hi[1];
14994 ix86_expand_branch (code, label);
14998 case LE: case LEU: case GT: case GTU:
14999 if (lo[1] == constm1_rtx)
15001 ix86_compare_op0 = hi[0];
15002 ix86_compare_op1 = hi[1];
15003 ix86_expand_branch (code, label);
15011 /* Otherwise, we need two or three jumps. */
15013 label2 = gen_label_rtx ();
15016 code2 = swap_condition (code);
15017 code3 = unsigned_condition (code);
15021 case LT: case GT: case LTU: case GTU:
15024 case LE: code1 = LT; code2 = GT; break;
15025 case GE: code1 = GT; code2 = LT; break;
15026 case LEU: code1 = LTU; code2 = GTU; break;
15027 case GEU: code1 = GTU; code2 = LTU; break;
15029 case EQ: code1 = UNKNOWN; code2 = NE; break;
15030 case NE: code2 = UNKNOWN; break;
15033 gcc_unreachable ();
15038 * if (hi(a) < hi(b)) goto true;
15039 * if (hi(a) > hi(b)) goto false;
15040 * if (lo(a) < lo(b)) goto true;
/* High-word compare first; low-word compare uses the unsigned
   variant (code3) since the low words carry no sign.  */
15044 ix86_compare_op0 = hi[0];
15045 ix86_compare_op1 = hi[1];
15047 if (code1 != UNKNOWN)
15048 ix86_expand_branch (code1, label);
15049 if (code2 != UNKNOWN)
15050 ix86_expand_branch (code2, label2);
15052 ix86_compare_op0 = lo[0];
15053 ix86_compare_op1 = lo[1];
15054 ix86_expand_branch (code3, label);
15056 if (code2 != UNKNOWN)
15057 emit_label (label2);
15062 /* If we have already emitted a compare insn, go straight to simple.
15063 ix86_expand_compare won't emit anything if ix86_compare_emitted
15065 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
15070 /* Split branch based on floating point condition. */
/* Emits up to three conditional jumps (bypass-on-unordered, main,
   second) with branch probability notes distributed so the main jump
   keeps the original probability.  NOTE(review): some condition and
   brace lines are missing from this extract.  */
15072 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
15073 rtx target1, rtx target2, rtx tmp, rtx pushed)
15075 rtx second, bypass;
15076 rtx label = NULL_RTX;
15078 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so target2 is the fallthrough (pc_rtx).  */
15081 if (target2 != pc_rtx)
15084 code = reverse_condition_maybe_unordered (code);
15089 condition = ix86_expand_fp_compare (code, op1, op2,
15090 tmp, &second, &bypass);
15092 /* Remove pushed operand from stack. */
15094 ix86_free_from_memory (GET_MODE (pushed));
15096 if (split_branch_probability >= 0)
15098 /* Distribute the probabilities across the jumps.
15099 Assume the BYPASS and SECOND to be always test
15101 probability = split_branch_probability;
15103 /* Value of 1 is low enough to make no need for probability
15104 to be updated. Later we may run some experiments and see
15105 if unordered values are more frequent in practice. */
15107 bypass_probability = 1;
15109 second_probability = 1;
/* Bypass jump skips the main/second jumps on unordered input.  */
15111 if (bypass != NULL_RTX)
15113 label = gen_label_rtx ();
15114 i = emit_jump_insn (gen_rtx_SET
15116 gen_rtx_IF_THEN_ELSE (VOIDmode,
15118 gen_rtx_LABEL_REF (VOIDmode,
15121 if (bypass_probability >= 0)
15122 add_reg_note (i, REG_BR_PROB, GEN_INT (bypass_probability));
15124 i = emit_jump_insn (gen_rtx_SET
15126 gen_rtx_IF_THEN_ELSE (VOIDmode,
15127 condition, target1, target2)));
15128 if (probability >= 0)
15129 add_reg_note (i, REG_BR_PROB, GEN_INT (probability));
15130 if (second != NULL_RTX)
15132 i = emit_jump_insn (gen_rtx_SET
15134 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
15136 if (second_probability >= 0)
15137 add_reg_note (i, REG_BR_PROB, GEN_INT (second_probability))
15139 if (label != NULL_RTX)
15140 emit_label (label);
/* Expand a setcc: materialize the result of the pending comparison
   (ix86_compare_op0/op1) under condition CODE into the QImode register
   DEST.  FP comparisons may produce auxiliary SECOND/BYPASS tests that
   must be combined with the primary flag via AND/IOR.
   NOTE(review): elided listing — only visible statements are annotated.  */
15144 ix86_expand_setcc (enum rtx_code code, rtx dest)
15146 rtx ret, tmp, tmpreg, equiv;
15147 rtx second_test, bypass_test;
/* setcc only produces a byte result.  */
15149 gcc_assert (GET_MODE (dest) == QImode);
15151 ret = ix86_expand_compare (code, &second_test, &bypass_test);
15152 PUT_MODE (ret, QImode);
15157 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* Combine with the auxiliary test, if the FP compare needed one;
   at most one of SECOND/BYPASS is live at a time.  */
15158 if (bypass_test || second_test)
15160 rtx test = second_test;
15162 rtx tmp2 = gen_reg_rtx (QImode);
15165 gcc_assert (!second_test);
15166 test = bypass_test;
/* The bypass result must be inverted before it is ANDed in.  */
15168 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
15170 PUT_MODE (test, QImode);
15171 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
15174 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
15176 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
15179 /* Attach a REG_EQUAL note describing the comparison result. */
15180 if (ix86_compare_op0 && ix86_compare_op1)
15182 equiv = simplify_gen_relational (code, QImode,
15183 GET_MODE (ix86_compare_op0),
15184 ix86_compare_op0, ix86_compare_op1);
15185 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
/* Try to expand the comparison CODE of OP0 with OP1 so that the result
   lands in the carry flag (LTU/GEU form), enabling sbb/adc based
   sequences.  On success *POP receives the comparison rtx and true is
   returned.  Integer comparisons are rewritten into an unsigned
   carry-producing form; FP comparisons are expanded and then verified
   to have ended up carry-based.
   NOTE(review): elided listing — only visible statements are annotated.  */
15189 /* Expand comparison setting or clearing carry flag. Return true when
15190 successful and set pop for the operation. */
15192 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15194 enum machine_mode mode =
15195 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15197 /* Do not handle DImode compares that go through special path. */
15198 if (mode == (TARGET_64BIT ? TImode : DImode))
15201 if (SCALAR_FLOAT_MODE_P (mode))
15203 rtx second_test = NULL, bypass_test = NULL;
15204 rtx compare_op, compare_seq;
15206 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15208 /* Shortcut: following common codes never translate
15209 into carry flag compares. */
15210 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15211 || code == ORDERED || code == UNORDERED)
15214 /* These comparisons require zero flag; swap operands so they won't. */
15215 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15216 && !TARGET_IEEE_FP)
15221 code = swap_condition (code);
15224 /* Try to expand the comparison and verify that we end up with
15225 carry flag based comparison. This fails to be true only when
15226 we decide to expand comparison using arithmetic that is not
15227 too common scenario. */
15229 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
15230 &second_test, &bypass_test);
15231 compare_seq = get_insns ();
/* Auxiliary tests mean the result is not a single carry test.  */
15234 if (second_test || bypass_test)
15237 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15238 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15239 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15241 code = GET_CODE (compare_op);
/* Only LTU/GEU read the carry flag; anything else fails.  */
15243 if (code != LTU && code != GEU)
15246 emit_insn (compare_seq);
15251 if (!INTEGRAL_MODE_P (mode))
15260 /* Convert a==0 into (unsigned)a<1. */
15263 if (op1 != const0_rtx)
15266 code = (code == EQ ? LTU : GEU);
15269 /* Convert a>b into b<a or a>=b-1. */
15272 if (CONST_INT_P (op1))
15274 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15275 /* Bail out on overflow. We still can swap operands but that
15276 would force loading of the constant into register. */
15277 if (op1 == const0_rtx
15278 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15280 code = (code == GTU ? GEU : LTU);
15287 code = (code == GTU ? LTU : GEU);
15291 /* Convert a>=0 into (unsigned)a<0x80000000. */
15294 if (mode == DImode || op1 != const0_rtx)
15296 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15297 code = (code == LT ? GEU : LTU);
/* Same trick for a<=-1: compare against the sign-bit constant.  */
15301 if (mode == DImode || op1 != constm1_rtx)
15303 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15304 code = (code == LE ? GEU : LTU);
15310 /* Swapping operands may cause constant to appear as first operand. */
15311 if (!nonimmediate_operand (op0, VOIDmode))
15313 if (!can_create_pseudo_p ())
15315 op0 = force_reg (mode, op0);
15317 ix86_compare_op0 = op0;
15318 ix86_compare_op1 = op1;
15319 *pop = ix86_expand_compare (code, NULL, NULL);
/* Sanity: the rewritten comparison must be carry-based.  */
15320 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move:
     operands[0] = operands[1](comparison) ? operands[2] : operands[3].
   Tries, in order: sbb/carry-flag tricks for constant arms, lea-based
   arithmetic selection, setcc+and/add sequences when cmov is absent or
   slow, masking one constant arm against a variable arm, and finally
   a plain cmov.  Returns 1 on success (DONE), 0 on failure (FAIL).
   NOTE(review): elided listing — comments describe visible code only.  */
15325 ix86_expand_int_movcc (rtx operands[])
15327 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15328 rtx compare_seq, compare_op;
15329 rtx second_test, bypass_test;
15330 enum machine_mode mode = GET_MODE (operands[0]);
15331 bool sign_bit_compare_p = false;;
15334 ix86_compare_op0 = XEXP (operands[1], 0);
15335 ix86_compare_op1 = XEXP (operands[1], 1);
15336 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15337 compare_seq = get_insns ();
15340 compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 (and x>-1 / x<=-1) only test the sign bit and can be
   done with a shift instead of a compare.  */
15342 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15343 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15344 sign_bit_compare_p = true;
15346 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15347 HImode insns, we'd be swallowed in word prefix ops. */
15349 if ((mode != HImode || TARGET_FAST_PREFIX)
15350 && (mode != (TARGET_64BIT ? TImode : DImode))
15351 && CONST_INT_P (operands[2])
15352 && CONST_INT_P (operands[3]))
15354 rtx out = operands[0];
15355 HOST_WIDE_INT ct = INTVAL (operands[2]);
15356 HOST_WIDE_INT cf = INTVAL (operands[3]);
15357 HOST_WIDE_INT diff;
15360 /* Sign bit compares are better done using shifts than we do by using
15362 if (sign_bit_compare_p
15363 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15364 ix86_compare_op1, &compare_op)
15366 /* Detect overlap between destination and compare sources. */
15369 if (!sign_bit_compare_p)
15371 bool fpcmp = false;
15373 compare_code = GET_CODE (compare_op);
15375 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15376 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15379 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15382 /* To simplify rest of code, restrict to the GEU case. */
15383 if (compare_code == LTU)
15385 HOST_WIDE_INT tmp = ct;
15388 compare_code = reverse_condition (compare_code);
15389 code = reverse_condition (code);
15394 PUT_CODE (compare_op,
15395 reverse_condition_maybe_unordered
15396 (GET_CODE (compare_op)));
15398 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15402 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15403 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15404 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag with sbb.  */
15406 if (mode == DImode)
15407 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15409 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15413 if (code == GT || code == GE)
15414 code = reverse_condition (code);
15417 HOST_WIDE_INT tmp = ct;
/* Sign-bit case: store_flag produces the 0/-1 mask directly.  */
15422 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15423 ix86_compare_op1, VOIDmode, 0, -1);
15436 tmp = expand_simple_binop (mode, PLUS,
15438 copy_rtx (tmp), 1, OPTAB_DIRECT);
15449 tmp = expand_simple_binop (mode, IOR,
15451 copy_rtx (tmp), 1, OPTAB_DIRECT);
15453 else if (diff == -1 && ct)
15463 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15465 tmp = expand_simple_binop (mode, PLUS,
15466 copy_rtx (tmp), GEN_INT (cf),
15467 copy_rtx (tmp), 1, OPTAB_DIRECT);
15475 * andl cf - ct, dest
/* General constant arms: mask = 0/-1, then AND with (cf-ct) and
   add ct, yielding ct or cf without a branch.  */
15485 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15488 tmp = expand_simple_binop (mode, AND,
15490 gen_int_mode (cf - ct, mode),
15491 copy_rtx (tmp), 1, OPTAB_DIRECT);
15493 tmp = expand_simple_binop (mode, PLUS,
15494 copy_rtx (tmp), GEN_INT (ct),
15495 copy_rtx (tmp), 1, OPTAB_DIRECT);
15498 if (!rtx_equal_p (tmp, out))
15499 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15501 return 1; /* DONE */
15506 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
/* Swap the two constant arms when reversing the condition.  */
15509 tmp = ct, ct = cf, cf = tmp;
15512 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15514 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15516 /* We may be reversing unordered compare to normal compare, that
15517 is not valid in general (we may convert non-trapping condition
15518 to trapping one), however on i386 we currently emit all
15519 comparisons unordered. */
15520 compare_code = reverse_condition_maybe_unordered (compare_code);
15521 code = reverse_condition_maybe_unordered (code);
15525 compare_code = reverse_condition (compare_code);
15526 code = reverse_condition (code);
15530 compare_code = UNKNOWN;
15531 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15532 && CONST_INT_P (ix86_compare_op1))
15534 if (ix86_compare_op1 == const0_rtx
15535 && (code == LT || code == GE))
15536 compare_code = code;
15537 else if (ix86_compare_op1 == constm1_rtx)
15541 else if (code == GT)
15546 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15547 if (compare_code != UNKNOWN
15548 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15549 && (cf == -1 || ct == -1))
15551 /* If lea code below could be used, only optimize
15552 if it results in a 2 insn sequence. */
15554 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15555 || diff == 3 || diff == 5 || diff == 9)
15556 || (compare_code == LT && ct == -1)
15557 || (compare_code == GE && cf == -1))
15560 * notl op1 (if necessary)
15568 code = reverse_condition (code);
15571 out = emit_store_flag (out, code, ix86_compare_op0,
15572 ix86_compare_op1, VOIDmode, 0, -1);
15574 out = expand_simple_binop (mode, IOR,
15576 out, 1, OPTAB_DIRECT);
15577 if (out != operands[0])
15578 emit_move_insn (operands[0], out);
15580 return 1; /* DONE */
/* diff = ct - cf fits an lea addressing form (scale 1/2/4/8, with
   optional base), so select via setcc + lea.  */
15585 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15586 || diff == 3 || diff == 5 || diff == 9)
15587 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15589 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15595 * lea cf(dest*(ct-cf)),dest
15599 * This also catches the degenerate setcc-only case.
15605 out = emit_store_flag (out, code, ix86_compare_op0,
15606 ix86_compare_op1, VOIDmode, 0, 1);
15609 /* On x86_64 the lea instruction operates on Pmode, so we need
15610 to get arithmetics done in proper mode to match. */
15612 tmp = copy_rtx (out);
15616 out1 = copy_rtx (out);
15617 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15621 tmp = gen_rtx_PLUS (mode, tmp, out1);
15627 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15630 if (!rtx_equal_p (tmp, out))
15633 out = force_operand (tmp, copy_rtx (out));
15635 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15637 if (!rtx_equal_p (out, operands[0]))
15638 emit_move_insn (operands[0], copy_rtx (out));
15640 return 1; /* DONE */
15644 * General case: Jumpful:
15645 * xorl dest,dest cmpl op1, op2
15646 * cmpl op1, op2 movl ct, dest
15647 * setcc dest jcc 1f
15648 * decl dest movl cf, dest
15649 * andl (cf-ct),dest 1:
15652 * Size 20. Size 14.
15654 * This is reasonably steep, but branch mispredict costs are
15655 * high on modern cpus, so consider failing only if optimizing
15659 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15660 && BRANCH_COST (optimize_insn_for_speed_p (),
15665 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15670 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15672 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15674 /* We may be reversing unordered compare to normal compare,
15675 that is not valid in general (we may convert non-trapping
15676 condition to trapping one), however on i386 we currently
15677 emit all comparisons unordered. */
15678 code = reverse_condition_maybe_unordered (code);
15682 code = reverse_condition (code);
15683 if (compare_code != UNKNOWN)
15684 compare_code = reverse_condition (compare_code);
15688 if (compare_code != UNKNOWN)
15690 /* notl op1 (if needed)
15695 For x < 0 (resp. x <= -1) there will be no notl,
15696 so if possible swap the constants to get rid of the
15698 True/false will be -1/0 while code below (store flag
15699 followed by decrement) is 0/-1, so the constants need
15700 to be exchanged once more. */
15702 if (compare_code == GE || !cf)
15704 code = reverse_condition (code);
15709 HOST_WIDE_INT tmp = cf;
15714 out = emit_store_flag (out, code, ix86_compare_op0,
15715 ix86_compare_op1, VOIDmode, 0, -1);
15719 out = emit_store_flag (out, code, ix86_compare_op0,
15720 ix86_compare_op1, VOIDmode, 0, 1);
/* Turn the 0/1 flag into a 0/-1 mask, then AND/ADD select.  */
15722 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15723 copy_rtx (out), 1, OPTAB_DIRECT);
15726 out = expand_simple_binop (mode, AND, copy_rtx (out),
15727 gen_int_mode (cf - ct, mode),
15728 copy_rtx (out), 1, OPTAB_DIRECT);
15730 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15731 copy_rtx (out), 1, OPTAB_DIRECT);
15732 if (!rtx_equal_p (out, operands[0]))
15733 emit_move_insn (operands[0], copy_rtx (out));
15735 return 1; /* DONE */
15739 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15741 /* Try a few things more with specific constants and a variable. */
15744 rtx var, orig_out, out, tmp;
15746 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15747 return 0; /* FAIL */
15749 /* If one of the two operands is an interesting constant, load a
15750 constant with the above and mask it in with a logical operation. */
15752 if (CONST_INT_P (operands[2]))
15755 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15756 operands[3] = constm1_rtx, op = and_optab;
15757 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15758 operands[3] = const0_rtx, op = ior_optab;
15760 return 0; /* FAIL */
15762 else if (CONST_INT_P (operands[3]))
15765 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15766 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the guard below tests operands[3] — the constant
   just matched against -1, so "!= const0_rtx" is always true here.
   The mirror branch above (and upstream FSF GCC) tests operands[2]
   != const0_rtx instead; verify against the FSF sources before
   relying on this path.  */
15767 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15768 operands[2] = const0_rtx, op = ior_optab;
15770 return 0; /* FAIL */
15773 return 0; /* FAIL */
15775 orig_out = operands[0];
15776 tmp = gen_reg_rtx (mode);
15779 /* Recurse to get the constant loaded. */
15780 if (ix86_expand_int_movcc (operands) == 0)
15781 return 0; /* FAIL */
15783 /* Mask in the interesting variable. */
15784 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15786 if (!rtx_equal_p (out, orig_out))
15787 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15789 return 1; /* DONE */
15793 * For comparison with above,
/* Fall through to a real cmov: force non-immediate arms into
   registers and guard against destination/arm overlap.  */
15803 if (! nonimmediate_operand (operands[2], mode))
15804 operands[2] = force_reg (mode, operands[2]);
15805 if (! nonimmediate_operand (operands[3], mode))
15806 operands[3] = force_reg (mode, operands[3]);
15808 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15810 rtx tmp = gen_reg_rtx (mode);
15811 emit_move_insn (tmp, operands[3]);
15814 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15816 rtx tmp = gen_reg_rtx (mode);
15817 emit_move_insn (tmp, operands[2]);
15821 if (! register_operand (operands[2], VOIDmode)
15823 || ! register_operand (operands[3], VOIDmode)))
15824 operands[2] = force_reg (mode, operands[2]);
15827 && ! register_operand (operands[3], VOIDmode))
15828 operands[3] = force_reg (mode, operands[3]);
15830 emit_insn (compare_seq);
15831 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15832 gen_rtx_IF_THEN_ELSE (mode,
15833 compare_op, operands[2],
/* Extra cmovs for the FP auxiliary bypass/second tests.  */
15836 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15837 gen_rtx_IF_THEN_ELSE (mode,
15839 copy_rtx (operands[3]),
15840 copy_rtx (operands[0]))));
15842 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15843 gen_rtx_IF_THEN_ELSE (mode,
15845 copy_rtx (operands[2]),
15846 copy_rtx (operands[0]))));
15848 return 1; /* DONE */
15851 /* Swap, force into registers, or otherwise massage the two operands
15852 to an sse comparison with a mask result. Thus we differ a bit from
15853 ix86_prepare_fp_compare_args which expects to produce a flags result.
15855 The DEST operand exists to help determine whether to commute commutative
15856 operators. The POP0/POP1 operands are updated in place. The new
15857 comparison code is returned, or UNKNOWN if not implementable. */
15859 static enum rtx_code
15860 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15861 rtx *pop0, rtx *pop1)
/* NOTE(review): the switch framing is elided in this listing; only
   the visible cases are annotated.  */
15869 /* We have no LTGT as an operator. We could implement it with
15870 NE & ORDERED, but this requires an extra temporary. It's
15871 not clear that it's worth it. */
15878 /* These are supported directly. */
15885 /* For commutative operators, try to canonicalize the destination
15886 operand to be first in the comparison - this helps reload to
15887 avoid extra moves. */
15888 if (!dest || !rtx_equal_p (dest, *pop1))
15896 /* These are not supported directly. Swap the comparison operands
15897 to transform into something that is supported. */
15901 code = swap_condition (code);
15905 gcc_unreachable ();
15911 /* Detect conditional moves that exactly match min/max operational
15912 semantics. Note that this is IEEE safe, as long as we don't
15913 interchange the operands.
15915 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15916 and TRUE if the operation is successful and instructions are emitted. */
15919 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15920 rtx cmp_op1, rtx if_true, rtx if_false)
15922 enum machine_mode mode;
/* UNGE is handled by swapping the arms (turning it into the
   complementary selection).  */
15928 else if (code == UNGE)
15931 if_true = if_false;
/* The cmov arms must be exactly the compare operands (in either
   order) for this to be a min/max.  */
15937 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15939 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15944 mode = GET_MODE (dest);
15946 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15947 but MODE may be a vector mode and thus not appropriate. */
15948 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict-IEEE path: wrap in an UNSPEC so the operand order (and
   hence NaN/signed-zero behavior of minss/maxss) is preserved.  */
15950 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15953 if_true = force_reg (mode, if_true);
15954 v = gen_rtvec (2, if_true, if_false);
15955 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Relaxed path: plain SMIN/SMAX rtl.  */
15959 code = is_min ? SMIN : SMAX;
15960 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15963 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15967 /* Expand an sse vector comparison. Return the register with the result. */
15970 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15971 rtx op_true, rtx op_false)
15973 enum machine_mode mode = GET_MODE (dest);
/* First operand must be a register; second may be a memory operand.  */
15976 cmp_op0 = force_reg (mode, cmp_op0);
15977 if (!nonimmediate_operand (cmp_op1, mode))
15978 cmp_op1 = force_reg (mode, cmp_op1);
/* Use a fresh register if DEST overlaps any input, so the compare
   does not clobber values still needed by the caller's select.  */
15981 || reg_overlap_mentioned_p (dest, op_true)
15982 || reg_overlap_mentioned_p (dest, op_false))
15983 dest = gen_reg_rtx (mode);
15985 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15986 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15991 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15992 operations. This is used for both scalar and vector conditional moves. */
15995 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15997 enum machine_mode mode = GET_MODE (dest);
/* CMP is an all-ones/all-zeros mask; a zero arm reduces the select
   to a single AND (or ANDN).  */
16000 if (op_false == CONST0_RTX (mode))
16002 op_true = force_reg (mode, op_true);
16003 x = gen_rtx_AND (mode, cmp, op_true);
16004 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16006 else if (op_true == CONST0_RTX (mode))
16008 op_false = force_reg (mode, op_false);
16009 x = gen_rtx_NOT (mode, cmp);
16010 x = gen_rtx_AND (mode, x, op_false);
16011 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real vector cmov (pcmov), no masking needed.  */
16013 else if (TARGET_SSE5)
16015 rtx pcmov = gen_rtx_SET (mode, dest,
16016 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* Generic path: (op_true & cmp) | (op_false & ~cmp).  */
16023 op_true = force_reg (mode, op_true);
16024 op_false = force_reg (mode, op_false);
16026 t2 = gen_reg_rtx (mode);
16028 t3 = gen_reg_rtx (mode);
16032 x = gen_rtx_AND (mode, op_true, cmp);
16033 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
16035 x = gen_rtx_NOT (mode, cmp);
16036 x = gen_rtx_AND (mode, x, op_false);
16037 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
16039 x = gen_rtx_IOR (mode, t3, t2);
16040 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16044 /* Expand a floating-point conditional move. Return true if successful. */
16047 ix86_expand_fp_movcc (rtx operands[])
16049 enum machine_mode mode = GET_MODE (operands[0]);
16050 enum rtx_code code = GET_CODE (operands[1]);
16051 rtx tmp, compare_op, second_test, bypass_test;
16053 ix86_compare_op0 = XEXP (operands[1], 0);
16054 ix86_compare_op1 = XEXP (operands[1], 1);
/* SSE scalar path: do the cmov as compare-to-mask + logical select.  */
16055 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16057 enum machine_mode cmode;
16059 /* Since we've no cmove for sse registers, don't force bad register
16060 allocation just to gain access to it. Deny movcc when the
16061 comparison mode doesn't match the move mode. */
16062 cmode = GET_MODE (ix86_compare_op0);
16063 if (cmode == VOIDmode)
16064 cmode = GET_MODE (ix86_compare_op1);
16068 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16070 &ix86_compare_op1);
16071 if (code == UNKNOWN)
/* Recognize min/max first; otherwise compare + mask select.  */
16074 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
16075 ix86_compare_op1, operands[2],
16079 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
16080 ix86_compare_op1, operands[2], operands[3]);
16081 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
16085 /* The floating point conditional move instructions don't directly
16086 support conditions resulting from a signed integer comparison. */
16088 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
16090 /* The floating point conditional move instructions don't directly
16091 support signed integer comparisons. */
16093 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Reduce to a setcc against zero, which fcmov can handle.  */
16095 gcc_assert (!second_test && !bypass_test);
16096 tmp = gen_reg_rtx (QImode);
16097 ix86_expand_setcc (code, tmp);
16099 ix86_compare_op0 = tmp;
16100 ix86_compare_op1 = const0_rtx;
16101 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy arms that overlap DEST before emitting the extra cmovs for
   the auxiliary bypass/second tests.  */
16103 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
16105 tmp = gen_reg_rtx (mode);
16106 emit_move_insn (tmp, operands[3]);
16109 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
16111 tmp = gen_reg_rtx (mode);
16112 emit_move_insn (tmp, operands[2]);
16116 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16117 gen_rtx_IF_THEN_ELSE (mode, compare_op,
16118 operands[2], operands[3])));
16120 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16121 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
16122 operands[3], operands[0])));
16124 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16125 gen_rtx_IF_THEN_ELSE (mode, second_test,
16126 operands[2], operands[0])));
16131 /* Expand a floating-point vector conditional move; a vcond operation
16132 rather than a movcc operation. */
16135 ix86_expand_fp_vcond (rtx operands[])
16137 enum rtx_code code = GET_CODE (operands[3]);
/* operands[4]/[5] are the compare inputs, operands[1]/[2] the arms.  */
16140 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16141 &operands[4], &operands[5]);
16142 if (code == UNKNOWN)
16145 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
16146 operands[5], operands[1], operands[2]))
/* Generic path: mask compare followed by logical select.  */
16149 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
16150 operands[1], operands[2]);
16151 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
16155 /* Expand a signed/unsigned integral vector conditional move. */
16158 ix86_expand_int_vcond (rtx operands[])
16160 enum machine_mode mode = GET_MODE (operands[0]);
16161 enum rtx_code code = GET_CODE (operands[3]);
16162 bool negate = false;
16165 cop0 = operands[4];
16166 cop1 = operands[5];
16168 /* SSE5 supports all of the comparisons on all vector int types. */
16171 /* Canonicalize the comparison to EQ, GT, GTU. */
/* NE/unsigned forms are rewritten; NEGATE records that the arms
   must be swapped at the end.  */
16182 code = reverse_condition (code);
16188 code = reverse_condition (code);
16194 code = swap_condition (code);
16195 x = cop0, cop0 = cop1, cop1 = x;
16199 gcc_unreachable ();
16202 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16203 if (mode == V2DImode)
16208 /* SSE4.1 supports EQ. */
16209 if (!TARGET_SSE4_1)
16215 /* SSE4.2 supports GT/GTU. */
16216 if (!TARGET_SSE4_2)
16221 gcc_unreachable ();
16225 /* Unsigned parallel compare is not supported by the hardware. Play some
16226 tricks to turn this into a signed comparison against 0. */
16229 cop0 = force_reg (mode, cop0);
16238 /* Perform a parallel modulo subtraction. */
16239 t1 = gen_reg_rtx (mode);
16240 emit_insn ((mode == V4SImode
16242 : gen_subv2di3) (t1, cop0, cop1));
16244 /* Extract the original sign bit of op0. */
16245 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16247 t2 = gen_reg_rtx (mode);
16248 emit_insn ((mode == V4SImode
16250 : gen_andv2di3) (t2, cop0, mask));
16252 /* XOR it back into the result of the subtraction. This results
16253 in the sign bit set iff we saw unsigned underflow. */
16254 x = gen_reg_rtx (mode);
16255 emit_insn ((mode == V4SImode
16257 : gen_xorv2di3) (x, t1, t2));
16265 /* Perform a parallel unsigned saturating subtraction. */
16266 x = gen_reg_rtx (mode);
16267 emit_insn (gen_rtx_SET (VOIDmode, x,
16268 gen_rtx_US_MINUS (mode, cop0, cop1)));
16275 gcc_unreachable ();
/* After the subtraction trick the compare is against zero.  */
16279 cop1 = CONST0_RTX (mode);
/* NEGATE (0 or 1) swaps the true/false arms via index arithmetic.  */
16283 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16284 operands[1+negate], operands[2-negate]);
16286 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16287 operands[2-negate]);
16291 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16292 true if we should do zero extension, else sign extension. HIGH_P is
16293 true if we want the N/2 high elements, else the low elements. */
16296 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16298 enum machine_mode imode = GET_MODE (operands[1]);
16299 rtx (*unpack)(rtx, rtx, rtx);
/* Pick the interleave insn by source element width and half.  */
16306 unpack = gen_vec_interleave_highv16qi;
16308 unpack = gen_vec_interleave_lowv16qi;
16312 unpack = gen_vec_interleave_highv8hi;
16314 unpack = gen_vec_interleave_lowv8hi;
16318 unpack = gen_vec_interleave_highv4si;
16320 unpack = gen_vec_interleave_lowv4si;
16323 gcc_unreachable ();
16326 dest = gen_lowpart (imode, operands[0]);
/* SE supplies the extension bits: zero vector for zero-extend, or a
   0/-1 sign mask (x < 0) for sign-extend.  */
16329 se = force_reg (imode, CONST0_RTX (imode));
16331 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16332 operands[1], pc_rtx, pc_rtx);
16334 emit_insn (unpack (dest, operands[1], se));
16337 /* This function performs the same task as ix86_expand_sse_unpack,
16338 but with SSE4.1 instructions. */
16341 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16343 enum machine_mode imode = GET_MODE (operands[1]);
16344 rtx (*unpack)(rtx, rtx);
/* Select the pmovzx/pmovsx variant by element width and signedness.  */
16351 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16353 unpack = gen_sse4_1_extendv8qiv8hi2;
16357 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16359 unpack = gen_sse4_1_extendv4hiv4si2;
16363 unpack = gen_sse4_1_zero_extendv2siv2di2;
16365 unpack = gen_sse4_1_extendv2siv2di2;
16368 gcc_unreachable ();
16371 dest = operands[0];
/* pmov*x extends the LOW half only, so for the high half shift the
   upper 8 bytes down first.  */
16374 /* Shift higher 8 bytes to lower 8 bytes. */
16375 src = gen_reg_rtx (imode);
16376 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16377 gen_lowpart (TImode, operands[1]),
16383 emit_insn (unpack (dest, src));
16386 /* This function performs the same task as ix86_expand_sse_unpack,
16387 but with sse5 instructions. */
16390 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16392 enum machine_mode imode = GET_MODE (operands[1]);
16393 int pperm_bytes[16];
/* H is the byte offset of the half being widened (8 = high half).  */
16395 int h = (high_p) ? 8 : 0;
16398 rtvec v = rtvec_alloc (16);
16401 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each source byte is followed by either a zero byte
   (PPERM_ZERO via the zero variant) or a replicated sign byte.  */
16406 vs = rtvec_alloc (8);
16407 h2 = (high_p) ? 8 : 0;
16408 for (i = 0; i < 8; i++)
16410 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16411 pperm_bytes[2*i+1] = ((unsigned_p)
16413 : PPERM_SIGN | PPERM_SRC2 | i | h);
16416 for (i = 0; i < 16; i++)
16417 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16419 for (i = 0; i < 8; i++)
16420 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16422 p = gen_rtx_PARALLEL (VOIDmode, vs);
16423 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16425 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16427 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two data bytes then two sign/zero bytes per element.  */
16431 vs = rtvec_alloc (4);
16432 h2 = (high_p) ? 4 : 0;
16433 for (i = 0; i < 4; i++)
16435 sign_extend = ((unsigned_p)
16437 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16438 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16439 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16440 pperm_bytes[4*i+2] = sign_extend;
16441 pperm_bytes[4*i+3] = sign_extend;
16444 for (i = 0; i < 16; i++)
16445 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16447 for (i = 0; i < 4; i++)
16448 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16450 p = gen_rtx_PARALLEL (VOIDmode, vs);
16451 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16453 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16455 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four data bytes then four sign/zero bytes per element.  */
16459 vs = rtvec_alloc (2);
16460 h2 = (high_p) ? 2 : 0;
16461 for (i = 0; i < 2; i++)
16463 sign_extend = ((unsigned_p)
16465 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16466 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16467 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16468 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16469 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16470 pperm_bytes[8*i+4] = sign_extend;
16471 pperm_bytes[8*i+5] = sign_extend;
16472 pperm_bytes[8*i+6] = sign_extend;
16473 pperm_bytes[8*i+7] = sign_extend;
16476 for (i = 0; i < 16; i++)
16477 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16479 for (i = 0; i < 2; i++)
16480 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16482 p = gen_rtx_PARALLEL (VOIDmode, vs);
16483 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16485 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16487 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16491 gcc_unreachable ();
16497 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16498 next narrower integer vector type */
16500 ix86_expand_sse5_pack (rtx operands[3])
16502 enum machine_mode imode = GET_MODE (operands[0]);
16503 int pperm_bytes[16];
16505 rtvec v = rtvec_alloc (16);
16507 rtx op0 = operands[0];
16508 rtx op1 = operands[1];
16509 rtx op2 = operands[2];
/* Build a pperm selector that keeps the low byte(s) of each source
   element: op1's elements land in the low half of the result, op2's
   in the high half.  V16QI result: take every other byte.  */
16514 for (i = 0; i < 8; i++)
16516 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16517 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16520 for (i = 0; i < 16; i++)
16521 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16523 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16524 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V8HI result: take the low 2 bytes of each 4-byte element.  */
16528 for (i = 0; i < 4; i++)
16530 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16531 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16532 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16533 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16536 for (i = 0; i < 16; i++)
16537 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16539 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16540 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V4SI result: take the low 4 bytes of each 8-byte element.  */
16544 for (i = 0; i < 2; i++)
16546 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16547 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16548 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16549 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16550 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16551 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16552 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16553 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16556 for (i = 0; i < 16; i++)
16557 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16559 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16560 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16564 gcc_unreachable ();
16570 /* Expand conditional increment or decrement using adc/sbb instructions.
16571 The default case using setcc followed by the conditional move can be
16572 done by generic code. */
/* operands[0] = destination, operands[1] = comparison rtx,
   operands[2] = value to conditionally adjust, operands[3] = +1 or -1.
   Returns nonzero ("DONE") when the adc/sbb sequence was emitted.  */
16574 ix86_expand_int_addcc (rtx operands[])
16576 enum rtx_code code = GET_CODE (operands[1]);
16578 rtx val = const0_rtx;
16579 bool fpcmp = false;
16580 enum machine_mode mode = GET_MODE (operands[0]);
16582 ix86_compare_op0 = XEXP (operands[1], 0);
16583 ix86_compare_op1 = XEXP (operands[1], 1);
/* Only increment/decrement by exactly one is handled here.  */
16584 if (operands[3] != const1_rtx
16585 && operands[3] != constm1_rtx)
/* The comparison must be expressible as a carry-flag test (LTU/GEU).  */
16587 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16588 ix86_compare_op1, &compare_op))
16590 code = GET_CODE (compare_op);
16592 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16593 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16596 code = ix86_fp_compare_code_to_integer (code);
/* FP comparisons must be reversed with unordered-awareness.  */
16603 PUT_CODE (compare_op,
16604 reverse_condition_maybe_unordered
16605 (GET_CODE (compare_op)));
16607 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16609 PUT_MODE (compare_op, mode);
16611 /* Construct either adc or sbb insn. */
/* sbb subtracts the carry; adc adds it.  The choice depends on whether
   the condition is LTU (carry set) and on the sign of operands[3].  */
16612 if ((code == LTU) == (operands[3] == constm1_rtx))
16614 switch (GET_MODE (operands[0]))
16617 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16620 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16623 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16626 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16629 gcc_unreachable ();
16634 switch (GET_MODE (operands[0]))
16637 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16640 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16643 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16646 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16649 gcc_unreachable ();
16652 return 1; /* DONE */
16656 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16657 works for floating point parameters and nonoffsettable memories.
16658 For pushes, it returns just stack offsets; the values will be saved
16659 in the right order. Maximally three parts are generated. */
/* Fills PARTS[] with the word-sized pieces of OPERAND and returns the
   part count (2..4).  Word size is 4 bytes on 32-bit and 8 bytes on
   64-bit targets, as the two size computations below show.  */
16662 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* 32-bit: XFmode occupies 3 SImode words; otherwise size/4 words.  */
16667 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
/* 64-bit: round up to DImode words.  */
16669 size = (GET_MODE_SIZE (mode) + 4) / 8;
16671 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16672 gcc_assert (size >= 2 && size <= 4);
16674 /* Optimize constant pool reference to immediates. This is used by fp
16675 moves, that force all constants to memory to allow combining. */
16676 if (MEM_P (operand) && MEM_READONLY_P (operand))
16678 rtx tmp = maybe_get_pool_constant (operand);
16683 if (MEM_P (operand) && !offsettable_memref_p (operand))
16685 /* The only non-offsetable memories we handle are pushes. */
16686 int ok = push_operand (operand, VOIDmode);
/* For a push, all parts are the same auto-modified stack address.  */
16690 operand = copy_rtx (operand);
16691 PUT_MODE (operand, Pmode);
16692 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16696 if (GET_CODE (operand) == CONST_VECTOR)
16698 enum machine_mode imode = int_mode_for_mode (mode);
16699 /* Caution: if we looked through a constant pool memory above,
16700 the operand may actually have a different mode now. That's
16701 ok, since we want to pun this all the way back to an integer. */
16702 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16703 gcc_assert (operand != NULL);
16709 if (mode == DImode)
16710 split_di (&operand, 1, &parts[0], &parts[1]);
16715 if (REG_P (operand))
/* Hard registers only exist after reload; use consecutive regs.  */
16717 gcc_assert (reload_completed);
16718 for (i = 0; i < size; i++)
16719 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i)
16721 else if (offsettable_memref_p (operand))
16723 operand = adjust_address (operand, SImode, 0);
16724 parts[0] = operand;
16725 for (i = 1; i < size; i++)
16726 parts[i] = adjust_address (operand, SImode, 4 * i);
16728 else if (GET_CODE (operand) == CONST_DOUBLE)
/* Split an FP constant into its target-format SImode words.  */
16733 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16737 real_to_target (l, &r, mode);
16738 parts[3] = gen_int_mode (l[3], SImode);
16739 parts[2] = gen_int_mode (l[2], SImode);
16742 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16743 parts[2] = gen_int_mode (l[2], SImode);
16746 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16749 gcc_unreachable ();
16751 parts[1] = gen_int_mode (l[1], SImode);
16752 parts[0] = gen_int_mode (l[0], SImode);
16755 gcc_unreachable ();
/* 64-bit target: split into DImode (plus SImode/DImode upper) parts.  */
16760 if (mode == TImode)
16761 split_ti (&operand, 1, &parts[0], &parts[1]);
16762 if (mode == XFmode || mode == TFmode)
16764 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16765 if (REG_P (operand))
16767 gcc_assert (reload_completed);
16768 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16769 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16771 else if (offsettable_memref_p (operand))
16773 operand = adjust_address (operand, DImode, 0);
16774 parts[0] = operand;
16775 parts[1] = adjust_address (operand, upper_mode, 8);
16777 else if (GET_CODE (operand) == CONST_DOUBLE)
16782 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16783 real_to_target (l, &r, mode);
16785 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16786 if (HOST_BITS_PER_WIDE_INT >= 64)
16789 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16790 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16793 parts[0] = immed_double_const (l[0], l[1], DImode);
16795 if (upper_mode == SImode)
16796 parts[1] = gen_int_mode (l[2], SImode);
16797 else if (HOST_BITS_PER_WIDE_INT >= 64)
16800 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16801 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16804 parts[1] = immed_double_const (l[2], l[3], DImode);
16807 gcc_unreachable ();
16814 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16815 Return false when normal moves are needed; true when all required
16816 insns have been emitted. Operands 2-4 contain the input values
16817 in the correct order; operands 5-7 contain the output values. */
16820 ix86_split_long_move (rtx operands[])
16825 int collisions = 0;
16826 enum machine_mode mode = GET_MODE (operands[0]);
16827 bool collisionparts[4];
16829 /* The DFmode expanders may ask us to move double.
16830 For 64bit target this is single move. By hiding the fact
16831 here we simplify i386.md splitters. */
16832 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16834 /* Optimize constant pool reference to immediates. This is used by
16835 fp moves, that force all constants to memory to allow combining. */
16837 if (MEM_P (operands[1])
16838 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16839 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16840 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16841 if (push_operand (operands[0], VOIDmode))
16843 operands[0] = copy_rtx (operands[0]);
16844 PUT_MODE (operands[0], Pmode);
16847 operands[0] = gen_lowpart (DImode, operands[0]);
16848 operands[1] = gen_lowpart (DImode, operands[1]);
16849 emit_move_insn (operands[0], operands[1]);
16853 /* The only non-offsettable memory we handle is push. */
16854 if (push_operand (operands[0], VOIDmode))
16857 gcc_assert (!MEM_P (operands[0])
16858 || offsettable_memref_p (operands[0]));
/* Decompose both operands into word-sized parts.  */
16860 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16861 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16863 /* When emitting push, take care for source operands on the stack. */
16864 if (push && MEM_P (operands[1])
16865 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16866 for (i = 0; i < nparts - 1; i++)
16867 part[1][i] = change_address (part[1][i],
16868 GET_MODE (part[1][i]),
16869 XEXP (part[1][i + 1], 0));
16871 /* We need to do copy in the right order in case an address register
16872 of the source overlaps the destination. */
16873 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
/* Record which destination parts clobber the source address.  */
16877 for (i = 0; i < nparts; i++)
16880 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16881 if (collisionparts[i])
16885 /* Collision in the middle part can be handled by reordering. */
16886 if (collisions == 1 && nparts == 3 && collisionparts [1])
16888 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16889 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16891 else if (collisions == 1
16893 && (collisionparts [1] || collisionparts [2]))
16895 if (collisionparts [1])
16897 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16898 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16902 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16903 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16907 /* If there are more collisions, we can't handle it by reordering.
16908 Do an lea to the last part and use only one colliding move. */
16909 else if (collisions > 1)
16915 base = part[0][nparts - 1];
16917 /* Handle the case when the last part isn't valid for lea.
16918 Happens in 64-bit mode storing the 12-byte XFmode. */
16919 if (GET_MODE (base) != Pmode)
16920 base = gen_rtx_REG (Pmode, REGNO (base));
16922 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16923 part[1][0] = replace_equiv_address (part[1][0], base);
16924 for (i = 1; i < nparts; i++)
16926 tmp = plus_constant (base, UNITS_PER_WORD * i);
16927 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path: emit the high parts first so they land in order.  */
16938 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16939 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16940 emit_move_insn (part[0][2], part[1][2]);
16942 else if (nparts == 4)
16944 emit_move_insn (part[0][3], part[1][3]);
16945 emit_move_insn (part[0][2], part[1][2]);
16950 /* In 64bit mode we don't have 32bit push available. In case this is
16951 register, it is OK - we will just use larger counterpart. We also
16952 retype memory - these comes from attempt to avoid REX prefix on
16953 moving of second half of TFmode value. */
16954 if (GET_MODE (part[1][1]) == SImode)
16956 switch (GET_CODE (part[1][1]))
16959 part[1][1] = adjust_address (part[1][1], DImode, 0);
16963 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16967 gcc_unreachable ();
16970 if (GET_MODE (part[1][0]) == SImode)
16971 part[1][0] = part[1][1];
16974 emit_move_insn (part[0][1], part[1][1]);
16975 emit_move_insn (part[0][0], part[1][0]);
16979 /* Choose correct order to not overwrite the source before it is copied. */
16980 if ((REG_P (part[0][0])
16981 && REG_P (part[1][1])
16982 && (REGNO (part[0][0]) == REGNO (part[1][1])
16984 && REGNO (part[0][0]) == REGNO (part[1][2]))
16986 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16988 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reverse order: copy from the highest part downward.  */
16990 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16992 operands[2 + i] = part[0][j];
16993 operands[6 + i] = part[1][j];
16998 for (i = 0; i < nparts; i++)
17000 operands[2 + i] = part[0][i];
17001 operands[6 + i] = part[1][i];
17005 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
17006 if (optimize_insn_for_size_p ())
17008 for (j = 0; j < nparts - 1; j++)
17009 if (CONST_INT_P (operands[6 + j])
17010 && operands[6 + j] != const0_rtx
17011 && REG_P (operands[2 + j]))
17012 for (i = j; i < nparts - 1; i++)
17013 if (CONST_INT_P (operands[7 + i])
17014 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
17015 operands[7 + i] = operands[2 + j];
17018 for (i = 0; i < nparts; i++)
17019 emit_move_insn (operands[2 + i], operands[6 + i]);
17024 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
17025 left shift by a constant, either using a single shift or
17026 a sequence of add instructions. */
17029 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* A shift by one is just OPERAND + OPERAND.  */
17033 emit_insn ((mode == DImode
17035 : gen_adddi3) (operand, operand, operand));
/* When not optimizing for size, prefer COUNT adds if that sequence is
   no more expensive than one constant shift on this CPU.  */
17037 else if (!optimize_insn_for_size_p ()
17038 && count * ix86_cost->add <= ix86_cost->shift_const)
17041 for (i=0; i<count; i++)
17043 emit_insn ((mode == DImode
17045 : gen_adddi3) (operand, operand, operand));
/* Otherwise emit a single shift-by-constant.  */
17049 emit_insn ((mode == DImode
17051 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into single-word operations.  SCRATCH, if non-NULL and cmov is
   available, is used for the branchless variable-count adjustment.  */
17055 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
17057 rtx low[2], high[2];
17059 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: resolve everything at expand time.  */
17061 if (CONST_INT_P (operands[2]))
17063 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17064 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word size: low word becomes 0, high word gets the old low
   word shifted by the remainder.  */
17066 if (count >= single_width)
17068 emit_move_insn (high[0], low[1]);
17069 emit_move_insn (low[0], const0_rtx);
17071 if (count > single_width)
17072 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Count < word size: shld to carry bits into the high word, then
   shift the low word.  */
17076 if (!rtx_equal_p (operands[0], operands[1]))
17077 emit_move_insn (operands[0], operands[1]);
17078 emit_insn ((mode == DImode
17080 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
17081 ix86_expand_ashl_const (low[0], count, mode);
17086 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
/* Special-case 1 << N.  */
17088 if (operands[1] == const1_rtx)
17090 /* Assuming we've chosen QImode-capable registers, then 1 << N
17091 can be done with two 32/64-bit shifts, no branches, no cmoves. */
17092 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17094 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17096 ix86_expand_clear (low[0]);
17097 ix86_expand_clear (high[0]);
17098 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
/* setcc the low byte of each half from the ZF test of bit 5/6.  */
17100 d = gen_lowpart (QImode, low[0]);
17101 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17102 s = gen_rtx_EQ (QImode, flags, const0_rtx);
17103 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17105 d = gen_lowpart (QImode, high[0]);
17106 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17107 s = gen_rtx_NE (QImode, flags, const0_rtx);
17108 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17111 /* Otherwise, we can get the same results by manually performing
17112 a bit extract operation on bit 5/6, and then performing the two
17113 shifts. The two methods of getting 0/1 into low/high are exactly
17114 the same size. Avoiding the shift in the bit extract case helps
17115 pentium4 a bit; no one else seems to care much either way. */
17120 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
17121 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
17123 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
17124 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) or bit 6 (TImode) of the shift count.  */
17126 emit_insn ((mode == DImode
17128 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
17129 emit_insn ((mode == DImode
17131 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
17132 emit_move_insn (low[0], high[0]);
17133 emit_insn ((mode == DImode
17135 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
/* Shift the 0/1 pair left by the (masked) count.  */
17138 emit_insn ((mode == DImode
17140 : gen_ashldi3) (low[0], low[0], operands[2]));
17141 emit_insn ((mode == DImode
17143 : gen_ashldi3) (high[0], high[0], operands[2]));
17147 if (operands[1] == constm1_rtx)
17149 /* For -1 << N, we can avoid the shld instruction, because we
17150 know that we're shifting 0...31/63 ones into a -1. */
17151 emit_move_insn (low[0], constm1_rtx);
17152 if (optimize_insn_for_size_p ())
17153 emit_move_insn (high[0], low[0]);
17155 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld + shl, then fix up when the
   count is >= word size.  */
17159 if (!rtx_equal_p (operands[0], operands[1]))
17160 emit_move_insn (operands[0], operands[1]);
17162 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17163 emit_insn ((mode == DImode
17165 : gen_x86_64_shld) (high[0], low[0], operands[2]));
17168 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
17170 if (TARGET_CMOVE && scratch)
17172 ix86_expand_clear (scratch);
17173 emit_insn ((mode == DImode
17174 ? gen_x86_shift_adj_1
17175 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
/* No cmov/scratch: adjustment pattern that uses a branch.  */
17179 emit_insn ((mode == DImode
17180 ? gen_x86_shift_adj_2
17181 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word
   operations; mirror image of ix86_split_ashl.  */
17185 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17187 rtx low[2], high[2];
17189 const int single_width = mode == DImode ? 32 : 64;
17191 if (CONST_INT_P (operands[2]))
17193 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17194 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximum count: both words become a copy of the sign word.  */
17196 if (count == single_width * 2 - 1)
17198 emit_move_insn (high[0], high[1]);
17199 emit_insn ((mode == DImode
17201 : gen_ashrdi3) (high[0], high[0],
17202 GEN_INT (single_width - 1)));
17203 emit_move_insn (low[0], high[0]);
/* Count >= word size: low gets the old high shifted; high becomes
   all sign bits.  */
17206 else if (count >= single_width)
17208 emit_move_insn (low[0], high[1]);
17209 emit_move_insn (high[0], low[0]);
17210 emit_insn ((mode == DImode
17212 : gen_ashrdi3) (high[0], high[0],
17213 GEN_INT (single_width - 1)));
17214 if (count > single_width)
17215 emit_insn ((mode == DImode
17217 : gen_ashrdi3) (low[0], low[0],
17218 GEN_INT (count - single_width)));
/* Count < word size: shrd into the low word, sar the high word.  */
17222 if (!rtx_equal_p (operands[0], operands[1]))
17223 emit_move_insn (operands[0], operands[1]);
17224 emit_insn ((mode == DImode
17226 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17227 emit_insn ((mode == DImode
17229 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count.  */
17234 if (!rtx_equal_p (operands[0], operands[1]))
17235 emit_move_insn (operands[0], operands[1]);
17237 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17239 emit_insn ((mode == DImode
17241 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17242 emit_insn ((mode == DImode
17244 : gen_ashrdi3) (high[0], high[0], operands[2]));
17246 if (TARGET_CMOVE && scratch)
/* Scratch holds the all-sign-bits word for the cmov fixup.  */
17248 emit_move_insn (scratch, high[0]);
17249 emit_insn ((mode == DImode
17251 : gen_ashrdi3) (scratch, scratch,
17252 GEN_INT (single_width - 1)));
17253 emit_insn ((mode == DImode
17254 ? gen_x86_shift_adj_1
17255 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17259 emit_insn ((mode == DImode
17260 ? gen_x86_shift_adj_3
17261 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word operations;
   like ix86_split_ashr but filling with zeros instead of sign bits.  */
17266 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17268 rtx low[2], high[2];
17270 const int single_width = mode == DImode ? 32 : 64;
17272 if (CONST_INT_P (operands[2]))
17274 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17275 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word size: low gets the old high; high becomes zero.  */
17277 if (count >= single_width)
17279 emit_move_insn (low[0], high[1]);
17280 ix86_expand_clear (high[0]);
17282 if (count > single_width)
17283 emit_insn ((mode == DImode
17285 : gen_lshrdi3) (low[0], low[0],
17286 GEN_INT (count - single_width)));
/* Count < word size: shrd into the low word, shr the high word.  */
17290 if (!rtx_equal_p (operands[0], operands[1]))
17291 emit_move_insn (operands[0], operands[1]);
17292 emit_insn ((mode == DImode
17294 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17295 emit_insn ((mode == DImode
17297 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count.  */
17302 if (!rtx_equal_p (operands[0], operands[1]))
17303 emit_move_insn (operands[0], operands[1]);
17305 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17307 emit_insn ((mode == DImode
17309 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17310 emit_insn ((mode == DImode
17312 : gen_lshrdi3) (high[0], high[0], operands[2]));
17314 /* Heh. By reversing the arguments, we can reuse this pattern. */
17315 if (TARGET_CMOVE && scratch)
17317 ix86_expand_clear (scratch);
17318 emit_insn ((mode == DImode
17319 ? gen_x86_shift_adj_1
17320 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17324 emit_insn ((mode == DImode
17325 ? gen_x86_shift_adj_2
17326 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17330 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* PROB is in REG_BR_PROB_BASE units; attaches a REG_BR_PROB note to the
   last emitted insn, which must be a jump.  */
17332 predict_jump (int prob)
17334 rtx insn = get_last_insn ();
17335 gcc_assert (JUMP_P (insn));
17336 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17339 /* Helper function for the string operations below. Tests whether VARIABLE
17340 is aligned to VALUE bytes. If true, jump to the label. */
/* Returns the label to be emitted after the conditionally skipped code.
   EPILOGUE selects the branch probability hint (50% vs 90% taken).  */
17342 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17344 rtx label = gen_label_rtx ();
17345 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* tmpcount = variable & value; jump to LABEL when the bits are clear.  */
17346 if (GET_MODE (variable) == DImode)
17347 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17349 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17350 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17353 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17355 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17359 /* Adjust COUNTER by the VALUE. */
/* Emits COUNTREG -= VALUE using the add pattern for COUNTREG's mode.  */
17361 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17363 if (GET_MODE (countreg) == DImode)
17364 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17366 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17369 /* Zero extend possibly SImode EXP to Pmode register. */
/* Constants (VOIDmode) are simply forced into a Pmode register; values
   already in Pmode are copied; anything else is zero-extended SI->DI.  */
17371 ix86_zero_extend_to_Pmode (rtx exp)
17374 if (GET_MODE (exp) == VOIDmode)
17375 return force_reg (Pmode, exp);
17376 if (GET_MODE (exp) == Pmode)
17377 return copy_to_mode_reg (Pmode, exp);
17378 r = gen_reg_rtx (Pmode);
17379 emit_insn (gen_zero_extendsidi2 (r, exp));
17383 /* Divide COUNTREG by SCALE. */
/* SCALE must be a power of two (exact_log2 below); constant counts are
   folded at compile time, otherwise a logical right shift is emitted.  */
17385 scale_counter (rtx countreg, int scale)
17388 rtx piece_size_mask;
17392 if (CONST_INT_P (countreg))
17393 return GEN_INT (INTVAL (countreg) / scale);
17394 gcc_assert (REG_P (countreg));
17396 piece_size_mask = GEN_INT (scale - 1);
17397 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17398 GEN_INT (exact_log2 (scale)),
17399 NULL, 1, OPTAB_DIRECT);
17403 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17404 DImode for constant loop counts. */
17406 static enum machine_mode
17407 counter_mode (rtx count_exp)
17409 if (GET_MODE (count_exp) != VOIDmode)
17410 return GET_MODE (count_exp);
17411 if (!CONST_INT_P (count_exp))
/* A 64-bit constant that doesn't fit 32 bits requires DImode.  */
17413 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17418 /* When SRCPTR is non-NULL, output simple loop to move memory
17419 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17420 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
17421 equivalent loop to set memory by VALUE (supposed to be in MODE).
17423 The size is rounded down to whole number of chunk size moved at once.
17424 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
17428 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17429 rtx destptr, rtx srcptr, rtx value,
17430 rtx count, enum machine_mode mode, int unroll,
17433 rtx out_label, top_label, iter, tmp;
17434 enum machine_mode iter_mode = counter_mode (count);
17435 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
/* Mask rounding COUNT down to a multiple of the per-iteration size.  */
17436 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17442 top_label = gen_label_rtx ();
17443 out_label = gen_label_rtx ();
17444 iter = gen_reg_rtx (iter_mode);
17446 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17447 NULL, 1, OPTAB_DIRECT);
17448 /* Those two should combine. */
17449 if (piece_size == const1_rtx)
/* Skip the loop entirely when the rounded size is zero.  */
17451 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17453 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17455 emit_move_insn (iter, const0_rtx);
17457 emit_label (top_label);
/* Address each chunk as destptr + iter (and srcptr + iter).  */
17459 tmp = convert_modes (Pmode, iter_mode, iter, true);
17460 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17461 destmem = change_address (destmem, mode, x_addr);
17465 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17466 srcmem = change_address (srcmem, mode, y_addr);
17468 /* When unrolling for chips that reorder memory reads and writes,
17469 we can save registers by using single temporary.
17470 Also using 4 temporaries is overkill in 32bit mode. */
17471 if (!TARGET_64BIT && 0)
17473 for (i = 0; i < unroll; i++)
17478 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17480 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17482 emit_move_insn (destmem, srcmem);
/* Otherwise: load all chunks into temporaries first, then store,
   so loads and stores are not interleaved.  */
17488 gcc_assert (unroll <= 4);
17489 for (i = 0; i < unroll; i++)
17491 tmpreg[i] = gen_reg_rtx (mode);
17495 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17497 emit_move_insn (tmpreg[i], srcmem);
17499 for (i = 0; i < unroll; i++)
17504 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17506 emit_move_insn (destmem, tmpreg[i]);
/* Memset variant: store VALUE into each chunk.  */
17511 for (i = 0; i < unroll; i++)
17515 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17516 emit_move_insn (destmem, value);
17519 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17520 true, OPTAB_LIB_WIDEN);
17522 emit_move_insn (iter, tmp);
17524 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive a loop-back probability from the expected trip count.  */
17526 if (expected_size != -1)
17528 expected_size /= GET_MODE_SIZE (mode) * unroll;
17529 if (expected_size == 0)
17531 else if (expected_size > REG_BR_PROB_BASE)
17532 predict_jump (REG_BR_PROB_BASE - 1);
17534 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17537 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the pointers past the copied region for the caller.  */
17538 iter = ix86_zero_extend_to_Pmode (iter);
17539 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17540 true, OPTAB_LIB_WIDEN);
17541 if (tmp != destptr)
17542 emit_move_insn (destptr, tmp);
17545 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17546 true, OPTAB_LIB_WIDEN);
17548 emit_move_insn (srcptr, tmp);
17550 emit_label (out_label);
17553 /* Output "rep; mov" instruction.
17554 Arguments have same meaning as for previous function */
17556 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17557 rtx destptr, rtx srcptr,
17559 enum machine_mode mode)
17565 /* If the size is known, it is shorter to use rep movs. */
/* For a known count divisible by 4, byte-wise rep movs is upgraded
   to a wider mode (the count is rescaled below).  */
17566 if (mode == QImode && CONST_INT_P (count)
17567 && !(INTVAL (count) & 3))
/* Normalize both MEMs to BLKmode at the pointer registers.  */
17570 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17571 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17572 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17573 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17574 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* The rep-mov pattern wants the final pointer values:
   ptr + countreg << log2(piece size).  */
17575 if (mode != QImode)
17577 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17578 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17579 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17580 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17581 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17582 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17586 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17587 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* Keep alias info accurate: record the known byte size, or clear a
   stale size when the count is not constant.  */
17589 if (CONST_INT_P (count))
17591 count = GEN_INT (INTVAL (count)
17592 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17593 destmem = shallow_copy_rtx (destmem);
17594 srcmem = shallow_copy_rtx (srcmem);
17595 set_mem_size (destmem, count);
17596 set_mem_size (srcmem, count);
17600 if (MEM_SIZE (destmem))
17601 set_mem_size (destmem, NULL_RTX);
17602 if (MEM_SIZE (srcmem))
17603 set_mem_size (srcmem, NULL_RTX);
17605 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17609 /* Output "rep; stos" instruction.
17610 Arguments have same meaning as for previous function */
17612 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17613 rtx count, enum machine_mode mode,
17619 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17620 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* VALUE must be in MODE for the stos pattern.  */
17621 value = force_reg (mode, gen_lowpart (mode, value));
17622 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Final destination pointer: destptr + countreg << log2(piece size).  */
17623 if (mode != QImode)
17625 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17626 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17627 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17630 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Only a constant-size zeroing store keeps a known MEM size.  */
17631 if (orig_value == const0_rtx && CONST_INT_P (count))
17633 count = GEN_INT (INTVAL (count)
17634 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17635 destmem = shallow_copy_rtx (destmem);
17636 set_mem_size (destmem, count);
17638 else if (MEM_SIZE (destmem))
17639 set_mem_size (destmem, NULL_RTX);
17640 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single MODE-sized move from SRCMEM+OFFSET to DESTMEM+OFFSET
   via the strmov pattern, which also advances both pointers.  */
17644 emit_strmov (rtx destmem, rtx srcmem,
17645 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17647 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17648 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17649 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17652 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
17654 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17655 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant residual count: emit straight-line moves, testing each bit
   of the residual from 16 down to 1.  */
17658 if (CONST_INT_P (count))
17660 HOST_WIDE_INT countval = INTVAL (count);
17663 if ((countval & 0x10) && max_size > 16)
17667 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17668 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17671 gcc_unreachable ();
17674 if ((countval & 0x08) && max_size > 8)
17677 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit fallback: two SImode moves instead of one DImode move.  */
17680 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17681 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17685 if ((countval & 0x04) && max_size > 4)
17687 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17690 if ((countval & 0x02) && max_size > 2)
17692 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17695 if ((countval & 0x01) && max_size > 1)
17697 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable residual: fall back to a byte loop.  */
17704 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17705 count, 1, OPTAB_DIRECT);
17706 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17707 count, QImode, 1, 4);
17711 /* When there are stringops, we can cheaply increase dest and src pointers.
17712 Otherwise we save code size by maintaining offset (zero is readily
17713 available from preceding rep operation) and using x86 addressing modes.
/* Variable residual with single-insn stringops: test alignment bits
   4, 2, 1 and emit one strmov per set bit.  */
17715 if (TARGET_SINGLE_STRINGOP)
17719 rtx label = ix86_expand_aligntest (count, 4, true);
17720 src = change_address (srcmem, SImode, srcptr);
17721 dest = change_address (destmem, SImode, destptr);
17722 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17723 emit_label (label);
17724 LABEL_NUSES (label) = 1;
17728 rtx label = ix86_expand_aligntest (count, 2, true);
17729 src = change_address (srcmem, HImode, srcptr);
17730 dest = change_address (destmem, HImode, destptr);
17731 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17732 emit_label (label);
17733 LABEL_NUSES (label) = 1;
17737 rtx label = ix86_expand_aligntest (count, 1, true);
17738 src = change_address (srcmem, QImode, srcptr);
17739 dest = change_address (destmem, QImode, destptr);
17740 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17741 emit_label (label);
17742 LABEL_NUSES (label) = 1;
/* No stringops: keep an explicit offset register and use plain
   moves with ptr+offset addressing.  */
17747 rtx offset = force_reg (Pmode, const0_rtx);
17752 rtx label = ix86_expand_aligntest (count, 4, true);
17753 src = change_address (srcmem, SImode, srcptr);
17754 dest = change_address (destmem, SImode, destptr);
17755 emit_move_insn (dest, src);
17756 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17757 true, OPTAB_LIB_WIDEN);
17759 emit_move_insn (offset, tmp);
17760 emit_label (label);
17761 LABEL_NUSES (label) = 1;
17765 rtx label = ix86_expand_aligntest (count, 2, true);
17766 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17767 src = change_address (srcmem, HImode, tmp);
17768 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17769 dest = change_address (destmem, HImode, tmp);
17770 emit_move_insn (dest, src);
17771 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17772 true, OPTAB_LIB_WIDEN);
17774 emit_move_insn (offset, tmp);
17775 emit_label (label);
17776 LABEL_NUSES (label) = 1;
17780 rtx label = ix86_expand_aligntest (count, 1, true);
17781 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17782 src = change_address (srcmem, QImode, tmp);
17783 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17784 dest = change_address (destmem, QImode, tmp);
17785 emit_move_insn (dest, src);
17786 emit_label (label);
17787 LABEL_NUSES (label) = 1;
17792 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* NOTE(review): this is a non-contiguous listing -- the embedded original
   line numbers jump (17792, 17794, ...), so braces and some statements
   were dropped by extraction.  Comments below annotate the visible code.  */
17794 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17795 rtx count, int max_size)
/* Mask COUNT down to the residue the main loop did not handle; the result
   is written back into COUNT's register (last arg OPTAB_DIRECT).  */
17798 expand_simple_binop (counter_mode (count), AND, count,
17799 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
/* Store the remaining bytes with a byte-wide (QImode) loop; VALUE is
   narrowed to its low byte.  The trailing arguments of this call are on
   a dropped line -- TODO confirm against the full source.  */
17800 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17801 gen_lowpart (QImode, value), count, QImode,
17805 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* NOTE(review): gappy listing -- braces, `else` branches and some statements
   are missing (see the jumps in the embedded original line numbers).  */
17807 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Constant-count case: emit a straight-line jump-free sequence, testing one
   bit of the residual count per power-of-two chunk size.  */
17811 if (CONST_INT_P (count))
17813 HOST_WIDE_INT countval = INTVAL (count);
/* 16-byte chunk: two DImode stores (64-bit path; the 32-bit alternative
   is on dropped lines).  */
17816 if ((countval & 0x10) && max_size > 16)
17820 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17821 emit_insn (gen_strset (destptr, dest, value));
17822 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17823 emit_insn (gen_strset (destptr, dest, value));
17826 gcc_unreachable ();
/* 8-byte chunk: one DImode store on 64-bit, else two SImode stores.  */
17829 if ((countval & 0x08) && max_size > 8)
17833 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17834 emit_insn (gen_strset (destptr, dest, value));
17838 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17839 emit_insn (gen_strset (destptr, dest, value));
17840 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17841 emit_insn (gen_strset (destptr, dest, value));
/* 4-, 2- and 1-byte tails narrow VALUE to the matching mode.  */
17845 if ((countval & 0x04) && max_size > 4)
17847 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17848 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17851 if ((countval & 0x02) && max_size > 2)
17853 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17854 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17857 if ((countval & 0x01) && max_size > 1)
17859 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17860 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Variable count with a large residue: fall back to a byte loop.  */
17867 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable count, small residue: emit an alignment-test/jump tree, one
   label-guarded store group per power-of-two size.  */
17872 rtx label = ix86_expand_aligntest (count, 16, true);
17875 dest = change_address (destmem, DImode, destptr);
17876 emit_insn (gen_strset (destptr, dest, value));
17877 emit_insn (gen_strset (destptr, dest, value));
17881 dest = change_address (destmem, SImode, destptr);
17882 emit_insn (gen_strset (destptr, dest, value));
17883 emit_insn (gen_strset (destptr, dest, value));
17884 emit_insn (gen_strset (destptr, dest, value));
17885 emit_insn (gen_strset (destptr, dest, value));
17887 emit_label (label);
17888 LABEL_NUSES (label) = 1;
17892 rtx label = ix86_expand_aligntest (count, 8, true);
17895 dest = change_address (destmem, DImode, destptr);
17896 emit_insn (gen_strset (destptr, dest, value));
17900 dest = change_address (destmem, SImode, destptr);
17901 emit_insn (gen_strset (destptr, dest, value));
17902 emit_insn (gen_strset (destptr, dest, value));
17904 emit_label (label);
17905 LABEL_NUSES (label) = 1;
17909 rtx label = ix86_expand_aligntest (count, 4, true);
17910 dest = change_address (destmem, SImode, destptr);
17911 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17912 emit_label (label);
17913 LABEL_NUSES (label) = 1;
17917 rtx label = ix86_expand_aligntest (count, 2, true);
17918 dest = change_address (destmem, HImode, destptr);
17919 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17920 emit_label (label);
17921 LABEL_NUSES (label) = 1;
17925 rtx label = ix86_expand_aligntest (count, 1, true);
17926 dest = change_address (destmem, QImode, destptr);
17927 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17928 emit_label (label);
17929 LABEL_NUSES (label) = 1;
17933 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
17934 DESIRED_ALIGNMENT. */
/* Variable-count alignment prologue: for each power-of-two step below
   DESIRED_ALIGNMENT that ALIGN does not already guarantee, emit a
   conditional 1-, 2- or 4-byte copy guarded by an alignment test on
   DESTPTR, and shrink COUNT accordingly.  (Gappy listing: the opening
   brace/`static void` lines were dropped by extraction.)  */
17936 expand_movmem_prologue (rtx destmem, rtx srcmem,
17937 rtx destptr, rtx srcptr, rtx count,
17938 int align, int desired_alignment)
/* Copy one byte if DESTPTR is odd.  */
17940 if (align <= 1 && desired_alignment > 1)
17942 rtx label = ix86_expand_aligntest (destptr, 1, false);
17943 srcmem = change_address (srcmem, QImode, srcptr);
17944 destmem = change_address (destmem, QImode, destptr);
17945 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17946 ix86_adjust_counter (count, 1);
17947 emit_label (label);
17948 LABEL_NUSES (label) = 1;
/* Copy two bytes if DESTPTR is 2-misaligned.  */
17950 if (align <= 2 && desired_alignment > 2)
17952 rtx label = ix86_expand_aligntest (destptr, 2, false);
17953 srcmem = change_address (srcmem, HImode, srcptr);
17954 destmem = change_address (destmem, HImode, destptr);
17955 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17956 ix86_adjust_counter (count, 2);
17957 emit_label (label);
17958 LABEL_NUSES (label) = 1;
/* Copy four bytes if DESTPTR is 4-misaligned.  */
17960 if (align <= 4 && desired_alignment > 4)
17962 rtx label = ix86_expand_aligntest (destptr, 4, false);
17963 srcmem = change_address (srcmem, SImode, srcptr);
17964 destmem = change_address (destmem, SImode, destptr);
17965 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17966 ix86_adjust_counter (count, 4);
17967 emit_label (label);
17968 LABEL_NUSES (label) = 1;
/* Only alignments up to 8 are handled by the steps above.  */
17970 gcc_assert (desired_alignment <= 8);
17973 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
17974 ALIGN_BYTES is how many bytes need to be copied. */
17976 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17977 int desired_align, int align_bytes)
17980 rtx src_size, dst_size;
17982 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17983 if (src_align_bytes >= 0)
17984 src_align_bytes = desired_align - src_align_bytes;
17985 src_size = MEM_SIZE (src);
17986 dst_size = MEM_SIZE (dst);
17987 if (align_bytes & 1)
17989 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17990 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17992 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17994 if (align_bytes & 2)
17996 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17997 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17998 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17999 set_mem_align (dst, 2 * BITS_PER_UNIT);
18000 if (src_align_bytes >= 0
18001 && (src_align_bytes & 1) == (align_bytes & 1)
18002 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
18003 set_mem_align (src, 2 * BITS_PER_UNIT);
18005 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18007 if (align_bytes & 4)
18009 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18010 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
18011 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18012 set_mem_align (dst, 4 * BITS_PER_UNIT);
18013 if (src_align_bytes >= 0)
18015 unsigned int src_align = 0;
18016 if ((src_align_bytes & 3) == (align_bytes & 3))
18018 else if ((src_align_bytes & 1) == (align_bytes & 1))
18020 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18021 set_mem_align (src, src_align * BITS_PER_UNIT);
18024 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18026 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18027 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
18028 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18029 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18030 if (src_align_bytes >= 0)
18032 unsigned int src_align = 0;
18033 if ((src_align_bytes & 7) == (align_bytes & 7))
18035 else if ((src_align_bytes & 3) == (align_bytes & 3))
18037 else if ((src_align_bytes & 1) == (align_bytes & 1))
18039 if (src_align > (unsigned int) desired_align)
18040 src_align = desired_align;
18041 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18042 set_mem_align (src, src_align * BITS_PER_UNIT);
18045 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18047 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
18052 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
18053 DESIRED_ALIGNMENT. */
/* Variable-count memset alignment prologue; mirrors expand_movmem_prologue
   but emits stores of VALUE (narrowed per step) instead of copies.
   (Gappy listing: braces/`static void` lines dropped by extraction.)  */
18055 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
18056 int align, int desired_alignment)
/* Store one byte if DESTPTR is odd.  */
18058 if (align <= 1 && desired_alignment > 1)
18060 rtx label = ix86_expand_aligntest (destptr, 1, false);
18061 destmem = change_address (destmem, QImode, destptr);
18062 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
18063 ix86_adjust_counter (count, 1);
18064 emit_label (label);
18065 LABEL_NUSES (label) = 1;
/* Store two bytes if DESTPTR is 2-misaligned.  */
18067 if (align <= 2 && desired_alignment > 2)
18069 rtx label = ix86_expand_aligntest (destptr, 2, false);
18070 destmem = change_address (destmem, HImode, destptr);
18071 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
18072 ix86_adjust_counter (count, 2);
18073 emit_label (label);
18074 LABEL_NUSES (label) = 1;
/* Store four bytes if DESTPTR is 4-misaligned.  */
18076 if (align <= 4 && desired_alignment > 4)
18078 rtx label = ix86_expand_aligntest (destptr, 4, false);
18079 destmem = change_address (destmem, SImode, destptr);
18080 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
18081 ix86_adjust_counter (count, 4);
18082 emit_label (label);
18083 LABEL_NUSES (label) = 1;
/* Only alignments up to 8 are handled above.  */
18085 gcc_assert (desired_alignment <= 8);
18088 /* Set enough from DST to align DST known to by aligned by ALIGN to
18089 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
/* Compile-time-count memset alignment prologue: the misalignment is known,
   so emit unconditional 1/2/4-byte stores per set bit of ALIGN_BYTES and
   keep the MEM alignment/size attributes accurate.  (Gappy listing:
   braces, `off = N;` updates and the return are on dropped lines.)  */
18091 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
18092 int desired_align, int align_bytes)
18095 rtx dst_size = MEM_SIZE (dst);
18096 if (align_bytes & 1)
18098 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18100 emit_insn (gen_strset (destreg, dst,
18101 gen_lowpart (QImode, value)));
18103 if (align_bytes & 2)
18105 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18106 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18107 set_mem_align (dst, 2 * BITS_PER_UNIT);
18109 emit_insn (gen_strset (destreg, dst,
18110 gen_lowpart (HImode, value)));
18112 if (align_bytes & 4)
18114 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18115 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18116 set_mem_align (dst, 4 * BITS_PER_UNIT);
18118 emit_insn (gen_strset (destreg, dst,
18119 gen_lowpart (SImode, value)));
/* Switch to BLKmode past the stored prefix; record achieved alignment
   and shrink the recorded size by the bytes already handled.  */
18121 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18122 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18123 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18125 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18129 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
18130 static enum stringop_alg
18131 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
18132 int *dynamic_check)
18134 const struct stringop_algs * algs;
18135 bool optimize_for_speed;
18136 /* Algorithms using the rep prefix want at least edi and ecx;
18137 additionally, memset wants eax and memcpy wants esi. Don't
18138 consider such algorithms if the user has appropriated those
18139 registers for their own purposes. */
18140 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
18142 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
18144 #define ALG_USABLE_P(alg) (rep_prefix_usable \
18145 || (alg != rep_prefix_1_byte \
18146 && alg != rep_prefix_4_byte \
18147 && alg != rep_prefix_8_byte))
18148 const struct processor_costs *cost;
18150 /* Even if the string operation call is cold, we still might spend a lot
18151 of time processing large blocks. */
18152 if (optimize_function_for_size_p (cfun)
18153 || (optimize_insn_for_size_p ()
18154 && expected_size != -1 && expected_size < 256))
18155 optimize_for_speed = false;
18157 optimize_for_speed = true;
18159 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
18161 *dynamic_check = -1;
18163 algs = &cost->memset[TARGET_64BIT != 0];
18165 algs = &cost->memcpy[TARGET_64BIT != 0];
18166 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
18167 return stringop_alg;
18168 /* rep; movq or rep; movl is the smallest variant. */
18169 else if (!optimize_for_speed)
18171 if (!count || (count & 3))
18172 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18174 return rep_prefix_usable ? rep_prefix_4_byte : loop;
18176 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
18178 else if (expected_size != -1 && expected_size < 4)
18179 return loop_1_byte;
18180 else if (expected_size != -1)
18183 enum stringop_alg alg = libcall;
18184 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18186 /* We get here if the algorithms that were not libcall-based
18187 were rep-prefix based and we are unable to use rep prefixes
18188 based on global register usage. Break out of the loop and
18189 use the heuristic below. */
18190 if (algs->size[i].max == 0)
18192 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18194 enum stringop_alg candidate = algs->size[i].alg;
18196 if (candidate != libcall && ALG_USABLE_P (candidate))
18198 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18199 last non-libcall inline algorithm. */
18200 if (TARGET_INLINE_ALL_STRINGOPS)
18202 /* When the current size is best to be copied by a libcall,
18203 but we are still forced to inline, run the heuristic below
18204 that will pick code for medium sized blocks. */
18205 if (alg != libcall)
18209 else if (ALG_USABLE_P (candidate))
18213 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18215 /* When asked to inline the call anyway, try to pick meaningful choice.
18216 We look for maximal size of block that is faster to copy by hand and
18217 take blocks of at most of that size guessing that average size will
18218 be roughly half of the block.
18220 If this turns out to be bad, we might simply specify the preferred
18221 choice in ix86_costs. */
18222 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18223 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18226 enum stringop_alg alg;
18228 bool any_alg_usable_p = true;
18230 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18232 enum stringop_alg candidate = algs->size[i].alg;
18233 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18235 if (candidate != libcall && candidate
18236 && ALG_USABLE_P (candidate))
18237 max = algs->size[i].max;
18239 /* If there aren't any usable algorithms, then recursing on
18240 smaller sizes isn't going to find anything. Just return the
18241 simple byte-at-a-time copy loop. */
18242 if (!any_alg_usable_p)
18244 /* Pick something reasonable. */
18245 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18246 *dynamic_check = 128;
18247 return loop_1_byte;
18251 alg = decide_alg (count, max / 2, memset, dynamic_check);
18252 gcc_assert (*dynamic_check == -1);
18253 gcc_assert (alg != libcall);
18254 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18255 *dynamic_check = max;
18258 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18259 #undef ALG_USABLE_P
18262 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18263 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Picks the destination alignment the prologue should establish for the
   chosen algorithm ALG.  NOTE(review): gappy listing -- the `switch (alg)`
   line, several case labels, `break`s and the assignments inside the
   PentiumPro branches are on dropped lines.  */
18265 decide_alignment (int align,
18266 enum stringop_alg alg,
18269 int desired_align = 0;
18273 gcc_unreachable ();
/* Loop-based algorithms want word-aligned chunks.  */
18275 case unrolled_loop:
18276 desired_align = GET_MODE_SIZE (Pmode);
18278 case rep_prefix_8_byte:
18281 case rep_prefix_4_byte:
18282 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18283 copying whole cacheline at once. */
18284 if (TARGET_PENTIUMPRO)
18289 case rep_prefix_1_byte:
18290 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18291 copying whole cacheline at once. */
18292 if (TARGET_PENTIUMPRO)
/* Never ask for less alignment than is already known, and don't bother
   aligning very small expected blocks.  */
18306 if (desired_align < align)
18307 desired_align = align;
18308 if (expected_size != -1 && expected_size < 4)
18309 desired_align = align;
18310 return desired_align;
18313 /* Return the smallest power of 2 greater than VAL. */
/* NOTE(review): the body of this function (original lines 18316-18321)
   is entirely missing from this listing; only the header comment and
   signature survive.  Recover it from the full source before editing.  */
18315 smallest_pow2_greater_than (int val)
18323 /* Expand string move (memcpy) operation. Use i386 string operations when
18324 profitable. expand_setmem contains similar code. The code depends upon
18325 architecture, block size and alignment, but always has the same
18328 1) Prologue guard: Conditional that jumps up to epilogues for small
18329 blocks that can be handled by epilogue alone. This is faster but
18330 also needed for correctness, since prologue assume the block is larger
18331 than the desired alignment.
18333 Optional dynamic check for size and libcall for large
18334 blocks is emitted here too, with -minline-stringops-dynamically.
18336 2) Prologue: copy first few bytes in order to get destination aligned
18337 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18338 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18339 We emit either a jump tree on power of two sized blocks, or a byte loop.
18341 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18342 with specified algorithm.
18344 4) Epilogue: code copying tail of the block that is too small to be
18345 handled by main body (or up to size guarded by prologue guard). */
/* Expand a memcpy as described by the step comments below: decide the
   algorithm (step 0), guard/align (steps 1-2), main loop (step 3),
   epilogue (step 4).  NOTE(review): gappy listing -- braces, `switch`
   lines, `break`s, `else` branches and some declarations are on dropped
   original lines; the surviving statements are annotated in place.  */
18348 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18349 rtx expected_align_exp, rtx expected_size_exp)
18355 rtx jump_around_label = NULL;
18356 HOST_WIDE_INT align = 1;
18357 unsigned HOST_WIDE_INT count = 0;
18358 HOST_WIDE_INT expected_size = -1;
18359 int size_needed = 0, epilogue_size_needed;
18360 int desired_align = 0, align_bytes = 0;
18361 enum stringop_alg alg;
18363 bool need_zero_guard = false;
18365 if (CONST_INT_P (align_exp))
18366 align = INTVAL (align_exp);
18367 /* i386 can do misaligned access on reasonably increased cost. */
18368 if (CONST_INT_P (expected_align_exp)
18369 && INTVAL (expected_align_exp) > align)
18370 align = INTVAL (expected_align_exp);
18371 /* ALIGN is the minimum of destination and source alignment, but we care here
18372 just about destination alignment. */
18373 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18374 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18376 if (CONST_INT_P (count_exp))
18377 count = expected_size = INTVAL (count_exp);
18378 if (CONST_INT_P (expected_size_exp) && count == 0)
18379 expected_size = INTVAL (expected_size_exp);
18381 /* Make sure we don't need to care about overflow later on. */
18382 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18385 /* Step 0: Decide on preferred algorithm, desired alignment and
18386 size of chunks to be copied by main loop. */
18388 alg = decide_alg (count, expected_size, false, &dynamic_check);
18389 desired_align = decide_alignment (align, alg, expected_size);
18391 if (!TARGET_ALIGN_STRINGOPS)
18392 align = desired_align;
18394 if (alg == libcall)
18396 gcc_assert (alg != no_stringop);
18398 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18399 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18400 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm chunk size; loop algorithms also need a zero-iteration
   guard since their loop bodies assume at least one chunk.  */
18405 gcc_unreachable ();
18407 need_zero_guard = true;
18408 size_needed = GET_MODE_SIZE (Pmode);
18410 case unrolled_loop:
18411 need_zero_guard = true;
18412 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18414 case rep_prefix_8_byte:
18417 case rep_prefix_4_byte:
18420 case rep_prefix_1_byte:
18424 need_zero_guard = true;
18429 epilogue_size_needed = size_needed;
18431 /* Step 1: Prologue guard. */
18433 /* Alignment code needs count to be in register. */
18434 if (CONST_INT_P (count_exp) && desired_align > align)
18436 if (INTVAL (count_exp) > desired_align
18437 && INTVAL (count_exp) > size_needed)
18440 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18441 if (align_bytes <= 0)
18444 align_bytes = desired_align - align_bytes;
18446 if (align_bytes == 0)
18447 count_exp = force_reg (counter_mode (count_exp), count_exp);
18449 gcc_assert (desired_align >= 1 && align >= 1);
18451 /* Ensure that alignment prologue won't copy past end of block. */
18452 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18454 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18455 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18456 Make sure it is power of 2. */
18457 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18461 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18463 /* If main algorithm works on QImode, no epilogue is needed.
18464 For small sizes just don't align anything. */
18465 if (size_needed == 1)
18466 desired_align = align;
/* Small-block guard: jump straight to the epilogue when COUNT is below
   what the prologue+main loop require.  */
18473 label = gen_label_rtx ();
18474 emit_cmp_and_jump_insns (count_exp,
18475 GEN_INT (epilogue_size_needed),
18476 LTU, 0, counter_mode (count_exp), 1, label);
18477 if (expected_size == -1 || expected_size < epilogue_size_needed)
18478 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18480 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18484 /* Emit code to decide on runtime whether library call or inline should be
18486 if (dynamic_check != -1)
18488 if (CONST_INT_P (count_exp))
18490 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18492 emit_block_move_via_libcall (dst, src, count_exp, false);
18493 count_exp = const0_rtx;
18499 rtx hot_label = gen_label_rtx ();
18500 jump_around_label = gen_label_rtx ();
18501 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18502 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18503 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18504 emit_block_move_via_libcall (dst, src, count_exp, false);
18505 emit_jump (jump_around_label);
18506 emit_label (hot_label);
18510 /* Step 2: Alignment prologue. */
18512 if (desired_align > align)
18514 if (align_bytes == 0)
18516 /* Except for the first move in epilogue, we no longer know
18517 constant offset in aliasing info. It don't seems to worth
18518 the pain to maintain it for the first move, so throw away
18520 src = change_address (src, BLKmode, srcreg);
18521 dst = change_address (dst, BLKmode, destreg);
18522 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18527 /* If we know how many bytes need to be stored before dst is
18528 sufficiently aligned, maintain aliasing info accurately. */
18529 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18530 desired_align, align_bytes);
18531 count_exp = plus_constant (count_exp, -align_bytes);
18532 count -= align_bytes;
18534 if (need_zero_guard
18535 && (count < (unsigned HOST_WIDE_INT) size_needed
18536 || (align_bytes == 0
18537 && count < ((unsigned HOST_WIDE_INT) size_needed
18538 + desired_align - align))))
18540 /* It is possible that we copied enough so the main loop will not
18542 gcc_assert (size_needed > 1);
18543 if (label == NULL_RTX)
18544 label = gen_label_rtx ();
18545 emit_cmp_and_jump_insns (count_exp,
18546 GEN_INT (size_needed),
18547 LTU, 0, counter_mode (count_exp), 1, label);
18548 if (expected_size == -1
18549 || expected_size < (desired_align - align) / 2 + size_needed)
18550 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18552 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18555 if (label && size_needed == 1)
18557 emit_label (label);
18558 LABEL_NUSES (label) = 1;
18560 epilogue_size_needed = 1;
18562 else if (label == NULL_RTX)
18563 epilogue_size_needed = size_needed;
18565 /* Step 3: Main loop. */
18571 gcc_unreachable ();
18573 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18574 count_exp, QImode, 1, expected_size);
18577 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18578 count_exp, Pmode, 1, expected_size);
18580 case unrolled_loop:
18581 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18582 registers for 4 temporaries anyway. */
18583 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18584 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18587 case rep_prefix_8_byte:
18588 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18591 case rep_prefix_4_byte:
18592 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18595 case rep_prefix_1_byte:
18596 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18600 /* Adjust properly the offset of src and dest memory for aliasing. */
18601 if (CONST_INT_P (count_exp))
18603 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18604 (count / size_needed) * size_needed);
18605 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18606 (count / size_needed) * size_needed);
18610 src = change_address (src, BLKmode, srcreg);
18611 dst = change_address (dst, BLKmode, destreg);
18614 /* Step 4: Epilogue to copy the remaining bytes. */
18618 /* When the main loop is done, COUNT_EXP might hold original count,
18619 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18620 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18621 bytes. Compensate if needed. */
18623 if (size_needed < epilogue_size_needed)
18626 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18627 GEN_INT (size_needed - 1), count_exp, 1,
18629 if (tmp != count_exp)
18630 emit_move_insn (count_exp, tmp);
18632 emit_label (label);
18633 LABEL_NUSES (label) = 1;
18636 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18637 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18638 epilogue_size_needed);
18639 if (jump_around_label)
18640 emit_label (jump_around_label);
18644 /* Helper function for memcpy. For QImode value 0xXY produce
18645 0xXYXYXYXY of wide specified by MODE. This is essentially
18646 a * 0x10101010, but we can do slightly better than
18647 synth_mult by unwinding the sequence by hand on CPUs with
/* (Gappy listing: the end of the header comment, the `static rtx' line,
   braces and some `v |= ...` constant-splat steps are on dropped lines.)  */
18650 promote_duplicated_reg (enum machine_mode mode, rtx val)
18652 enum machine_mode valmode = GET_MODE (val);
/* Number of shift+or steps needed: 3 for DImode, 2 for SImode.  */
18654 int nops = mode == DImode ? 3 : 2;
18656 gcc_assert (mode == SImode || mode == DImode);
18657 if (val == const0_rtx)
18658 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: splat it at compile time into an immediate.  */
18659 if (CONST_INT_P (val))
18661 HOST_WIDE_INT v = INTVAL (val) & 255;
18665 if (mode == DImode)
18666 v |= (v << 16) << 16;
18667 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18670 if (valmode == VOIDmode)
18672 if (valmode != QImode)
18673 val = gen_lowpart (QImode, val);
18674 if (mode == QImode)
18676 if (!TARGET_PARTIAL_REG_STALL)
/* Cost compare: multiply by the splatted 0x01..01 constant vs. the
   shift/or unwound sequence below.  */
18678 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18679 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18680 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18681 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18683 rtx reg = convert_modes (mode, QImode, val, true);
18684 tmp = promote_duplicated_reg (mode, const1_rtx);
18685 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Hand-unwound splat: duplicate byte->word (insv where cheap, else
   shift+or), then word->dword, then dword->qword for DImode.  */
18690 rtx reg = convert_modes (mode, QImode, val, true);
18692 if (!TARGET_PARTIAL_REG_STALL)
18693 if (mode == SImode)
18694 emit_insn (gen_movsi_insv_1 (reg, reg));
18696 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18699 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18700 NULL, 1, OPTAB_DIRECT);
18702 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18704 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18705 NULL, 1, OPTAB_DIRECT);
18706 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18707 if (mode == SImode)
18709 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18710 NULL, 1, OPTAB_DIRECT);
18711 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18716 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18717 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18718 alignment from ALIGN to DESIRED_ALIGN. */
/* Pick the widest splat mode any part of the expansion will want:
   DImode (64-bit only -- the TARGET_64BIT test is on a dropped line),
   else SImode, else HImode; otherwise pass VAL through unchanged.  */
18720 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18725 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18726 promoted_val = promote_duplicated_reg (DImode, val);
18727 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18728 promoted_val = promote_duplicated_reg (SImode, val);
18729 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18730 promoted_val = promote_duplicated_reg (HImode, val);
18732 promoted_val = val;
18734 return promoted_val;
18737 /* Expand string clear operation (bzero). Use i386 string operations when
18738 profitable. See expand_movmem comment for explanation of individual
18739 steps performed. */
18741 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18742 rtx expected_align_exp, rtx expected_size_exp)
18747 rtx jump_around_label = NULL;
18748 HOST_WIDE_INT align = 1;
18749 unsigned HOST_WIDE_INT count = 0;
18750 HOST_WIDE_INT expected_size = -1;
18751 int size_needed = 0, epilogue_size_needed;
18752 int desired_align = 0, align_bytes = 0;
18753 enum stringop_alg alg;
18754 rtx promoted_val = NULL;
18755 bool force_loopy_epilogue = false;
18757 bool need_zero_guard = false;
18759 if (CONST_INT_P (align_exp))
18760 align = INTVAL (align_exp);
18761 /* i386 can do misaligned access on reasonably increased cost. */
18762 if (CONST_INT_P (expected_align_exp)
18763 && INTVAL (expected_align_exp) > align)
18764 align = INTVAL (expected_align_exp);
18765 if (CONST_INT_P (count_exp))
18766 count = expected_size = INTVAL (count_exp);
18767 if (CONST_INT_P (expected_size_exp) && count == 0)
18768 expected_size = INTVAL (expected_size_exp);
18770 /* Make sure we don't need to care about overflow later on. */
18771 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18774 /* Step 0: Decide on preferred algorithm, desired alignment and
18775 size of chunks to be copied by main loop. */
18777 alg = decide_alg (count, expected_size, true, &dynamic_check);
18778 desired_align = decide_alignment (align, alg, expected_size);
18780 if (!TARGET_ALIGN_STRINGOPS)
18781 align = desired_align;
18783 if (alg == libcall)
18785 gcc_assert (alg != no_stringop);
18787 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18788 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18793 gcc_unreachable ();
18795 need_zero_guard = true;
18796 size_needed = GET_MODE_SIZE (Pmode);
18798 case unrolled_loop:
18799 need_zero_guard = true;
18800 size_needed = GET_MODE_SIZE (Pmode) * 4;
18802 case rep_prefix_8_byte:
18805 case rep_prefix_4_byte:
18808 case rep_prefix_1_byte:
18812 need_zero_guard = true;
18816 epilogue_size_needed = size_needed;
18818 /* Step 1: Prologue guard. */
18820 /* Alignment code needs count to be in register. */
18821 if (CONST_INT_P (count_exp) && desired_align > align)
18823 if (INTVAL (count_exp) > desired_align
18824 && INTVAL (count_exp) > size_needed)
18827 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18828 if (align_bytes <= 0)
18831 align_bytes = desired_align - align_bytes;
18833 if (align_bytes == 0)
18835 enum machine_mode mode = SImode;
18836 if (TARGET_64BIT && (count & ~0xffffffff))
18838 count_exp = force_reg (mode, count_exp);
18841 /* Do the cheap promotion to allow better CSE across the
18842 main loop and epilogue (ie one load of the big constant in the
18843 front of all code. */
18844 if (CONST_INT_P (val_exp))
18845 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18846 desired_align, align);
18847 /* Ensure that alignment prologue won't copy past end of block. */
18848 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18850 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18851 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18852 Make sure it is power of 2. */
18853 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18855 /* To improve performance of small blocks, we jump around the VAL
18856 promoting mode. This means that if the promoted VAL is not constant,
18857 we might not use it in the epilogue and have to use byte
18859 if (epilogue_size_needed > 2 && !promoted_val)
18860 force_loopy_epilogue = true;
18863 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18865 /* If main algorithm works on QImode, no epilogue is needed.
18866 For small sizes just don't align anything. */
18867 if (size_needed == 1)
18868 desired_align = align;
18875 label = gen_label_rtx ();
18876 emit_cmp_and_jump_insns (count_exp,
18877 GEN_INT (epilogue_size_needed),
18878 LTU, 0, counter_mode (count_exp), 1, label);
18879 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18880 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18882 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18885 if (dynamic_check != -1)
18887 rtx hot_label = gen_label_rtx ();
18888 jump_around_label = gen_label_rtx ();
18889 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18890 LEU, 0, counter_mode (count_exp), 1, hot_label);
18891 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18892 set_storage_via_libcall (dst, count_exp, val_exp, false);
18893 emit_jump (jump_around_label);
18894 emit_label (hot_label);
18897 /* Step 2: Alignment prologue. */
18899 /* Do the expensive promotion once we branched off the small blocks. */
18901 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18902 desired_align, align);
18903 gcc_assert (desired_align >= 1 && align >= 1);
18905 if (desired_align > align)
18907 if (align_bytes == 0)
18909 /* Except for the first move in epilogue, we no longer know
18910 constant offset in aliasing info. It doesn't seem worth
18911 the pain to maintain it for the first move, so throw away
18913 dst = change_address (dst, BLKmode, destreg);
18914 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18919 /* If we know how many bytes need to be stored before dst is
18920 sufficiently aligned, maintain aliasing info accurately. */
18921 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18922 desired_align, align_bytes);
18923 count_exp = plus_constant (count_exp, -align_bytes);
18924 count -= align_bytes;
18926 if (need_zero_guard
18927 && (count < (unsigned HOST_WIDE_INT) size_needed
18928 || (align_bytes == 0
18929 && count < ((unsigned HOST_WIDE_INT) size_needed
18930 + desired_align - align))))
18932 /* It is possible that we copied enough so the main loop will not
18934 gcc_assert (size_needed > 1);
18935 if (label == NULL_RTX)
18936 label = gen_label_rtx ();
18937 emit_cmp_and_jump_insns (count_exp,
18938 GEN_INT (size_needed),
18939 LTU, 0, counter_mode (count_exp), 1, label);
18940 if (expected_size == -1
18941 || expected_size < (desired_align - align) / 2 + size_needed)
18942 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18944 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18947 if (label && size_needed == 1)
18949 emit_label (label);
18950 LABEL_NUSES (label) = 1;
18952 promoted_val = val_exp;
18953 epilogue_size_needed = 1;
18955 else if (label == NULL_RTX)
18956 epilogue_size_needed = size_needed;
18958 /* Step 3: Main loop. */
18964 gcc_unreachable ();
18966 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18967 count_exp, QImode, 1, expected_size);
18970 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18971 count_exp, Pmode, 1, expected_size);
18973 case unrolled_loop:
18974 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18975 count_exp, Pmode, 4, expected_size);
18977 case rep_prefix_8_byte:
18978 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18981 case rep_prefix_4_byte:
18982 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18985 case rep_prefix_1_byte:
18986 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18990 /* Adjust properly the offset of src and dest memory for aliasing. */
18991 if (CONST_INT_P (count_exp))
18992 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18993 (count / size_needed) * size_needed);
18995 dst = change_address (dst, BLKmode, destreg);
18997 /* Step 4: Epilogue to copy the remaining bytes. */
19001 /* When the main loop is done, COUNT_EXP might hold original count,
19002 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
19003 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
19004 bytes. Compensate if needed. */
19006 if (size_needed < epilogue_size_needed)
19009 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19010 GEN_INT (size_needed - 1), count_exp, 1,
19012 if (tmp != count_exp)
19013 emit_move_insn (count_exp, tmp);
19015 emit_label (label);
19016 LABEL_NUSES (label) = 1;
19019 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19021 if (force_loopy_epilogue)
19022 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
19023 epilogue_size_needed);
19025 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
19026 epilogue_size_needed);
19028 if (jump_around_label)
19029 emit_label (jump_around_label);
19033 /* Expand the appropriate insns for doing strlen if not just doing
19036 out = result, initialized with the start address
19037 align_rtx = alignment of the address.
19038 scratch = scratch register, initialized with the start address when
19039 not aligned, otherwise undefined
19041 This is just the body. It needs the initializations mentioned above and
19042 some address computing at the end. These things are done in i386.md. */
/* Emit the unrolled SImode strlen body.  On entry OUT holds the start
   address; on exit it holds the address of the terminating zero byte.
   ALIGN_RTX is the known alignment of the address (see the header comment
   above).  NOTE(review): interior lines of this function are elided in
   this excerpt; the surviving lines are kept byte-identical.  */
19045 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
19049 rtx align_2_label = NULL_RTX;
19050 rtx align_3_label = NULL_RTX;
19051 rtx align_4_label = gen_label_rtx ();
19052 rtx end_0_label = gen_label_rtx ();
19054 rtx tmpreg = gen_reg_rtx (SImode);
19055 rtx scratch = gen_reg_rtx (SImode);
19059 if (CONST_INT_P (align_rtx))
19060 align = INTVAL (align_rtx);
19062 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
19064 /* Is there a known alignment and is it less than 4? */
/* Work on a scratch copy of the address so OUT itself is only advanced
   by the byte-compare steps below.  */
19067 rtx scratch1 = gen_reg_rtx (Pmode);
19068 emit_move_insn (scratch1, out);
19069 /* Is there a known alignment and is it not 2? */
19072 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
19073 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
19075 /* Leave just the 3 lower bits. */
19076 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
19077 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (address & 3): 0 -> already aligned, 2 -> two bytes to
   check, 3 -> one byte, 1 -> fall through and check three bytes.  */
19079 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19080 Pmode, 1, align_4_label);
19081 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
19082 Pmode, 1, align_2_label);
19083 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
19084 Pmode, 1, align_3_label);
19088 /* Since the alignment is 2, we have to check 2 or 0 bytes;
19089 check whether it is aligned to a 4-byte boundary. */
19091 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
19092 NULL_RTX, 0, OPTAB_WIDEN);
19094 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19095 Pmode, 1, align_4_label);
19098 mem = change_address (src, QImode, out);
19100 /* Now compare the bytes. */
19102 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
19103 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
19104 QImode, 1, end_0_label);
19106 /* Increment the address. */
19107 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19109 /* Not needed with an alignment of 2 */
19112 emit_label (align_2_label);
19114 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19117 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19119 emit_label (align_3_label);
19122 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19125 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19128 /* Generate loop to check 4 bytes at a time. It is not a good idea to
19129 align this loop. It gives only huge programs, but does not help to
19131 emit_label (align_4_label);
19133 mem = change_address (src, SImode, out);
19134 emit_move_insn (scratch, mem);
19135 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
19137 /* This formula yields a nonzero result iff one of the bytes is zero.
19138 This saves three branches inside loop and many cycles. */
/* Classic zero-byte detector: (word - 0x01010101) & ~word & 0x80808080
   leaves 0x80 set exactly in each byte position that held zero.  */
19140 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
19141 emit_insn (gen_one_cmplsi2 (scratch, scratch));
19142 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
19143 emit_insn (gen_andsi3 (tmpreg, tmpreg,
19144 gen_int_mode (0x80808080, SImode)));
19145 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found in this word: locate which of the four bytes
   it is and adjust OUT to point exactly at it.  */
19150 rtx reg = gen_reg_rtx (SImode);
19151 rtx reg2 = gen_reg_rtx (Pmode);
19152 emit_move_insn (reg, tmpreg);
19153 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
19155 /* If zero is not in the first two bytes, move two bytes forward. */
19156 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19157 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19158 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19159 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
19160 gen_rtx_IF_THEN_ELSE (SImode, tmp,
19163 /* Emit lea manually to avoid clobbering of flags. */
19164 emit_insn (gen_rtx_SET (SImode, reg2,
19165 gen_rtx_PLUS (Pmode, out, const2_rtx)));
19167 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19168 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19169 emit_insn (gen_rtx_SET (VOIDmode, out,
19170 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19177 rtx end_2_label = gen_label_rtx ();
19178 /* Is zero in the first two bytes? */
19180 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19181 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19182 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19183 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19184 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19186 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19187 JUMP_LABEL (tmp) = end_2_label;
19189 /* Not in the first two. Move two bytes forward. */
19190 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19191 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19193 emit_label (end_2_label);
19197 /* Avoid branch in fixing the byte. */
/* Doubling the low byte of TMPREG shifts its 0x80 flag bit into the
   carry flag; the subtract-with-carry then backs OUT up by 3 or 4
   bytes without a branch.  */
19198 tmpreg = gen_lowpart (QImode, tmpreg);
19199 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19200 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19201 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
19203 emit_label (end_0_label);
19206 /* Expand strlen. */
/* OUT receives the length of the string at SRC.  EOSCHAR is the
   end-of-string character and ALIGN the known alignment of SRC.
   Two strategies: the unrolled expander above (zero terminator,
   small/unknown alignment, optimizing for speed), otherwise the
   rep-scasb sequence below.  */
19209 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19211 rtx addr, scratch1, scratch2, scratch3, scratch4;
19213 /* The generic case of strlen expander is long. Avoid its
19214 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
19216 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19217 && !TARGET_INLINE_ALL_STRINGOPS
19218 && !optimize_insn_for_size_p ()
19219 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19222 addr = force_reg (Pmode, XEXP (src, 0));
19223 scratch1 = gen_reg_rtx (Pmode);
19225 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19226 && !optimize_insn_for_size_p ())
19228 /* Well it seems that some optimizer does not combine a call like
19229 foo(strlen(bar), strlen(bar));
19230 when the move and the subtraction is done here. It does calculate
19231 the length just once when these instructions are done inside of
19232 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
19233 often used and I use one fewer register for the lifetime of
19234 output_strlen_unroll() this is better. */
19236 emit_move_insn (out, addr);
19238 ix86_expand_strlensi_unroll_1 (out, src, align);
19240 /* strlensi_unroll_1 returns the address of the zero at the end of
19241 the string, like memchr(), so compute the length by subtracting
19242 the start address. */
19243 emit_insn ((*ix86_gen_sub3) (out, out, addr));
19249 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19250 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
/* rep-scasb path: scan for EOSCHAR with count -1, then recover the
   length from the decremented counter (~count - 1).  */
19253 scratch2 = gen_reg_rtx (Pmode);
19254 scratch3 = gen_reg_rtx (Pmode);
19255 scratch4 = force_reg (Pmode, constm1_rtx);
19257 emit_move_insn (scratch3, addr);
19258 eoschar = force_reg (QImode, eoschar);
19260 src = replace_equiv_address_nv (src, scratch3);
19262 /* If .md starts supporting :P, this can be done in .md. */
19263 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19264 scratch4), UNSPEC_SCAS);
19265 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19266 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19267 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19272 /* For given symbol (function) construct code to compute address of its PLT
19273 entry in large x86-64 PIC model. */
19275 construct_plt_address (rtx symbol)
19277 rtx tmp = gen_reg_rtx (Pmode);
19278 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19280 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19281 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
/* Load symbol@PLTOFF into a fresh register and add the PIC base to
   form the absolute PLT entry address.  */
19283 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19284 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Expand a call.  RETVAL, if nonnull, receives the function result;
   FNADDR is a MEM holding the callee address; CALLARG1 is the argument
   bytes rtx; POP is the callee-pop amount; SIBCALL is nonzero for a
   sibling (tail) call.  NOTE(review): interior lines are elided in this
   excerpt; surviving lines kept byte-identical.  */
19289 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19291 rtx pop, int sibcall)
19293 rtx use = NULL, call;
19295 if (pop == const0_rtx)
19297 gcc_assert (!TARGET_64BIT || !pop);
19299 if (TARGET_MACHO && !TARGET_64BIT)
19302 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19303 fnaddr = machopic_indirect_call_target (fnaddr);
19308 /* Static functions and indirect calls don't need the pic register. */
19309 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19310 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19311 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19312 use_reg (&use, pic_offset_table_rtx);
/* For 64-bit varargs, AL carries the number of SSE registers used;
   CALLARG2 holds that count (negative means "not a varargs call").  */
19315 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19317 rtx al = gen_rtx_REG (QImode, AX_REG);
19318 emit_move_insn (al, callarg2);
19319 use_reg (&use, al);
19322 if (ix86_cmodel == CM_LARGE_PIC
19324 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19325 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19326 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19327 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19329 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19330 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls through a register must use R11 (the only
   call-clobbered register not used for argument passing).  */
19332 if (sibcall && TARGET_64BIT
19333 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19336 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19337 fnaddr = gen_rtx_REG (Pmode, R11_REG);
19338 emit_move_insn (fnaddr, addr);
19339 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19342 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19344 call = gen_rtx_SET (VOIDmode, retval, call);
/* Callee-pop calls also adjust the stack pointer in the same insn.  */
19347 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19348 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19349 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19352 && ix86_cfun_abi () == MS_ABI
19353 && (!callarg2 || INTVAL (callarg2) != -2))
19355 /* We need to represent that SI and DI registers are clobbered
19357 static int clobbered_registers[] = {
19358 XMM6_REG, XMM7_REG, XMM8_REG,
19359 XMM9_REG, XMM10_REG, XMM11_REG,
19360 XMM12_REG, XMM13_REG, XMM14_REG,
19361 XMM15_REG, SI_REG, DI_REG
19364 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19365 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19366 UNSPEC_MS_TO_SYSV_CALL);
19370 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19371 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19374 (SSE_REGNO_P (clobbered_registers[i])
19376 clobbered_registers[i]));
19378 call = gen_rtx_PARALLEL (VOIDmode,
19379 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19383 call = emit_call_insn (call);
19385 CALL_INSN_FUNCTION_USAGE (call) = use;
19389 /* Clear stack slot assignments remembered from previous functions.
19390 This is called from INIT_EXPANDERS once before RTL is emitted for each
19393 static struct machine_function *
19394 ix86_init_machine_status (void)
19396 struct machine_function *f;
/* Allocate a zeroed, garbage-collected machine_function and set the
   few fields whose "unset" value is not zero.  */
19398 f = GGC_CNEW (struct machine_function);
19399 f->use_fast_prologue_epilogue_nregs = -1; /* -1 = not yet computed */
19400 f->tls_descriptor_call_expanded_p = 0;
19401 f->call_abi = ix86_abi;
19406 /* Return a MEM corresponding to a stack slot with mode MODE.
19407 Allocate a new slot if necessary.
19409 The RTL for a function can have several slots available: N is
19410 which slot to use. */
19413 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19415 struct stack_local_entry *s;
19417 gcc_assert (n < MAX_386_STACK_LOCALS);
19419 /* Virtual slot is valid only before vregs are instantiated. */
19420 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse a previously allocated slot for this (mode, n) pair if one
   exists; copy_rtx so callers can modify the returned MEM freely.  */
19422 for (s = ix86_stack_locals; s; s = s->next)
19423 if (s->mode == mode && s->n == n)
19424 return copy_rtx (s->rtl);
/* Otherwise allocate a fresh slot and remember it on the per-function
   GC'd list ix86_stack_locals.  */
19426 s = (struct stack_local_entry *)
19427 ggc_alloc (sizeof (struct stack_local_entry));
19430 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19432 s->next = ix86_stack_locals;
19433 ix86_stack_locals = s;
19437 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19439 static GTY(()) rtx ix86_tls_symbol;
/* Lazily create and cache the SYMBOL_REF; the GNU TLS variant uses a
   triple-underscore name.  */
19441 ix86_tls_get_addr (void)
19444 if (!ix86_tls_symbol)
19446 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19447 (TARGET_ANY_GNU_TLS
19449 ? "___tls_get_addr"
19450 : "__tls_get_addr");
19456 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19458 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily create and cache the SYMBOL_REF, marking it as a
   global-dynamic TLS symbol via SYMBOL_REF_FLAGS.  */
19460 ix86_tls_module_base (void)
19463 if (!ix86_tls_module_base_symbol)
19465 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19466 "_TLS_MODULE_BASE_");
19467 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19468 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19471 return ix86_tls_module_base_symbol;
19474 /* Calculate the length of the memory address in the instruction
19475 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Returns the number of extra bytes (SIB + displacement) needed to
   encode ADDR.  NOTE(review): interior lines (the early returns and the
   final length accumulation) are elided in this excerpt.  */
19478 memory_address_length (rtx addr)
19480 struct ix86_address parts;
19481 rtx base, index, disp;
/* Auto-modified addresses encode as plain register indirect.  */
19485 if (GET_CODE (addr) == PRE_DEC
19486 || GET_CODE (addr) == POST_INC
19487 || GET_CODE (addr) == PRE_MODIFY
19488 || GET_CODE (addr) == POST_MODIFY)
19491 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the REGNO checks below see the hard register.  */
19494 if (parts.base && GET_CODE (parts.base) == SUBREG)
19495 parts.base = SUBREG_REG (parts.base);
19496 if (parts.index && GET_CODE (parts.index) == SUBREG)
19497 parts.index = SUBREG_REG (parts.index);
19500 index = parts.index;
19505 - esp as the base always wants an index,
19506 - ebp as the base always wants a displacement,
19507 - r12 as the base always wants an index,
19508 - r13 as the base always wants a displacement. */
19510 /* Register Indirect. */
19511 if (base && !index && !disp)
19513 /* esp (for its index) and ebp (for its displacement) need
19514 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
19517 && (addr == arg_pointer_rtx
19518 || addr == frame_pointer_rtx
19519 || REGNO (addr) == SP_REG
19520 || REGNO (addr) == BP_REG
19521 || REGNO (addr) == R12_REG
19522 || REGNO (addr) == R13_REG))
19526 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
19527 is not disp32, but disp32(%rip), so for disp32
19528 SIB byte is needed, unless print_operand_address
19529 optimizes it into disp32(%rip) or (%rip) is implied
19531 else if (disp && !base && !index)
/* Peel a CONST (symbol + offset) wrapper to inspect the symbol.  */
19538 if (GET_CODE (disp) == CONST)
19539 symbol = XEXP (disp, 0);
19540 if (GET_CODE (symbol) == PLUS
19541 && CONST_INT_P (XEXP (symbol, 1)))
19542 symbol = XEXP (symbol, 0);
19544 if (GET_CODE (symbol) != LABEL_REF
19545 && (GET_CODE (symbol) != SYMBOL_REF
19546 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
19547 && (GET_CODE (symbol) != UNSPEC
19548 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
19549 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
19556 /* Find the length of the displacement constant. */
/* satisfies_constraint_K checks for a signed 8-bit immediate, i.e.
   the short disp8 encoding.  */
19559 if (base && satisfies_constraint_K (disp))
19564 /* ebp always wants a displacement. Similarly r13. */
19565 else if (REG_P (base)
19566 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
19569 /* An index requires the two-byte modrm form.... */
19571 /* ...like esp (or r12), which always wants an index. */
19572 || base == arg_pointer_rtx
19573 || base == frame_pointer_rtx
19575 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
19592 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19593 is set, expect that the insn has an 8-bit immediate alternative. */
19595 ix86_attr_length_immediate_default (rtx insn, int shortform)
19599 extract_insn_cached (insn);
/* Scan operands for a constant; the immediate length follows from the
   insn's mode attribute.  */
19600 for (i = recog_data.n_operands - 1; i >= 0; --i)
19601 if (CONSTANT_P (recog_data.operand[i]))
19603 enum attr_mode mode = get_attr_mode (insn);
19606 if (shortform && CONST_INT_P (recog_data.operand[i]))
19608 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
/* Truncate to the operand mode before the signed 8-bit range test,
   since the encoded immediate is mode-sized.  */
19615 ival = trunc_int_for_mode (ival, HImode);
19618 ival = trunc_int_for_mode (ival, SImode);
19623 if (IN_RANGE (ival, -128, 127))
19640 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
19645 fatal_insn ("unknown insn mode", insn);
19650 /* Compute default value for "length_address" attribute. */
19652 ix86_attr_length_address_default (rtx insn)
/* LEA's "address" is its SET_SRC, not a MEM operand; handle it first.  */
19656 if (get_attr_type (insn) == TYPE_LEA)
19658 rtx set = PATTERN (insn), addr;
19660 if (GET_CODE (set) == PARALLEL)
19661 set = XVECEXP (set, 0, 0);
19663 gcc_assert (GET_CODE (set) == SET);
19665 addr = SET_SRC (set);
/* 32-bit LEA in 64-bit mode may be wrapped in ZERO_EXTEND/SUBREG;
   strip those to reach the address expression.  */
19666 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
19668 if (GET_CODE (addr) == ZERO_EXTEND)
19669 addr = XEXP (addr, 0);
19670 if (GET_CODE (addr) == SUBREG)
19671 addr = SUBREG_REG (addr);
19674 return memory_address_length (addr);
/* Otherwise find the (first, scanning backwards) MEM operand that the
   chosen alternative does not ignore.  */
19677 extract_insn_cached (insn);
19678 for (i = recog_data.n_operands - 1; i >= 0; --i)
19679 if (MEM_P (recog_data.operand[i]))
19681 constrain_operands_cached (reload_completed);
19682 if (which_alternative != -1)
19684 const char *constraints = recog_data.constraints[i];
19685 int alt = which_alternative;
/* Step over modifiers, then over ALT comma-separated alternatives,
   to reach this operand's constraint in the chosen alternative.  */
19687 while (*constraints == '=' || *constraints == '+')
19690 while (*constraints++ != ',')
19692 /* Skip ignored operands. */
19693 if (*constraints == 'X')
19696 return memory_address_length (XEXP (recog_data.operand[i], 0));
19701 /* Compute default value for "length_vex" attribute. It includes
19702 2 or 3 byte VEX prefix and 1 opcode byte. */
19705 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19710 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19711 byte VEX prefix. */
19712 if (!has_0f_opcode || has_vex_w)
19715 /* We can always use 2 byte VEX prefix in 32bit. */
/* In 64-bit mode, any operand needing a REX.W/X/B bit forces the
   3-byte VEX prefix; scan the operands for such cases.  */
19719 extract_insn_cached (insn);
19721 for (i = recog_data.n_operands - 1; i >= 0; --i)
19722 if (REG_P (recog_data.operand[i]))
19724 /* REX.W bit uses 3 byte VEX prefix. */
19725 if (GET_MODE (recog_data.operand[i]) == DImode
19726 && GENERAL_REG_P (recog_data.operand[i]))
19731 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19732 if (MEM_P (recog_data.operand[i])
19733 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19740 /* Return the maximum number of instructions a cpu can issue. */
/* Used as the scheduler's TARGET_SCHED_ISSUE_RATE hook; dispatches on
   the processor being tuned for.  */
19743 ix86_issue_rate (void)
19747 case PROCESSOR_PENTIUM:
19748 case PROCESSOR_ATOM:
19752 case PROCESSOR_PENTIUMPRO:
19753 case PROCESSOR_PENTIUM4:
19754 case PROCESSOR_ATHLON:
19756 case PROCESSOR_AMDFAM10:
19757 case PROCESSOR_NOCONA:
19758 case PROCESSOR_GENERIC32:
19759 case PROCESSOR_GENERIC64:
19762 case PROCESSOR_CORE2:
19770 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19771 by DEP_INSN and nothing set by DEP_INSN. */
19774 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19778 /* Simplify the test for uninteresting insns. */
19779 if (insn_type != TYPE_SETCC
19780 && insn_type != TYPE_ICMOV
19781 && insn_type != TYPE_FCMOV
19782 && insn_type != TYPE_IBR)
19785 if ((set = single_set (dep_insn)) != 0)
19787 set = SET_DEST (set);
19790 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19791 && XVECLEN (PATTERN (dep_insn), 0) == 2
19792 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19793 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19795 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19796 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19801 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19804 /* This test is true if the dependent insn reads the flags but
19805 not any other potentially set register. */
19806 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19809 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19815 /* Return true iff USE_INSN has a memory address with operands set by
19819 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19822 extract_insn_cached (use_insn);
/* Only the first MEM operand found (scanning backwards) is checked;
   its address is tested for modification by SET_INSN.  */
19823 for (i = recog_data.n_operands - 1; i >= 0; --i)
19824 if (MEM_P (recog_data.operand[i]))
19826 rtx addr = XEXP (recog_data.operand[i], 0);
19827 return modified_in_p (addr, set_insn) != 0;
/* TARGET_SCHED_ADJUST_COST hook: adjust the scheduling COST of the
   dependence LINK between INSN and DEP_INSN for the processor being
   tuned for.  NOTE(review): interior lines (cost adjustments and the
   final return) are elided in this excerpt.  */
19833 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19835 enum attr_type insn_type, dep_insn_type;
19836 enum attr_memory memory;
19838 int dep_insn_code_number;
19840 /* Anti and output dependencies have zero cost on all CPUs. */
19841 if (REG_NOTE_KIND (link) != 0)
19844 dep_insn_code_number = recog_memoized (dep_insn);
19846 /* If we can't recognize the insns, we can't really do anything. */
19847 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19850 insn_type = get_attr_type (insn);
19851 dep_insn_type = get_attr_type (dep_insn);
19855 case PROCESSOR_PENTIUM:
19856 /* Address Generation Interlock adds a cycle of latency. */
19857 if (insn_type == TYPE_LEA)
/* For LEA the "address" is the SET_SRC of the pattern itself.  */
19859 rtx addr = PATTERN (insn);
19861 if (GET_CODE (addr) == PARALLEL)
19862 addr = XVECEXP (addr, 0, 0);
19864 gcc_assert (GET_CODE (addr) == SET);
19866 addr = SET_SRC (addr);
19867 if (modified_in_p (addr, dep_insn))
19870 else if (ix86_agi_dependent (dep_insn, insn))
19873 /* ??? Compares pair with jump/setcc. */
19874 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19877 /* Floating point stores require value to be ready one cycle earlier. */
19878 if (insn_type == TYPE_FMOV
19879 && get_attr_memory (insn) == MEMORY_STORE
19880 && !ix86_agi_dependent (dep_insn, insn))
19884 case PROCESSOR_PENTIUMPRO:
19885 memory = get_attr_memory (insn);
19887 /* INT->FP conversion is expensive. */
19888 if (get_attr_fp_int_src (dep_insn))
19891 /* There is one cycle extra latency between an FP op and a store. */
19892 if (insn_type == TYPE_FMOV
19893 && (set = single_set (dep_insn)) != NULL_RTX
19894 && (set2 = single_set (insn)) != NULL_RTX
19895 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19896 && MEM_P (SET_DEST (set2)))
19899 /* Show ability of reorder buffer to hide latency of load by executing
19900 in parallel with previous instruction in case
19901 previous instruction is not needed to compute the address. */
19902 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19903 && !ix86_agi_dependent (dep_insn, insn))
19905 /* Claim moves to take one cycle, as core can issue one load
19906 at time and the next load can start cycle later. */
19907 if (dep_insn_type == TYPE_IMOV
19908 || dep_insn_type == TYPE_FMOV)
19916 memory = get_attr_memory (insn);
19918 /* The esp dependency is resolved before the instruction is really
19920 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19921 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19924 /* INT->FP conversion is expensive. */
19925 if (get_attr_fp_int_src (dep_insn))
19928 /* Show ability of reorder buffer to hide latency of load by executing
19929 in parallel with previous instruction in case
19930 previous instruction is not needed to compute the address. */
19931 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19932 && !ix86_agi_dependent (dep_insn, insn))
19934 /* Claim moves to take one cycle, as core can issue one load
19935 at time and the next load can start cycle later. */
19936 if (dep_insn_type == TYPE_IMOV
19937 || dep_insn_type == TYPE_FMOV)
19946 case PROCESSOR_ATHLON:
19948 case PROCESSOR_AMDFAM10:
19949 case PROCESSOR_ATOM:
19950 case PROCESSOR_GENERIC32:
19951 case PROCESSOR_GENERIC64:
19952 memory = get_attr_memory (insn);
19954 /* Show ability of reorder buffer to hide latency of load by executing
19955 in parallel with previous instruction in case
19956 previous instruction is not needed to compute the address. */
19957 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19958 && !ix86_agi_dependent (dep_insn, insn))
19960 enum attr_unit unit = get_attr_unit (insn);
19963 /* Because of the difference between the length of integer and
19964 floating unit pipeline preparation stages, the memory operands
19965 for floating point are cheaper.
19967 ??? For Athlon the difference is most probably 2. */
19968 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19971 loadcost = TARGET_ATHLON ? 2 : 0;
19973 if (cost >= loadcost)
19986 /* How many alternative schedules to try. This should be as wide as the
19987 scheduling freedom in the DFA, but no wider. Making this value too
19988 large results in extra work for the scheduler. */
19991 ia32_multipass_dfa_lookahead (void)
19995 case PROCESSOR_PENTIUM:
19998 case PROCESSOR_PENTIUMPRO:
20008 /* Compute the alignment given to a constant that is being placed in memory.
20009 EXP is the constant and ALIGN is the alignment that the object would
20011 The value of this function is used instead of that alignment to align
20015 ix86_constant_alignment (tree exp, int align)
/* Widen doubles to 64 bits and 128-bit-mode constants to 128 bits.  */
20017 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
20018 || TREE_CODE (exp) == INTEGER_CST)
20020 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
20022 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Word-align long string constants so block operations on them are
   faster, but not when optimizing for size.  */
20025 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
20026 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
20027 return BITS_PER_WORD;
20032 /* Compute the alignment for a static variable.
20033 TYPE is the data type, and ALIGN is the alignment that
20034 the object would ordinarily have. The value of this function is used
20035 instead of that alignment to align the object. */
20038 ix86_data_alignment (tree type, int align)
/* Cap the boost at 256 bits (or the object file's limit); when
   optimizing for size, never go beyond natural word alignment.  */
20040 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Large aggregates with a known (constant) size get the maximum
   alignment; TREE_INT_CST_HIGH != 0 means the size exceeds the low
   word, i.e. is certainly large enough.  */
20042 if (AGGREGATE_TYPE_P (type)
20043 && TYPE_SIZE (type)
20044 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20045 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
20046 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
20047 && align < max_align)
20050 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
20051 to 16byte boundary. */
20054 if (AGGREGATE_TYPE_P (type)
20055 && TYPE_SIZE (type)
20056 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20057 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
20058 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-kind boosts: DFmode data to 64 bits, 128-bit modes to 128.  */
20062 if (TREE_CODE (type) == ARRAY_TYPE)
20064 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20066 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20069 else if (TREE_CODE (type) == COMPLEX_TYPE)
20072 if (TYPE_MODE (type) == DCmode && align < 64)
20074 if ((TYPE_MODE (type) == XCmode
20075 || TYPE_MODE (type) == TCmode) && align < 128)
20078 else if ((TREE_CODE (type) == RECORD_TYPE
20079 || TREE_CODE (type) == UNION_TYPE
20080 || TREE_CODE (type) == QUAL_UNION_TYPE)
20081 && TYPE_FIELDS (type))
/* For records/unions the first field's mode drives the boost.  */
20083 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20085 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20088 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20089 || TREE_CODE (type) == INTEGER_TYPE)
20091 if (TYPE_MODE (type) == DFmode && align < 64)
20093 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20100 /* Compute the alignment for a local variable or a stack slot. EXP is
20101 the data type or decl itself, MODE is the widest mode available and
20102 ALIGN is the alignment that the object would ordinarily have. The
20103 value of this macro is used instead of that alignment to align the
/* NOTE(review): sampled extraction — return-type line, braces, the `decl`
   setup, and most "return ...;" lines are not visible.  Structure mirrors
   ix86_data_alignment but applies stack-slot-specific limits.  */
20107 ix86_local_alignment (tree exp, enum machine_mode mode,
20108 unsigned int align)
/* EXP may be a decl or a bare type; extract the type in the decl case.  */
20112 if (exp && DECL_P (exp))
20114 type = TREE_TYPE (exp);
20123 /* Don't do dynamic stack realignment for long long objects with
20124 -mpreferred-stack-boundary=2. */
/* (Condition head elided by the extraction.)  Suppress realignment only when
   neither the type nor the decl carries a user-specified alignment.  */
20127 && ix86_preferred_stack_boundary < 64
20128 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
20129 && (!type || !TYPE_USER_ALIGN (type))
20130 && (!decl || !DECL_USER_ALIGN (decl)))
20133 /* If TYPE is NULL, we are allocating a stack slot for caller-save
20134 register in MODE. We will return the largest alignment of XF
20138 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
20139 align = GET_MODE_ALIGNMENT (DFmode);
20143 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
20144 to 16byte boundary. */
20147 if (AGGREGATE_TYPE_P (type)
20148 && TYPE_SIZE (type)
20149 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20150 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
20151 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Same element-mode-driven bumps as the static-data case (returns elided).  */
20154 if (TREE_CODE (type) == ARRAY_TYPE)
20156 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20158 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20161 else if (TREE_CODE (type) == COMPLEX_TYPE)
20163 if (TYPE_MODE (type) == DCmode && align < 64)
20165 if ((TYPE_MODE (type) == XCmode
20166 || TYPE_MODE (type) == TCmode) && align < 128)
20169 else if ((TREE_CODE (type) == RECORD_TYPE
20170 || TREE_CODE (type) == UNION_TYPE
20171 || TREE_CODE (type) == QUAL_UNION_TYPE)
20172 && TYPE_FIELDS (type))
20174 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20176 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20179 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20180 || TREE_CODE (type) == INTEGER_TYPE)
20183 if (TYPE_MODE (type) == DFmode && align < 64)
20185 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20191 /* Emit RTL insns to initialize the variable parts of a trampoline.
20192 FNADDR is an RTX for the address of the function's pure code.
20193 CXT is an RTX for the static chain value for the function. */
/* NOTE(review): sampled extraction — the return type, braces, the
   TARGET_64BIT split, the `offset` declaration and its incremental updates
   between stores are all elided here.  The visible stores write x86 opcode
   bytes directly into the trampoline buffer.  */
20195 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
20199 /* Compute offset from the end of the jmp to the target function. */
20200 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
20201 plus_constant (tramp, 10),
20202 NULL_RTX, 1, OPTAB_DIRECT);
/* 32-bit layout: 0xb9 = "mov ecx, imm32" (static chain), then
   0xe9 = "jmp rel32" to the target.  */
20203 emit_move_insn (gen_rtx_MEM (QImode, tramp),
20204 gen_int_mode (0xb9, QImode));
20205 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
20206 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
20207 gen_int_mode (0xe9, QImode));
20208 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
20213 /* Try to load address using shorter movl instead of movabs.
20214 We may want to support movq for kernel mode, but kernel does not use
20215 trampolines at the moment. */
/* 64-bit path: zero-extendable address fits "mov r11d, imm32" (41 bb).  */
20216 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
20218 fnaddr = copy_to_mode_reg (DImode, fnaddr);
20219 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20220 gen_int_mode (0xbb41, HImode));
20221 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
20222 gen_lowpart (SImode, fnaddr));
/* Otherwise "movabs r11, imm64" (49 bb).  */
20227 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20228 gen_int_mode (0xbb49, HImode));
20229 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20233 /* Load static chain using movabs to r10. */
20234 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20235 gen_int_mode (0xba49, HImode));
20236 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20239 /* Jump to the r11 */
20240 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20241 gen_int_mode (0xff49, HImode));
20242 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
20243 gen_int_mode (0xe3, QImode));
/* Sanity check that the emitted bytes fit the reserved trampoline area.  */
20245 gcc_assert (offset <= TRAMPOLINE_SIZE);
20248 #ifdef ENABLE_EXECUTE_STACK
/* Some targets need the stack page marked executable at runtime.  */
20249 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
20250 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
20254 /* Codes for all the SSE/MMX builtins. */
20257 IX86_BUILTIN_ADDPS,
20258 IX86_BUILTIN_ADDSS,
20259 IX86_BUILTIN_DIVPS,
20260 IX86_BUILTIN_DIVSS,
20261 IX86_BUILTIN_MULPS,
20262 IX86_BUILTIN_MULSS,
20263 IX86_BUILTIN_SUBPS,
20264 IX86_BUILTIN_SUBSS,
20266 IX86_BUILTIN_CMPEQPS,
20267 IX86_BUILTIN_CMPLTPS,
20268 IX86_BUILTIN_CMPLEPS,
20269 IX86_BUILTIN_CMPGTPS,
20270 IX86_BUILTIN_CMPGEPS,
20271 IX86_BUILTIN_CMPNEQPS,
20272 IX86_BUILTIN_CMPNLTPS,
20273 IX86_BUILTIN_CMPNLEPS,
20274 IX86_BUILTIN_CMPNGTPS,
20275 IX86_BUILTIN_CMPNGEPS,
20276 IX86_BUILTIN_CMPORDPS,
20277 IX86_BUILTIN_CMPUNORDPS,
20278 IX86_BUILTIN_CMPEQSS,
20279 IX86_BUILTIN_CMPLTSS,
20280 IX86_BUILTIN_CMPLESS,
20281 IX86_BUILTIN_CMPNEQSS,
20282 IX86_BUILTIN_CMPNLTSS,
20283 IX86_BUILTIN_CMPNLESS,
20284 IX86_BUILTIN_CMPNGTSS,
20285 IX86_BUILTIN_CMPNGESS,
20286 IX86_BUILTIN_CMPORDSS,
20287 IX86_BUILTIN_CMPUNORDSS,
20289 IX86_BUILTIN_COMIEQSS,
20290 IX86_BUILTIN_COMILTSS,
20291 IX86_BUILTIN_COMILESS,
20292 IX86_BUILTIN_COMIGTSS,
20293 IX86_BUILTIN_COMIGESS,
20294 IX86_BUILTIN_COMINEQSS,
20295 IX86_BUILTIN_UCOMIEQSS,
20296 IX86_BUILTIN_UCOMILTSS,
20297 IX86_BUILTIN_UCOMILESS,
20298 IX86_BUILTIN_UCOMIGTSS,
20299 IX86_BUILTIN_UCOMIGESS,
20300 IX86_BUILTIN_UCOMINEQSS,
20302 IX86_BUILTIN_CVTPI2PS,
20303 IX86_BUILTIN_CVTPS2PI,
20304 IX86_BUILTIN_CVTSI2SS,
20305 IX86_BUILTIN_CVTSI642SS,
20306 IX86_BUILTIN_CVTSS2SI,
20307 IX86_BUILTIN_CVTSS2SI64,
20308 IX86_BUILTIN_CVTTPS2PI,
20309 IX86_BUILTIN_CVTTSS2SI,
20310 IX86_BUILTIN_CVTTSS2SI64,
20312 IX86_BUILTIN_MAXPS,
20313 IX86_BUILTIN_MAXSS,
20314 IX86_BUILTIN_MINPS,
20315 IX86_BUILTIN_MINSS,
20317 IX86_BUILTIN_LOADUPS,
20318 IX86_BUILTIN_STOREUPS,
20319 IX86_BUILTIN_MOVSS,
20321 IX86_BUILTIN_MOVHLPS,
20322 IX86_BUILTIN_MOVLHPS,
20323 IX86_BUILTIN_LOADHPS,
20324 IX86_BUILTIN_LOADLPS,
20325 IX86_BUILTIN_STOREHPS,
20326 IX86_BUILTIN_STORELPS,
20328 IX86_BUILTIN_MASKMOVQ,
20329 IX86_BUILTIN_MOVMSKPS,
20330 IX86_BUILTIN_PMOVMSKB,
20332 IX86_BUILTIN_MOVNTPS,
20333 IX86_BUILTIN_MOVNTQ,
20335 IX86_BUILTIN_LOADDQU,
20336 IX86_BUILTIN_STOREDQU,
20338 IX86_BUILTIN_PACKSSWB,
20339 IX86_BUILTIN_PACKSSDW,
20340 IX86_BUILTIN_PACKUSWB,
20342 IX86_BUILTIN_PADDB,
20343 IX86_BUILTIN_PADDW,
20344 IX86_BUILTIN_PADDD,
20345 IX86_BUILTIN_PADDQ,
20346 IX86_BUILTIN_PADDSB,
20347 IX86_BUILTIN_PADDSW,
20348 IX86_BUILTIN_PADDUSB,
20349 IX86_BUILTIN_PADDUSW,
20350 IX86_BUILTIN_PSUBB,
20351 IX86_BUILTIN_PSUBW,
20352 IX86_BUILTIN_PSUBD,
20353 IX86_BUILTIN_PSUBQ,
20354 IX86_BUILTIN_PSUBSB,
20355 IX86_BUILTIN_PSUBSW,
20356 IX86_BUILTIN_PSUBUSB,
20357 IX86_BUILTIN_PSUBUSW,
20360 IX86_BUILTIN_PANDN,
20364 IX86_BUILTIN_PAVGB,
20365 IX86_BUILTIN_PAVGW,
20367 IX86_BUILTIN_PCMPEQB,
20368 IX86_BUILTIN_PCMPEQW,
20369 IX86_BUILTIN_PCMPEQD,
20370 IX86_BUILTIN_PCMPGTB,
20371 IX86_BUILTIN_PCMPGTW,
20372 IX86_BUILTIN_PCMPGTD,
20374 IX86_BUILTIN_PMADDWD,
20376 IX86_BUILTIN_PMAXSW,
20377 IX86_BUILTIN_PMAXUB,
20378 IX86_BUILTIN_PMINSW,
20379 IX86_BUILTIN_PMINUB,
20381 IX86_BUILTIN_PMULHUW,
20382 IX86_BUILTIN_PMULHW,
20383 IX86_BUILTIN_PMULLW,
20385 IX86_BUILTIN_PSADBW,
20386 IX86_BUILTIN_PSHUFW,
20388 IX86_BUILTIN_PSLLW,
20389 IX86_BUILTIN_PSLLD,
20390 IX86_BUILTIN_PSLLQ,
20391 IX86_BUILTIN_PSRAW,
20392 IX86_BUILTIN_PSRAD,
20393 IX86_BUILTIN_PSRLW,
20394 IX86_BUILTIN_PSRLD,
20395 IX86_BUILTIN_PSRLQ,
20396 IX86_BUILTIN_PSLLWI,
20397 IX86_BUILTIN_PSLLDI,
20398 IX86_BUILTIN_PSLLQI,
20399 IX86_BUILTIN_PSRAWI,
20400 IX86_BUILTIN_PSRADI,
20401 IX86_BUILTIN_PSRLWI,
20402 IX86_BUILTIN_PSRLDI,
20403 IX86_BUILTIN_PSRLQI,
20405 IX86_BUILTIN_PUNPCKHBW,
20406 IX86_BUILTIN_PUNPCKHWD,
20407 IX86_BUILTIN_PUNPCKHDQ,
20408 IX86_BUILTIN_PUNPCKLBW,
20409 IX86_BUILTIN_PUNPCKLWD,
20410 IX86_BUILTIN_PUNPCKLDQ,
20412 IX86_BUILTIN_SHUFPS,
20414 IX86_BUILTIN_RCPPS,
20415 IX86_BUILTIN_RCPSS,
20416 IX86_BUILTIN_RSQRTPS,
20417 IX86_BUILTIN_RSQRTPS_NR,
20418 IX86_BUILTIN_RSQRTSS,
20419 IX86_BUILTIN_RSQRTF,
20420 IX86_BUILTIN_SQRTPS,
20421 IX86_BUILTIN_SQRTPS_NR,
20422 IX86_BUILTIN_SQRTSS,
20424 IX86_BUILTIN_UNPCKHPS,
20425 IX86_BUILTIN_UNPCKLPS,
20427 IX86_BUILTIN_ANDPS,
20428 IX86_BUILTIN_ANDNPS,
20430 IX86_BUILTIN_XORPS,
20433 IX86_BUILTIN_LDMXCSR,
20434 IX86_BUILTIN_STMXCSR,
20435 IX86_BUILTIN_SFENCE,
20437 /* 3DNow! Original */
20438 IX86_BUILTIN_FEMMS,
20439 IX86_BUILTIN_PAVGUSB,
20440 IX86_BUILTIN_PF2ID,
20441 IX86_BUILTIN_PFACC,
20442 IX86_BUILTIN_PFADD,
20443 IX86_BUILTIN_PFCMPEQ,
20444 IX86_BUILTIN_PFCMPGE,
20445 IX86_BUILTIN_PFCMPGT,
20446 IX86_BUILTIN_PFMAX,
20447 IX86_BUILTIN_PFMIN,
20448 IX86_BUILTIN_PFMUL,
20449 IX86_BUILTIN_PFRCP,
20450 IX86_BUILTIN_PFRCPIT1,
20451 IX86_BUILTIN_PFRCPIT2,
20452 IX86_BUILTIN_PFRSQIT1,
20453 IX86_BUILTIN_PFRSQRT,
20454 IX86_BUILTIN_PFSUB,
20455 IX86_BUILTIN_PFSUBR,
20456 IX86_BUILTIN_PI2FD,
20457 IX86_BUILTIN_PMULHRW,
20459 /* 3DNow! Athlon Extensions */
20460 IX86_BUILTIN_PF2IW,
20461 IX86_BUILTIN_PFNACC,
20462 IX86_BUILTIN_PFPNACC,
20463 IX86_BUILTIN_PI2FW,
20464 IX86_BUILTIN_PSWAPDSI,
20465 IX86_BUILTIN_PSWAPDSF,
20468 IX86_BUILTIN_ADDPD,
20469 IX86_BUILTIN_ADDSD,
20470 IX86_BUILTIN_DIVPD,
20471 IX86_BUILTIN_DIVSD,
20472 IX86_BUILTIN_MULPD,
20473 IX86_BUILTIN_MULSD,
20474 IX86_BUILTIN_SUBPD,
20475 IX86_BUILTIN_SUBSD,
20477 IX86_BUILTIN_CMPEQPD,
20478 IX86_BUILTIN_CMPLTPD,
20479 IX86_BUILTIN_CMPLEPD,
20480 IX86_BUILTIN_CMPGTPD,
20481 IX86_BUILTIN_CMPGEPD,
20482 IX86_BUILTIN_CMPNEQPD,
20483 IX86_BUILTIN_CMPNLTPD,
20484 IX86_BUILTIN_CMPNLEPD,
20485 IX86_BUILTIN_CMPNGTPD,
20486 IX86_BUILTIN_CMPNGEPD,
20487 IX86_BUILTIN_CMPORDPD,
20488 IX86_BUILTIN_CMPUNORDPD,
20489 IX86_BUILTIN_CMPEQSD,
20490 IX86_BUILTIN_CMPLTSD,
20491 IX86_BUILTIN_CMPLESD,
20492 IX86_BUILTIN_CMPNEQSD,
20493 IX86_BUILTIN_CMPNLTSD,
20494 IX86_BUILTIN_CMPNLESD,
20495 IX86_BUILTIN_CMPORDSD,
20496 IX86_BUILTIN_CMPUNORDSD,
20498 IX86_BUILTIN_COMIEQSD,
20499 IX86_BUILTIN_COMILTSD,
20500 IX86_BUILTIN_COMILESD,
20501 IX86_BUILTIN_COMIGTSD,
20502 IX86_BUILTIN_COMIGESD,
20503 IX86_BUILTIN_COMINEQSD,
20504 IX86_BUILTIN_UCOMIEQSD,
20505 IX86_BUILTIN_UCOMILTSD,
20506 IX86_BUILTIN_UCOMILESD,
20507 IX86_BUILTIN_UCOMIGTSD,
20508 IX86_BUILTIN_UCOMIGESD,
20509 IX86_BUILTIN_UCOMINEQSD,
20511 IX86_BUILTIN_MAXPD,
20512 IX86_BUILTIN_MAXSD,
20513 IX86_BUILTIN_MINPD,
20514 IX86_BUILTIN_MINSD,
20516 IX86_BUILTIN_ANDPD,
20517 IX86_BUILTIN_ANDNPD,
20519 IX86_BUILTIN_XORPD,
20521 IX86_BUILTIN_SQRTPD,
20522 IX86_BUILTIN_SQRTSD,
20524 IX86_BUILTIN_UNPCKHPD,
20525 IX86_BUILTIN_UNPCKLPD,
20527 IX86_BUILTIN_SHUFPD,
20529 IX86_BUILTIN_LOADUPD,
20530 IX86_BUILTIN_STOREUPD,
20531 IX86_BUILTIN_MOVSD,
20533 IX86_BUILTIN_LOADHPD,
20534 IX86_BUILTIN_LOADLPD,
20536 IX86_BUILTIN_CVTDQ2PD,
20537 IX86_BUILTIN_CVTDQ2PS,
20539 IX86_BUILTIN_CVTPD2DQ,
20540 IX86_BUILTIN_CVTPD2PI,
20541 IX86_BUILTIN_CVTPD2PS,
20542 IX86_BUILTIN_CVTTPD2DQ,
20543 IX86_BUILTIN_CVTTPD2PI,
20545 IX86_BUILTIN_CVTPI2PD,
20546 IX86_BUILTIN_CVTSI2SD,
20547 IX86_BUILTIN_CVTSI642SD,
20549 IX86_BUILTIN_CVTSD2SI,
20550 IX86_BUILTIN_CVTSD2SI64,
20551 IX86_BUILTIN_CVTSD2SS,
20552 IX86_BUILTIN_CVTSS2SD,
20553 IX86_BUILTIN_CVTTSD2SI,
20554 IX86_BUILTIN_CVTTSD2SI64,
20556 IX86_BUILTIN_CVTPS2DQ,
20557 IX86_BUILTIN_CVTPS2PD,
20558 IX86_BUILTIN_CVTTPS2DQ,
20560 IX86_BUILTIN_MOVNTI,
20561 IX86_BUILTIN_MOVNTPD,
20562 IX86_BUILTIN_MOVNTDQ,
20564 IX86_BUILTIN_MOVQ128,
20567 IX86_BUILTIN_MASKMOVDQU,
20568 IX86_BUILTIN_MOVMSKPD,
20569 IX86_BUILTIN_PMOVMSKB128,
20571 IX86_BUILTIN_PACKSSWB128,
20572 IX86_BUILTIN_PACKSSDW128,
20573 IX86_BUILTIN_PACKUSWB128,
20575 IX86_BUILTIN_PADDB128,
20576 IX86_BUILTIN_PADDW128,
20577 IX86_BUILTIN_PADDD128,
20578 IX86_BUILTIN_PADDQ128,
20579 IX86_BUILTIN_PADDSB128,
20580 IX86_BUILTIN_PADDSW128,
20581 IX86_BUILTIN_PADDUSB128,
20582 IX86_BUILTIN_PADDUSW128,
20583 IX86_BUILTIN_PSUBB128,
20584 IX86_BUILTIN_PSUBW128,
20585 IX86_BUILTIN_PSUBD128,
20586 IX86_BUILTIN_PSUBQ128,
20587 IX86_BUILTIN_PSUBSB128,
20588 IX86_BUILTIN_PSUBSW128,
20589 IX86_BUILTIN_PSUBUSB128,
20590 IX86_BUILTIN_PSUBUSW128,
20592 IX86_BUILTIN_PAND128,
20593 IX86_BUILTIN_PANDN128,
20594 IX86_BUILTIN_POR128,
20595 IX86_BUILTIN_PXOR128,
20597 IX86_BUILTIN_PAVGB128,
20598 IX86_BUILTIN_PAVGW128,
20600 IX86_BUILTIN_PCMPEQB128,
20601 IX86_BUILTIN_PCMPEQW128,
20602 IX86_BUILTIN_PCMPEQD128,
20603 IX86_BUILTIN_PCMPGTB128,
20604 IX86_BUILTIN_PCMPGTW128,
20605 IX86_BUILTIN_PCMPGTD128,
20607 IX86_BUILTIN_PMADDWD128,
20609 IX86_BUILTIN_PMAXSW128,
20610 IX86_BUILTIN_PMAXUB128,
20611 IX86_BUILTIN_PMINSW128,
20612 IX86_BUILTIN_PMINUB128,
20614 IX86_BUILTIN_PMULUDQ,
20615 IX86_BUILTIN_PMULUDQ128,
20616 IX86_BUILTIN_PMULHUW128,
20617 IX86_BUILTIN_PMULHW128,
20618 IX86_BUILTIN_PMULLW128,
20620 IX86_BUILTIN_PSADBW128,
20621 IX86_BUILTIN_PSHUFHW,
20622 IX86_BUILTIN_PSHUFLW,
20623 IX86_BUILTIN_PSHUFD,
20625 IX86_BUILTIN_PSLLDQI128,
20626 IX86_BUILTIN_PSLLWI128,
20627 IX86_BUILTIN_PSLLDI128,
20628 IX86_BUILTIN_PSLLQI128,
20629 IX86_BUILTIN_PSRAWI128,
20630 IX86_BUILTIN_PSRADI128,
20631 IX86_BUILTIN_PSRLDQI128,
20632 IX86_BUILTIN_PSRLWI128,
20633 IX86_BUILTIN_PSRLDI128,
20634 IX86_BUILTIN_PSRLQI128,
20636 IX86_BUILTIN_PSLLDQ128,
20637 IX86_BUILTIN_PSLLW128,
20638 IX86_BUILTIN_PSLLD128,
20639 IX86_BUILTIN_PSLLQ128,
20640 IX86_BUILTIN_PSRAW128,
20641 IX86_BUILTIN_PSRAD128,
20642 IX86_BUILTIN_PSRLW128,
20643 IX86_BUILTIN_PSRLD128,
20644 IX86_BUILTIN_PSRLQ128,
20646 IX86_BUILTIN_PUNPCKHBW128,
20647 IX86_BUILTIN_PUNPCKHWD128,
20648 IX86_BUILTIN_PUNPCKHDQ128,
20649 IX86_BUILTIN_PUNPCKHQDQ128,
20650 IX86_BUILTIN_PUNPCKLBW128,
20651 IX86_BUILTIN_PUNPCKLWD128,
20652 IX86_BUILTIN_PUNPCKLDQ128,
20653 IX86_BUILTIN_PUNPCKLQDQ128,
20655 IX86_BUILTIN_CLFLUSH,
20656 IX86_BUILTIN_MFENCE,
20657 IX86_BUILTIN_LFENCE,
20660 IX86_BUILTIN_ADDSUBPS,
20661 IX86_BUILTIN_HADDPS,
20662 IX86_BUILTIN_HSUBPS,
20663 IX86_BUILTIN_MOVSHDUP,
20664 IX86_BUILTIN_MOVSLDUP,
20665 IX86_BUILTIN_ADDSUBPD,
20666 IX86_BUILTIN_HADDPD,
20667 IX86_BUILTIN_HSUBPD,
20668 IX86_BUILTIN_LDDQU,
20670 IX86_BUILTIN_MONITOR,
20671 IX86_BUILTIN_MWAIT,
20674 IX86_BUILTIN_PHADDW,
20675 IX86_BUILTIN_PHADDD,
20676 IX86_BUILTIN_PHADDSW,
20677 IX86_BUILTIN_PHSUBW,
20678 IX86_BUILTIN_PHSUBD,
20679 IX86_BUILTIN_PHSUBSW,
20680 IX86_BUILTIN_PMADDUBSW,
20681 IX86_BUILTIN_PMULHRSW,
20682 IX86_BUILTIN_PSHUFB,
20683 IX86_BUILTIN_PSIGNB,
20684 IX86_BUILTIN_PSIGNW,
20685 IX86_BUILTIN_PSIGND,
20686 IX86_BUILTIN_PALIGNR,
20687 IX86_BUILTIN_PABSB,
20688 IX86_BUILTIN_PABSW,
20689 IX86_BUILTIN_PABSD,
20691 IX86_BUILTIN_PHADDW128,
20692 IX86_BUILTIN_PHADDD128,
20693 IX86_BUILTIN_PHADDSW128,
20694 IX86_BUILTIN_PHSUBW128,
20695 IX86_BUILTIN_PHSUBD128,
20696 IX86_BUILTIN_PHSUBSW128,
20697 IX86_BUILTIN_PMADDUBSW128,
20698 IX86_BUILTIN_PMULHRSW128,
20699 IX86_BUILTIN_PSHUFB128,
20700 IX86_BUILTIN_PSIGNB128,
20701 IX86_BUILTIN_PSIGNW128,
20702 IX86_BUILTIN_PSIGND128,
20703 IX86_BUILTIN_PALIGNR128,
20704 IX86_BUILTIN_PABSB128,
20705 IX86_BUILTIN_PABSW128,
20706 IX86_BUILTIN_PABSD128,
20708 /* AMDFAM10 - SSE4A New Instructions. */
20709 IX86_BUILTIN_MOVNTSD,
20710 IX86_BUILTIN_MOVNTSS,
20711 IX86_BUILTIN_EXTRQI,
20712 IX86_BUILTIN_EXTRQ,
20713 IX86_BUILTIN_INSERTQI,
20714 IX86_BUILTIN_INSERTQ,
20717 IX86_BUILTIN_BLENDPD,
20718 IX86_BUILTIN_BLENDPS,
20719 IX86_BUILTIN_BLENDVPD,
20720 IX86_BUILTIN_BLENDVPS,
20721 IX86_BUILTIN_PBLENDVB128,
20722 IX86_BUILTIN_PBLENDW128,
20727 IX86_BUILTIN_INSERTPS128,
20729 IX86_BUILTIN_MOVNTDQA,
20730 IX86_BUILTIN_MPSADBW128,
20731 IX86_BUILTIN_PACKUSDW128,
20732 IX86_BUILTIN_PCMPEQQ,
20733 IX86_BUILTIN_PHMINPOSUW128,
20735 IX86_BUILTIN_PMAXSB128,
20736 IX86_BUILTIN_PMAXSD128,
20737 IX86_BUILTIN_PMAXUD128,
20738 IX86_BUILTIN_PMAXUW128,
20740 IX86_BUILTIN_PMINSB128,
20741 IX86_BUILTIN_PMINSD128,
20742 IX86_BUILTIN_PMINUD128,
20743 IX86_BUILTIN_PMINUW128,
20745 IX86_BUILTIN_PMOVSXBW128,
20746 IX86_BUILTIN_PMOVSXBD128,
20747 IX86_BUILTIN_PMOVSXBQ128,
20748 IX86_BUILTIN_PMOVSXWD128,
20749 IX86_BUILTIN_PMOVSXWQ128,
20750 IX86_BUILTIN_PMOVSXDQ128,
20752 IX86_BUILTIN_PMOVZXBW128,
20753 IX86_BUILTIN_PMOVZXBD128,
20754 IX86_BUILTIN_PMOVZXBQ128,
20755 IX86_BUILTIN_PMOVZXWD128,
20756 IX86_BUILTIN_PMOVZXWQ128,
20757 IX86_BUILTIN_PMOVZXDQ128,
20759 IX86_BUILTIN_PMULDQ128,
20760 IX86_BUILTIN_PMULLD128,
20762 IX86_BUILTIN_ROUNDPD,
20763 IX86_BUILTIN_ROUNDPS,
20764 IX86_BUILTIN_ROUNDSD,
20765 IX86_BUILTIN_ROUNDSS,
20767 IX86_BUILTIN_PTESTZ,
20768 IX86_BUILTIN_PTESTC,
20769 IX86_BUILTIN_PTESTNZC,
20771 IX86_BUILTIN_VEC_INIT_V2SI,
20772 IX86_BUILTIN_VEC_INIT_V4HI,
20773 IX86_BUILTIN_VEC_INIT_V8QI,
20774 IX86_BUILTIN_VEC_EXT_V2DF,
20775 IX86_BUILTIN_VEC_EXT_V2DI,
20776 IX86_BUILTIN_VEC_EXT_V4SF,
20777 IX86_BUILTIN_VEC_EXT_V4SI,
20778 IX86_BUILTIN_VEC_EXT_V8HI,
20779 IX86_BUILTIN_VEC_EXT_V2SI,
20780 IX86_BUILTIN_VEC_EXT_V4HI,
20781 IX86_BUILTIN_VEC_EXT_V16QI,
20782 IX86_BUILTIN_VEC_SET_V2DI,
20783 IX86_BUILTIN_VEC_SET_V4SF,
20784 IX86_BUILTIN_VEC_SET_V4SI,
20785 IX86_BUILTIN_VEC_SET_V8HI,
20786 IX86_BUILTIN_VEC_SET_V4HI,
20787 IX86_BUILTIN_VEC_SET_V16QI,
20789 IX86_BUILTIN_VEC_PACK_SFIX,
20792 IX86_BUILTIN_CRC32QI,
20793 IX86_BUILTIN_CRC32HI,
20794 IX86_BUILTIN_CRC32SI,
20795 IX86_BUILTIN_CRC32DI,
20797 IX86_BUILTIN_PCMPESTRI128,
20798 IX86_BUILTIN_PCMPESTRM128,
20799 IX86_BUILTIN_PCMPESTRA128,
20800 IX86_BUILTIN_PCMPESTRC128,
20801 IX86_BUILTIN_PCMPESTRO128,
20802 IX86_BUILTIN_PCMPESTRS128,
20803 IX86_BUILTIN_PCMPESTRZ128,
20804 IX86_BUILTIN_PCMPISTRI128,
20805 IX86_BUILTIN_PCMPISTRM128,
20806 IX86_BUILTIN_PCMPISTRA128,
20807 IX86_BUILTIN_PCMPISTRC128,
20808 IX86_BUILTIN_PCMPISTRO128,
20809 IX86_BUILTIN_PCMPISTRS128,
20810 IX86_BUILTIN_PCMPISTRZ128,
20812 IX86_BUILTIN_PCMPGTQ,
20814 /* AES instructions */
20815 IX86_BUILTIN_AESENC128,
20816 IX86_BUILTIN_AESENCLAST128,
20817 IX86_BUILTIN_AESDEC128,
20818 IX86_BUILTIN_AESDECLAST128,
20819 IX86_BUILTIN_AESIMC128,
20820 IX86_BUILTIN_AESKEYGENASSIST128,
20822 /* PCLMUL instruction */
20823 IX86_BUILTIN_PCLMULQDQ128,
20826 IX86_BUILTIN_ADDPD256,
20827 IX86_BUILTIN_ADDPS256,
20828 IX86_BUILTIN_ADDSUBPD256,
20829 IX86_BUILTIN_ADDSUBPS256,
20830 IX86_BUILTIN_ANDPD256,
20831 IX86_BUILTIN_ANDPS256,
20832 IX86_BUILTIN_ANDNPD256,
20833 IX86_BUILTIN_ANDNPS256,
20834 IX86_BUILTIN_BLENDPD256,
20835 IX86_BUILTIN_BLENDPS256,
20836 IX86_BUILTIN_BLENDVPD256,
20837 IX86_BUILTIN_BLENDVPS256,
20838 IX86_BUILTIN_DIVPD256,
20839 IX86_BUILTIN_DIVPS256,
20840 IX86_BUILTIN_DPPS256,
20841 IX86_BUILTIN_HADDPD256,
20842 IX86_BUILTIN_HADDPS256,
20843 IX86_BUILTIN_HSUBPD256,
20844 IX86_BUILTIN_HSUBPS256,
20845 IX86_BUILTIN_MAXPD256,
20846 IX86_BUILTIN_MAXPS256,
20847 IX86_BUILTIN_MINPD256,
20848 IX86_BUILTIN_MINPS256,
20849 IX86_BUILTIN_MULPD256,
20850 IX86_BUILTIN_MULPS256,
20851 IX86_BUILTIN_ORPD256,
20852 IX86_BUILTIN_ORPS256,
20853 IX86_BUILTIN_SHUFPD256,
20854 IX86_BUILTIN_SHUFPS256,
20855 IX86_BUILTIN_SUBPD256,
20856 IX86_BUILTIN_SUBPS256,
20857 IX86_BUILTIN_XORPD256,
20858 IX86_BUILTIN_XORPS256,
20859 IX86_BUILTIN_CMPSD,
20860 IX86_BUILTIN_CMPSS,
20861 IX86_BUILTIN_CMPPD,
20862 IX86_BUILTIN_CMPPS,
20863 IX86_BUILTIN_CMPPD256,
20864 IX86_BUILTIN_CMPPS256,
20865 IX86_BUILTIN_CVTDQ2PD256,
20866 IX86_BUILTIN_CVTDQ2PS256,
20867 IX86_BUILTIN_CVTPD2PS256,
20868 IX86_BUILTIN_CVTPS2DQ256,
20869 IX86_BUILTIN_CVTPS2PD256,
20870 IX86_BUILTIN_CVTTPD2DQ256,
20871 IX86_BUILTIN_CVTPD2DQ256,
20872 IX86_BUILTIN_CVTTPS2DQ256,
20873 IX86_BUILTIN_EXTRACTF128PD256,
20874 IX86_BUILTIN_EXTRACTF128PS256,
20875 IX86_BUILTIN_EXTRACTF128SI256,
20876 IX86_BUILTIN_VZEROALL,
20877 IX86_BUILTIN_VZEROUPPER,
20878 IX86_BUILTIN_VZEROUPPER_REX64,
20879 IX86_BUILTIN_VPERMILVARPD,
20880 IX86_BUILTIN_VPERMILVARPS,
20881 IX86_BUILTIN_VPERMILVARPD256,
20882 IX86_BUILTIN_VPERMILVARPS256,
20883 IX86_BUILTIN_VPERMILPD,
20884 IX86_BUILTIN_VPERMILPS,
20885 IX86_BUILTIN_VPERMILPD256,
20886 IX86_BUILTIN_VPERMILPS256,
20887 IX86_BUILTIN_VPERM2F128PD256,
20888 IX86_BUILTIN_VPERM2F128PS256,
20889 IX86_BUILTIN_VPERM2F128SI256,
20890 IX86_BUILTIN_VBROADCASTSS,
20891 IX86_BUILTIN_VBROADCASTSD256,
20892 IX86_BUILTIN_VBROADCASTSS256,
20893 IX86_BUILTIN_VBROADCASTPD256,
20894 IX86_BUILTIN_VBROADCASTPS256,
20895 IX86_BUILTIN_VINSERTF128PD256,
20896 IX86_BUILTIN_VINSERTF128PS256,
20897 IX86_BUILTIN_VINSERTF128SI256,
20898 IX86_BUILTIN_LOADUPD256,
20899 IX86_BUILTIN_LOADUPS256,
20900 IX86_BUILTIN_STOREUPD256,
20901 IX86_BUILTIN_STOREUPS256,
20902 IX86_BUILTIN_LDDQU256,
20903 IX86_BUILTIN_MOVNTDQ256,
20904 IX86_BUILTIN_MOVNTPD256,
20905 IX86_BUILTIN_MOVNTPS256,
20906 IX86_BUILTIN_LOADDQU256,
20907 IX86_BUILTIN_STOREDQU256,
20908 IX86_BUILTIN_MASKLOADPD,
20909 IX86_BUILTIN_MASKLOADPS,
20910 IX86_BUILTIN_MASKSTOREPD,
20911 IX86_BUILTIN_MASKSTOREPS,
20912 IX86_BUILTIN_MASKLOADPD256,
20913 IX86_BUILTIN_MASKLOADPS256,
20914 IX86_BUILTIN_MASKSTOREPD256,
20915 IX86_BUILTIN_MASKSTOREPS256,
20916 IX86_BUILTIN_MOVSHDUP256,
20917 IX86_BUILTIN_MOVSLDUP256,
20918 IX86_BUILTIN_MOVDDUP256,
20920 IX86_BUILTIN_SQRTPD256,
20921 IX86_BUILTIN_SQRTPS256,
20922 IX86_BUILTIN_SQRTPS_NR256,
20923 IX86_BUILTIN_RSQRTPS256,
20924 IX86_BUILTIN_RSQRTPS_NR256,
20926 IX86_BUILTIN_RCPPS256,
20928 IX86_BUILTIN_ROUNDPD256,
20929 IX86_BUILTIN_ROUNDPS256,
20931 IX86_BUILTIN_UNPCKHPD256,
20932 IX86_BUILTIN_UNPCKLPD256,
20933 IX86_BUILTIN_UNPCKHPS256,
20934 IX86_BUILTIN_UNPCKLPS256,
20936 IX86_BUILTIN_SI256_SI,
20937 IX86_BUILTIN_PS256_PS,
20938 IX86_BUILTIN_PD256_PD,
20939 IX86_BUILTIN_SI_SI256,
20940 IX86_BUILTIN_PS_PS256,
20941 IX86_BUILTIN_PD_PD256,
20943 IX86_BUILTIN_VTESTZPD,
20944 IX86_BUILTIN_VTESTCPD,
20945 IX86_BUILTIN_VTESTNZCPD,
20946 IX86_BUILTIN_VTESTZPS,
20947 IX86_BUILTIN_VTESTCPS,
20948 IX86_BUILTIN_VTESTNZCPS,
20949 IX86_BUILTIN_VTESTZPD256,
20950 IX86_BUILTIN_VTESTCPD256,
20951 IX86_BUILTIN_VTESTNZCPD256,
20952 IX86_BUILTIN_VTESTZPS256,
20953 IX86_BUILTIN_VTESTCPS256,
20954 IX86_BUILTIN_VTESTNZCPS256,
20955 IX86_BUILTIN_PTESTZ256,
20956 IX86_BUILTIN_PTESTC256,
20957 IX86_BUILTIN_PTESTNZC256,
20959 IX86_BUILTIN_MOVMSKPD256,
20960 IX86_BUILTIN_MOVMSKPS256,
20962 /* TFmode support builtins. */
20964 IX86_BUILTIN_HUGE_VALQ,
20965 IX86_BUILTIN_FABSQ,
20966 IX86_BUILTIN_COPYSIGNQ,
20968 /* SSE5 instructions */
20969 IX86_BUILTIN_FMADDSS,
20970 IX86_BUILTIN_FMADDSD,
20971 IX86_BUILTIN_FMADDPS,
20972 IX86_BUILTIN_FMADDPD,
20973 IX86_BUILTIN_FMSUBSS,
20974 IX86_BUILTIN_FMSUBSD,
20975 IX86_BUILTIN_FMSUBPS,
20976 IX86_BUILTIN_FMSUBPD,
20977 IX86_BUILTIN_FNMADDSS,
20978 IX86_BUILTIN_FNMADDSD,
20979 IX86_BUILTIN_FNMADDPS,
20980 IX86_BUILTIN_FNMADDPD,
20981 IX86_BUILTIN_FNMSUBSS,
20982 IX86_BUILTIN_FNMSUBSD,
20983 IX86_BUILTIN_FNMSUBPS,
20984 IX86_BUILTIN_FNMSUBPD,
20985 IX86_BUILTIN_PCMOV,
20986 IX86_BUILTIN_PCMOV_V2DI,
20987 IX86_BUILTIN_PCMOV_V4SI,
20988 IX86_BUILTIN_PCMOV_V8HI,
20989 IX86_BUILTIN_PCMOV_V16QI,
20990 IX86_BUILTIN_PCMOV_V4SF,
20991 IX86_BUILTIN_PCMOV_V2DF,
20992 IX86_BUILTIN_PPERM,
20993 IX86_BUILTIN_PERMPS,
20994 IX86_BUILTIN_PERMPD,
20995 IX86_BUILTIN_PMACSSWW,
20996 IX86_BUILTIN_PMACSWW,
20997 IX86_BUILTIN_PMACSSWD,
20998 IX86_BUILTIN_PMACSWD,
20999 IX86_BUILTIN_PMACSSDD,
21000 IX86_BUILTIN_PMACSDD,
21001 IX86_BUILTIN_PMACSSDQL,
21002 IX86_BUILTIN_PMACSSDQH,
21003 IX86_BUILTIN_PMACSDQL,
21004 IX86_BUILTIN_PMACSDQH,
21005 IX86_BUILTIN_PMADCSSWD,
21006 IX86_BUILTIN_PMADCSWD,
21007 IX86_BUILTIN_PHADDBW,
21008 IX86_BUILTIN_PHADDBD,
21009 IX86_BUILTIN_PHADDBQ,
21010 IX86_BUILTIN_PHADDWD,
21011 IX86_BUILTIN_PHADDWQ,
21012 IX86_BUILTIN_PHADDDQ,
21013 IX86_BUILTIN_PHADDUBW,
21014 IX86_BUILTIN_PHADDUBD,
21015 IX86_BUILTIN_PHADDUBQ,
21016 IX86_BUILTIN_PHADDUWD,
21017 IX86_BUILTIN_PHADDUWQ,
21018 IX86_BUILTIN_PHADDUDQ,
21019 IX86_BUILTIN_PHSUBBW,
21020 IX86_BUILTIN_PHSUBWD,
21021 IX86_BUILTIN_PHSUBDQ,
21022 IX86_BUILTIN_PROTB,
21023 IX86_BUILTIN_PROTW,
21024 IX86_BUILTIN_PROTD,
21025 IX86_BUILTIN_PROTQ,
21026 IX86_BUILTIN_PROTB_IMM,
21027 IX86_BUILTIN_PROTW_IMM,
21028 IX86_BUILTIN_PROTD_IMM,
21029 IX86_BUILTIN_PROTQ_IMM,
21030 IX86_BUILTIN_PSHLB,
21031 IX86_BUILTIN_PSHLW,
21032 IX86_BUILTIN_PSHLD,
21033 IX86_BUILTIN_PSHLQ,
21034 IX86_BUILTIN_PSHAB,
21035 IX86_BUILTIN_PSHAW,
21036 IX86_BUILTIN_PSHAD,
21037 IX86_BUILTIN_PSHAQ,
21038 IX86_BUILTIN_FRCZSS,
21039 IX86_BUILTIN_FRCZSD,
21040 IX86_BUILTIN_FRCZPS,
21041 IX86_BUILTIN_FRCZPD,
21042 IX86_BUILTIN_CVTPH2PS,
21043 IX86_BUILTIN_CVTPS2PH,
21045 IX86_BUILTIN_COMEQSS,
21046 IX86_BUILTIN_COMNESS,
21047 IX86_BUILTIN_COMLTSS,
21048 IX86_BUILTIN_COMLESS,
21049 IX86_BUILTIN_COMGTSS,
21050 IX86_BUILTIN_COMGESS,
21051 IX86_BUILTIN_COMUEQSS,
21052 IX86_BUILTIN_COMUNESS,
21053 IX86_BUILTIN_COMULTSS,
21054 IX86_BUILTIN_COMULESS,
21055 IX86_BUILTIN_COMUGTSS,
21056 IX86_BUILTIN_COMUGESS,
21057 IX86_BUILTIN_COMORDSS,
21058 IX86_BUILTIN_COMUNORDSS,
21059 IX86_BUILTIN_COMFALSESS,
21060 IX86_BUILTIN_COMTRUESS,
21062 IX86_BUILTIN_COMEQSD,
21063 IX86_BUILTIN_COMNESD,
21064 IX86_BUILTIN_COMLTSD,
21065 IX86_BUILTIN_COMLESD,
21066 IX86_BUILTIN_COMGTSD,
21067 IX86_BUILTIN_COMGESD,
21068 IX86_BUILTIN_COMUEQSD,
21069 IX86_BUILTIN_COMUNESD,
21070 IX86_BUILTIN_COMULTSD,
21071 IX86_BUILTIN_COMULESD,
21072 IX86_BUILTIN_COMUGTSD,
21073 IX86_BUILTIN_COMUGESD,
21074 IX86_BUILTIN_COMORDSD,
21075 IX86_BUILTIN_COMUNORDSD,
21076 IX86_BUILTIN_COMFALSESD,
21077 IX86_BUILTIN_COMTRUESD,
21079 IX86_BUILTIN_COMEQPS,
21080 IX86_BUILTIN_COMNEPS,
21081 IX86_BUILTIN_COMLTPS,
21082 IX86_BUILTIN_COMLEPS,
21083 IX86_BUILTIN_COMGTPS,
21084 IX86_BUILTIN_COMGEPS,
21085 IX86_BUILTIN_COMUEQPS,
21086 IX86_BUILTIN_COMUNEPS,
21087 IX86_BUILTIN_COMULTPS,
21088 IX86_BUILTIN_COMULEPS,
21089 IX86_BUILTIN_COMUGTPS,
21090 IX86_BUILTIN_COMUGEPS,
21091 IX86_BUILTIN_COMORDPS,
21092 IX86_BUILTIN_COMUNORDPS,
21093 IX86_BUILTIN_COMFALSEPS,
21094 IX86_BUILTIN_COMTRUEPS,
21096 IX86_BUILTIN_COMEQPD,
21097 IX86_BUILTIN_COMNEPD,
21098 IX86_BUILTIN_COMLTPD,
21099 IX86_BUILTIN_COMLEPD,
21100 IX86_BUILTIN_COMGTPD,
21101 IX86_BUILTIN_COMGEPD,
21102 IX86_BUILTIN_COMUEQPD,
21103 IX86_BUILTIN_COMUNEPD,
21104 IX86_BUILTIN_COMULTPD,
21105 IX86_BUILTIN_COMULEPD,
21106 IX86_BUILTIN_COMUGTPD,
21107 IX86_BUILTIN_COMUGEPD,
21108 IX86_BUILTIN_COMORDPD,
21109 IX86_BUILTIN_COMUNORDPD,
21110 IX86_BUILTIN_COMFALSEPD,
21111 IX86_BUILTIN_COMTRUEPD,
21113 IX86_BUILTIN_PCOMEQUB,
21114 IX86_BUILTIN_PCOMNEUB,
21115 IX86_BUILTIN_PCOMLTUB,
21116 IX86_BUILTIN_PCOMLEUB,
21117 IX86_BUILTIN_PCOMGTUB,
21118 IX86_BUILTIN_PCOMGEUB,
21119 IX86_BUILTIN_PCOMFALSEUB,
21120 IX86_BUILTIN_PCOMTRUEUB,
21121 IX86_BUILTIN_PCOMEQUW,
21122 IX86_BUILTIN_PCOMNEUW,
21123 IX86_BUILTIN_PCOMLTUW,
21124 IX86_BUILTIN_PCOMLEUW,
21125 IX86_BUILTIN_PCOMGTUW,
21126 IX86_BUILTIN_PCOMGEUW,
21127 IX86_BUILTIN_PCOMFALSEUW,
21128 IX86_BUILTIN_PCOMTRUEUW,
21129 IX86_BUILTIN_PCOMEQUD,
21130 IX86_BUILTIN_PCOMNEUD,
21131 IX86_BUILTIN_PCOMLTUD,
21132 IX86_BUILTIN_PCOMLEUD,
21133 IX86_BUILTIN_PCOMGTUD,
21134 IX86_BUILTIN_PCOMGEUD,
21135 IX86_BUILTIN_PCOMFALSEUD,
21136 IX86_BUILTIN_PCOMTRUEUD,
21137 IX86_BUILTIN_PCOMEQUQ,
21138 IX86_BUILTIN_PCOMNEUQ,
21139 IX86_BUILTIN_PCOMLTUQ,
21140 IX86_BUILTIN_PCOMLEUQ,
21141 IX86_BUILTIN_PCOMGTUQ,
21142 IX86_BUILTIN_PCOMGEUQ,
21143 IX86_BUILTIN_PCOMFALSEUQ,
21144 IX86_BUILTIN_PCOMTRUEUQ,
21146 IX86_BUILTIN_PCOMEQB,
21147 IX86_BUILTIN_PCOMNEB,
21148 IX86_BUILTIN_PCOMLTB,
21149 IX86_BUILTIN_PCOMLEB,
21150 IX86_BUILTIN_PCOMGTB,
21151 IX86_BUILTIN_PCOMGEB,
21152 IX86_BUILTIN_PCOMFALSEB,
21153 IX86_BUILTIN_PCOMTRUEB,
21154 IX86_BUILTIN_PCOMEQW,
21155 IX86_BUILTIN_PCOMNEW,
21156 IX86_BUILTIN_PCOMLTW,
21157 IX86_BUILTIN_PCOMLEW,
21158 IX86_BUILTIN_PCOMGTW,
21159 IX86_BUILTIN_PCOMGEW,
21160 IX86_BUILTIN_PCOMFALSEW,
21161 IX86_BUILTIN_PCOMTRUEW,
21162 IX86_BUILTIN_PCOMEQD,
21163 IX86_BUILTIN_PCOMNED,
21164 IX86_BUILTIN_PCOMLTD,
21165 IX86_BUILTIN_PCOMLED,
21166 IX86_BUILTIN_PCOMGTD,
21167 IX86_BUILTIN_PCOMGED,
21168 IX86_BUILTIN_PCOMFALSED,
21169 IX86_BUILTIN_PCOMTRUED,
21170 IX86_BUILTIN_PCOMEQQ,
21171 IX86_BUILTIN_PCOMNEQ,
21172 IX86_BUILTIN_PCOMLTQ,
21173 IX86_BUILTIN_PCOMLEQ,
21174 IX86_BUILTIN_PCOMGTQ,
21175 IX86_BUILTIN_PCOMGEQ,
21176 IX86_BUILTIN_PCOMFALSEQ,
21177 IX86_BUILTIN_PCOMTRUEQ,
21182 /* Table for the ix86 builtin decls. */
21183 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
21185 /* Table of all of the builtin functions that are possible with different ISA's
21186 but are waiting to be built until a function is declared to use that
/* Deferred-declaration record: enough information to call
   add_builtin_function later when the ISA becomes active.
   (NOTE(review): the struct's closing brace is elided by this extraction.)  */
21188 struct GTY(()) builtin_isa {
21189 tree type; /* builtin type to use in the declaration */
21190 const char *name; /* function name */
21191 int isa; /* isa_flags this builtin is defined for */
21192 bool const_p; /* true if the declaration is constant */
/* Parallel table to ix86_builtins holding the deferred declarations.  */
21195 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
21198 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
21199 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
21200 * function decl in the ix86_builtins array. Returns the function decl or
21201 * NULL_TREE, if the builtin was not added.
21203 * If the front end has a special hook for builtin functions, delay adding
21204 * builtin functions that aren't in the current ISA until the ISA is changed
21205 * with function specific optimization. Doing so, can save about 300K for the
21206 * default compiler. When the builtin is expanded, check at that time whether
21209 * If the front end doesn't have a special hook, record all builtins, even if
21210 * it isn't an instruction set in the current ISA in case the user uses
21211 * function specific options for a different ISA, so that we don't get scope
21212 * errors if a builtin is added in the middle of a function scope. */
/* NOTE(review): sampled extraction — the return-type line, braces, the
   `else` keyword, and the final `return decl;` are not visible here.  */
21215 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
21217 tree decl = NULL_TREE;
/* Skip 64-bit-only builtins when not targeting 64-bit.  */
21219 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
21221 ix86_builtins_isa[(int) code].isa = mask;
/* Declare now if the ISA is enabled, or if the front end supports
   extended-scope builtins (in which case deferral is unnecessary).  */
21223 if ((mask & ix86_isa_flags) != 0
21224 || (lang_hooks.builtin_function
21225 == lang_hooks.builtin_function_ext_scope))
21228 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
21230 ix86_builtins[(int) code] = decl;
21231 ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Deferred path: stash the declaration data so ix86_add_new_builtins can
   create the decl later when the ISA is switched on.  */
21235 ix86_builtins[(int) code] = NULL_TREE;
21236 ix86_builtins_isa[(int) code].const_p = false;
21237 ix86_builtins_isa[(int) code].type = type;
21238 ix86_builtins_isa[(int) code].name = name;
21245 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): sampled extraction — the return type, braces, the `if (decl)`
   / `else` lines, and `return decl;` are elided.  When the decl was deferred,
   const-ness is recorded in the isa table instead.  */
21248 def_builtin_const (int mask, const char *name, tree type,
21249 enum ix86_builtins code)
21251 tree decl = def_builtin (mask, name, type, code);
21253 TREE_READONLY (decl) = 1;
21255 ix86_builtins_isa[(int) code].const_p = true;
21260 /* Add any new builtin functions for a given ISA that may not have been
21261 declared. This saves a bit of space compared to adding all of the
21262 declarations to the tree, even if we didn't use them. */
/* NOTE(review): sampled extraction — the return type, local declarations of
   `i`/`decl`, and braces are not visible here.  */
21265 ix86_add_new_builtins (int isa)
/* Walk every deferred slot; a non-NULL type marks a builtin recorded by
   def_builtin but not yet declared.  */
21270 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
21272 if ((ix86_builtins_isa[i].isa & isa) != 0
21273 && ix86_builtins_isa[i].type != NULL_TREE)
21275 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
21276 ix86_builtins_isa[i].type,
21277 i, BUILT_IN_MD, NULL,
/* Clear the type to mark the slot as declared, and apply the deferred
   const-ness now that a decl exists.  */
21280 ix86_builtins[i] = decl;
21281 ix86_builtins_isa[i].type = NULL_TREE;
21282 if (ix86_builtins_isa[i].const_p)
21283 TREE_READONLY (decl) = 1;
21288 /* Bits for builtin_description.flag. */
21290 /* Set when we don't support the comparison natively, and should
21291 swap_comparison in order to support it. */
21292 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One table row describing an ia32 builtin: which ISA option bits gate
   it, which insn pattern expands it, its user-visible name, its
   IX86_BUILTIN_* code and (for comparison builtins) the rtx comparison
   to emit.  NOTE(review): the struct braces and the trailing flag field
   are elided in this excerpt.  */
21294 struct builtin_description
21296 const unsigned int mask;
21297 const enum insn_code icode;
21298 const char *const name;
21299 const enum ix86_builtins code;
21300 const enum rtx_code comparison;
/* Table of the comis/ucomis scalar FP compare builtins: SSE single
   precision (…SS) and SSE2 double precision (…SD) variants, ordered
   comis vs. unordered ucomis forms.  The rtx_code column is the
   comparison each builtin tests.  */
21304 static const struct builtin_description bdesc_comi[] =
/* SSE: comiss variants.  */
21306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
/* SSE: ucomiss variants.  */
21312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
/* SSE2: comisd variants.  */
21318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
/* SSE2: ucomisd variants.  */
21324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 pcmpestr (explicit-length string compare) builtins.  The last
   column carries the CC mode whose flag the builtin reads (CCA/CCC/CCO/
   CCS/CCZ) for the flag-testing variants, 0 for the index/mask forms.  */
21332 static const struct builtin_description bdesc_pcmpestr[] =
21335 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21336 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21337 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21338 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21339 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21340 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21341 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 pcmpistr (implicit-length string compare) builtins; same
   layout as bdesc_pcmpestr above, with the CC mode in the last column
   for the flag-testing variants.  */
21344 static const struct builtin_description bdesc_pcmpistr[] =
21347 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21348 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21349 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21350 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21351 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21352 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21353 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21356 /* Special builtin types */
/* Function-prototype tags for the "special" builtins (those taking or
   returning through pointers: loads, stores, masked moves).  Naming is
   RETURN_FTYPE_ARGS; a leading P means pointer, PC pointer-to-const.
   These values appear in the flag column of bdesc_special_args below.
   NOTE(review): the enum braces and a few enumerator lines are elided
   in this excerpt.  */
21357 enum ix86_special_builtin_type
21359 SPECIAL_FTYPE_UNKNOWN,
21361 V32QI_FTYPE_PCCHAR,
21362 V16QI_FTYPE_PCCHAR,
21364 V8SF_FTYPE_PCFLOAT,
21366 V4DF_FTYPE_PCDOUBLE,
21367 V4SF_FTYPE_PCFLOAT,
21368 V2DF_FTYPE_PCDOUBLE,
21369 V8SF_FTYPE_PCV8SF_V8SF,
21370 V4DF_FTYPE_PCV4DF_V4DF,
21371 V4SF_FTYPE_V4SF_PCV2SF,
21372 V4SF_FTYPE_PCV4SF_V4SF,
21373 V2DF_FTYPE_V2DF_PCDOUBLE,
21374 V2DF_FTYPE_PCV2DF_V2DF,
21376 VOID_FTYPE_PV2SF_V4SF,
21377 VOID_FTYPE_PV4DI_V4DI,
21378 VOID_FTYPE_PV2DI_V2DI,
21379 VOID_FTYPE_PCHAR_V32QI,
21380 VOID_FTYPE_PCHAR_V16QI,
21381 VOID_FTYPE_PFLOAT_V8SF,
21382 VOID_FTYPE_PFLOAT_V4SF,
21383 VOID_FTYPE_PDOUBLE_V4DF,
21384 VOID_FTYPE_PDOUBLE_V2DF,
21386 VOID_FTYPE_PINT_INT,
21387 VOID_FTYPE_PV8SF_V8SF_V8SF,
21388 VOID_FTYPE_PV4DF_V4DF_V4DF,
21389 VOID_FTYPE_PV4SF_V4SF_V4SF,
21390 VOID_FTYPE_PV2DF_V2DF_V2DF
21393 /* Builtin types */
/* Function-prototype tags for the ordinary (value-passing) builtins in
   bdesc_args.  Naming is RETURN_FTYPE_ARGS.  Suffixes seen below:
   _COUNT  — last argument is a shift count;
   _SWAP   — operands are swapped before expansion;
   _PTEST  — ptest-style comparison returning int.
   NOTE(review): the enum braces and many enumerator lines are elided in
   this excerpt; entries are kept verbatim.  */
21394 enum ix86_builtin_type
21397 FLOAT128_FTYPE_FLOAT128,
21399 FLOAT128_FTYPE_FLOAT128_FLOAT128,
21400 INT_FTYPE_V8SF_V8SF_PTEST,
21401 INT_FTYPE_V4DI_V4DI_PTEST,
21402 INT_FTYPE_V4DF_V4DF_PTEST,
21403 INT_FTYPE_V4SF_V4SF_PTEST,
21404 INT_FTYPE_V2DI_V2DI_PTEST,
21405 INT_FTYPE_V2DF_V2DF_PTEST,
21437 V4SF_FTYPE_V4SF_VEC_MERGE,
21446 V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand vector forms.  */
21457 V16QI_FTYPE_V16QI_V16QI,
21458 V16QI_FTYPE_V8HI_V8HI,
21459 V8QI_FTYPE_V8QI_V8QI,
21460 V8QI_FTYPE_V4HI_V4HI,
21461 V8HI_FTYPE_V8HI_V8HI,
21462 V8HI_FTYPE_V8HI_V8HI_COUNT,
21463 V8HI_FTYPE_V16QI_V16QI,
21464 V8HI_FTYPE_V4SI_V4SI,
21465 V8HI_FTYPE_V8HI_SI_COUNT,
21466 V8SF_FTYPE_V8SF_V8SF,
21467 V8SF_FTYPE_V8SF_V8SI,
21468 V4SI_FTYPE_V4SI_V4SI,
21469 V4SI_FTYPE_V4SI_V4SI_COUNT,
21470 V4SI_FTYPE_V8HI_V8HI,
21471 V4SI_FTYPE_V4SF_V4SF,
21472 V4SI_FTYPE_V2DF_V2DF,
21473 V4SI_FTYPE_V4SI_SI_COUNT,
21474 V4HI_FTYPE_V4HI_V4HI,
21475 V4HI_FTYPE_V4HI_V4HI_COUNT,
21476 V4HI_FTYPE_V8QI_V8QI,
21477 V4HI_FTYPE_V2SI_V2SI,
21478 V4HI_FTYPE_V4HI_SI_COUNT,
21479 V4DF_FTYPE_V4DF_V4DF,
21480 V4DF_FTYPE_V4DF_V4DI,
21481 V4SF_FTYPE_V4SF_V4SF,
21482 V4SF_FTYPE_V4SF_V4SF_SWAP,
21483 V4SF_FTYPE_V4SF_V4SI,
21484 V4SF_FTYPE_V4SF_V2SI,
21485 V4SF_FTYPE_V4SF_V2DF,
21486 V4SF_FTYPE_V4SF_DI,
21487 V4SF_FTYPE_V4SF_SI,
21488 V2DI_FTYPE_V2DI_V2DI,
21489 V2DI_FTYPE_V2DI_V2DI_COUNT,
21490 V2DI_FTYPE_V16QI_V16QI,
21491 V2DI_FTYPE_V4SI_V4SI,
21492 V2DI_FTYPE_V2DI_V16QI,
21493 V2DI_FTYPE_V2DF_V2DF,
21494 V2DI_FTYPE_V2DI_SI_COUNT,
21495 V2SI_FTYPE_V2SI_V2SI,
21496 V2SI_FTYPE_V2SI_V2SI_COUNT,
21497 V2SI_FTYPE_V4HI_V4HI,
21498 V2SI_FTYPE_V2SF_V2SF,
21499 V2SI_FTYPE_V2SI_SI_COUNT,
21500 V2DF_FTYPE_V2DF_V2DF,
21501 V2DF_FTYPE_V2DF_V2DF_SWAP,
21502 V2DF_FTYPE_V2DF_V4SF,
21503 V2DF_FTYPE_V2DF_V2DI,
21504 V2DF_FTYPE_V2DF_DI,
21505 V2DF_FTYPE_V2DF_SI,
21506 V2SF_FTYPE_V2SF_V2SF,
21507 V1DI_FTYPE_V1DI_V1DI,
21508 V1DI_FTYPE_V1DI_V1DI_COUNT,
21509 V1DI_FTYPE_V8QI_V8QI,
21510 V1DI_FTYPE_V2SI_V2SI,
21511 V1DI_FTYPE_V1DI_SI_COUNT,
/* Scalar integer forms.  */
21512 UINT64_FTYPE_UINT64_UINT64,
21513 UINT_FTYPE_UINT_UINT,
21514 UINT_FTYPE_UINT_USHORT,
21515 UINT_FTYPE_UINT_UCHAR,
/* Vector op with immediate-int operand.  */
21516 V8HI_FTYPE_V8HI_INT,
21517 V4SI_FTYPE_V4SI_INT,
21518 V4HI_FTYPE_V4HI_INT,
21519 V8SF_FTYPE_V8SF_INT,
21520 V4SI_FTYPE_V8SI_INT,
21521 V4SF_FTYPE_V8SF_INT,
21522 V2DF_FTYPE_V4DF_INT,
21523 V4DF_FTYPE_V4DF_INT,
21524 V4SF_FTYPE_V4SF_INT,
21525 V2DI_FTYPE_V2DI_INT,
21526 V2DI2TI_FTYPE_V2DI_INT,
21527 V2DF_FTYPE_V2DF_INT,
/* Three-operand forms.  */
21528 V16QI_FTYPE_V16QI_V16QI_V16QI,
21529 V8SF_FTYPE_V8SF_V8SF_V8SF,
21530 V4DF_FTYPE_V4DF_V4DF_V4DF,
21531 V4SF_FTYPE_V4SF_V4SF_V4SF,
21532 V2DF_FTYPE_V2DF_V2DF_V2DF,
21533 V16QI_FTYPE_V16QI_V16QI_INT,
21534 V8SI_FTYPE_V8SI_V8SI_INT,
21535 V8SI_FTYPE_V8SI_V4SI_INT,
21536 V8HI_FTYPE_V8HI_V8HI_INT,
21537 V8SF_FTYPE_V8SF_V8SF_INT,
21538 V8SF_FTYPE_V8SF_V4SF_INT,
21539 V4SI_FTYPE_V4SI_V4SI_INT,
21540 V4DF_FTYPE_V4DF_V4DF_INT,
21541 V4DF_FTYPE_V4DF_V2DF_INT,
21542 V4SF_FTYPE_V4SF_V4SF_INT,
21543 V2DI_FTYPE_V2DI_V2DI_INT,
21544 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21545 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21546 V2DF_FTYPE_V2DF_V2DF_INT,
21547 V2DI_FTYPE_V2DI_UINT_UINT,
21548 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21551 /* Special builtins with variable number of arguments. */
/* Table of load/store/fence builtins, expanded via the prototype tags of
   enum ix86_special_builtin_type carried in the last column.  Grouped by
   the gating ISA mask.  NOTE(review): the array braces and a few blank
   separator lines are elided in this excerpt.  */
21552 static const struct builtin_description bdesc_special_args[] =
/* MMX */
21555 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow! */
21558 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
21561 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21562 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21563 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21565 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21566 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21567 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21568 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21570 /* SSE or 3DNow!A */
21571 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21572 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2 */
21575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3 */
21589 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1 */
21592 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A */
21595 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21596 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX */
21599 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21600 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21601 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21603 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21604 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21605 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21606 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21607 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21611 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21612 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21613 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21614 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21615 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21617 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21619 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21631 /* Builtins with variable number of arguments. */
21632 static const struct builtin_description bdesc_args[] =
21635 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21636 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21637 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21638 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21639 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21640 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21642 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21643 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21644 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21645 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21646 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21647 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21648 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21649 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21651 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21652 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21654 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21655 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21656 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21657 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21659 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21660 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21661 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21662 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21663 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21664 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21666 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21667 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21668 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21669 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21670 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21671 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21673 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21674 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21675 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21677 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21679 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21680 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21681 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21682 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21683 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21684 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21686 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21687 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21688 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21689 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21690 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21691 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21693 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21694 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21695 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21696 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21699 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21700 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21701 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21702 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21704 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21705 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21706 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21707 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21708 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21709 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21710 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21711 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21712 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21713 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21714 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21715 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21716 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21717 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21718 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21721 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21722 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21723 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21724 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21725 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21726 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21729 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21730 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21731 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21732 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21733 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21734 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21735 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21736 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21737 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21738 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21739 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21740 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21742 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21744 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21745 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21746 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21747 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21748 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21749 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21750 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21751 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21753 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21754 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21755 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21756 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21757 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21758 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21759 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21760 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21761 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21762 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21763 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21764 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21765 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21766 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21768 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21769 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21770 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21771 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21772 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21773 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21774 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21776 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21777 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21778 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21779 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21781 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21782 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21783 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21784 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21786 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21787 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21788 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21789 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21790 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21792 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21793 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21794   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI }, /* cast ftype enum to int like every other table entry */
21796 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21798 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21799 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21800 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21802 /* SSE MMX or 3Dnow!A */
21803 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21804 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21805 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21807 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21808 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21809 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21810 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21812 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21813 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21815 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21818 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21820 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21821 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21822 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21823 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21824 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21826 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21827 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21828 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21829 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21830 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21832 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21834 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21835 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21836 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21837 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21839 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21840 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21841 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21843 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21844 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21845 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21846 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21847 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21848 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21849 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21850 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21852 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21853 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21854 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21855 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21856 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21857 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21858 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21859 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21860 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21861 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21862 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21863 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21864 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21867 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21868 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21873 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21874 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21875 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21876 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21878 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21879 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21880 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21881 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21883 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21884 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21885 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21887 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21889 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21890 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21891 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21892 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21893 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21894 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21895 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21896 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21898 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21899 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21900 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21901 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21902 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21903 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21904 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21905 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21907 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21908 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21910 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21911 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21912 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21913 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21915 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21916 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21918 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21919 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21920 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21921 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21922 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21923 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21925 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21926 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21927 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21928 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21930 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21931 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21932 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21933 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21934 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21935 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21936 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21937 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21939 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21940 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21943 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21952 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21957 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21958 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21959 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21960 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21961 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21962 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21965 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21966 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21967 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21968 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21969 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21970 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21972 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21973 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21974 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21975 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21981 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21983 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21984 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21986   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI }, /* movq on xmm regs is SSE2; mask must match the sse2_* insn pattern */
21989 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21990 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21993 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21994 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21996 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21997 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21998 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21999 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22000 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22001 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22004 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
22005 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
22006 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
22007 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
22008 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
22009 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
22011 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22012 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22013 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22014 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22015 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22016 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22017 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22018 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22019 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22020 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22021 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22022 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22023 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
22024 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
22025 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22026 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22027 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22028 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22029 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22030 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22031 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22032 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22033 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22034 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22037 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
22038 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
22041 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22042 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22043 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
22044 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
22045 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22046 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22047 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22048 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
22049 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
22050 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
22052 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
22053 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
22054 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
22055 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
22056 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
22057 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
22058 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
22059 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
22060 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
22061 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
22062 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
22063 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
22064 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
22066 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
22067 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22068 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22069 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22070 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22071 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22072 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22073 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22074 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22075 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22076 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22077 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22079 /* SSE4.1 and SSE5 */
22080 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22081 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22082 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22083 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22085 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22086 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22087 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22090 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22091 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
22092 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
22093 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
22094 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
22097 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
22098 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
22099 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
22100 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22103 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
22104 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
22106 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22107 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22108 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22109 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22112 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
22115 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22116 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22117 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22118 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22119 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22120 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22121 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22123 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22124 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22126 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22127 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22128 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22129 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22130 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22131 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22132 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22133 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22134 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22135 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22136 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22137 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22138 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22139 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22140 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22142 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
22143 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
22144 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
22145 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
22147 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22148 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22149 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
22150 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
22151 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22152 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22153 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22154 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22155 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22156 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22157 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22158 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22159 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22160 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
22161 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
22162 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
22163 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
22164 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
22165 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
22166 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22167 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
22168 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22169 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22170 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22171 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22172 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22173 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
22174 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22175 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22176 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22177 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22178 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
22179 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
22180 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
22182 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22183 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22184 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22186 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22187 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22188 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22189 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22190 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22192 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22194 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22195 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22197 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22198 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22199 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22200 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22202 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
22203 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
22204 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
22205 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
22206 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
22207 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
22209 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22210 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22211 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22212 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22213 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22214 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22215 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22216 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22217 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22218 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22219 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22220 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22221 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22222 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22223 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22225 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
22226 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
22230 enum multi_arg_type {
22240 MULTI_ARG_3_PERMPS,
22241 MULTI_ARG_3_PERMPD,
22248 MULTI_ARG_2_DI_IMM,
22249 MULTI_ARG_2_SI_IMM,
22250 MULTI_ARG_2_HI_IMM,
22251 MULTI_ARG_2_QI_IMM,
22252 MULTI_ARG_2_SF_CMP,
22253 MULTI_ARG_2_DF_CMP,
22254 MULTI_ARG_2_DI_CMP,
22255 MULTI_ARG_2_SI_CMP,
22256 MULTI_ARG_2_HI_CMP,
22257 MULTI_ARG_2_QI_CMP,
22280 static const struct builtin_description bdesc_multi_arg[] =
22282 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22283 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22284 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22285 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22286 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22287 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22288 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22289 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22290 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22291 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22293 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22296 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22297 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22299 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22300 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22301 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22302 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
22303 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22304 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
22305 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
22306 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22307 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
22308 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22309 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22312 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22313 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22315 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22316 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22317 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22319 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22320 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22321 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22323 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22324 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22326 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
22328 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22329 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22331 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22332 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22334 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22335 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
22336 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22337 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
22342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22344 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22345 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22346 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22347 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22348 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22351 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22353 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22356 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22361 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22362 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22363 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22364 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22365 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22367 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22368 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22369 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22370 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22371 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22372 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22373 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22375 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22376 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22377 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22378 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22379 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22380 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22381 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22382 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22383 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22384 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22385 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22386 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22387 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22388 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22389 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22390 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22392 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22393 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22394 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22395 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22396 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22397 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22398 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22399 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22400 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22401 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22402 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22404 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22409 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22410 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22411 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22412 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22415 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22417 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22418 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22419 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22420 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22426 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22428 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22429 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22432 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22435 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22436 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22444 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22445 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22446 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22447 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22448 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22450 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22451 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22452 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22453 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22456 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22458 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22459 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22460 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22461 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22462 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22463 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22464 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22466 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22467 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22468 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22469 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22470 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22471 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22472 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22474 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22475 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22476 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22477 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22478 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22479 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22480 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22482 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22483 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22484 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22485 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22486 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22487 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22488 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22490 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22491 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22492 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22493 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22494 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22495 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22496 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22497 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22499 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22500 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22501 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22502 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22503 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22504 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22505 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22506 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22508 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22509 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22510 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22511 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22512 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22513 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22514 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22515 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22518 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22519    in the current target ISA to allow the user to compile particular modules
22520    with different target specific options that differ from the command line
22521    options.  */
22523 ix86_init_mmx_sse_builtins (void)
22525 const struct builtin_description * d;
22528 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22529 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22530 tree V1DI_type_node
22531 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22532 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22533 tree V2DI_type_node
22534 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22535 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22536 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22537 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22538 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22539 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22540 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22542 tree pchar_type_node = build_pointer_type (char_type_node);
22543 tree pcchar_type_node
22544 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22545 tree pfloat_type_node = build_pointer_type (float_type_node);
22546 tree pcfloat_type_node
22547 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22548 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22549 tree pcv2sf_type_node
22550 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22551 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22552 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22555 tree int_ftype_v4sf_v4sf
22556 = build_function_type_list (integer_type_node,
22557 V4SF_type_node, V4SF_type_node, NULL_TREE);
22558 tree v4si_ftype_v4sf_v4sf
22559 = build_function_type_list (V4SI_type_node,
22560 V4SF_type_node, V4SF_type_node, NULL_TREE);
22561 /* MMX/SSE/integer conversions. */
22562 tree int_ftype_v4sf
22563 = build_function_type_list (integer_type_node,
22564 V4SF_type_node, NULL_TREE);
22565 tree int64_ftype_v4sf
22566 = build_function_type_list (long_long_integer_type_node,
22567 V4SF_type_node, NULL_TREE);
22568 tree int_ftype_v8qi
22569 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22570 tree v4sf_ftype_v4sf_int
22571 = build_function_type_list (V4SF_type_node,
22572 V4SF_type_node, integer_type_node, NULL_TREE);
22573 tree v4sf_ftype_v4sf_int64
22574 = build_function_type_list (V4SF_type_node,
22575 V4SF_type_node, long_long_integer_type_node,
22577 tree v4sf_ftype_v4sf_v2si
22578 = build_function_type_list (V4SF_type_node,
22579 V4SF_type_node, V2SI_type_node, NULL_TREE);
22581 /* Miscellaneous. */
22582 tree v8qi_ftype_v4hi_v4hi
22583 = build_function_type_list (V8QI_type_node,
22584 V4HI_type_node, V4HI_type_node, NULL_TREE);
22585 tree v4hi_ftype_v2si_v2si
22586 = build_function_type_list (V4HI_type_node,
22587 V2SI_type_node, V2SI_type_node, NULL_TREE);
22588 tree v4sf_ftype_v4sf_v4sf_int
22589 = build_function_type_list (V4SF_type_node,
22590 V4SF_type_node, V4SF_type_node,
22591 integer_type_node, NULL_TREE);
22592 tree v2si_ftype_v4hi_v4hi
22593 = build_function_type_list (V2SI_type_node,
22594 V4HI_type_node, V4HI_type_node, NULL_TREE);
22595 tree v4hi_ftype_v4hi_int
22596 = build_function_type_list (V4HI_type_node,
22597 V4HI_type_node, integer_type_node, NULL_TREE);
22598 tree v2si_ftype_v2si_int
22599 = build_function_type_list (V2SI_type_node,
22600 V2SI_type_node, integer_type_node, NULL_TREE);
22601 tree v1di_ftype_v1di_int
22602 = build_function_type_list (V1DI_type_node,
22603 V1DI_type_node, integer_type_node, NULL_TREE);
22605 tree void_ftype_void
22606 = build_function_type (void_type_node, void_list_node);
22607 tree void_ftype_unsigned
22608 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22609 tree void_ftype_unsigned_unsigned
22610 = build_function_type_list (void_type_node, unsigned_type_node,
22611 unsigned_type_node, NULL_TREE);
22612 tree void_ftype_pcvoid_unsigned_unsigned
22613 = build_function_type_list (void_type_node, const_ptr_type_node,
22614 unsigned_type_node, unsigned_type_node,
22616 tree unsigned_ftype_void
22617 = build_function_type (unsigned_type_node, void_list_node);
22618 tree v2si_ftype_v4sf
22619 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22620 /* Loads/stores. */
22621 tree void_ftype_v8qi_v8qi_pchar
22622 = build_function_type_list (void_type_node,
22623 V8QI_type_node, V8QI_type_node,
22624 pchar_type_node, NULL_TREE);
22625 tree v4sf_ftype_pcfloat
22626 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22627 tree v4sf_ftype_v4sf_pcv2sf
22628 = build_function_type_list (V4SF_type_node,
22629 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22630 tree void_ftype_pv2sf_v4sf
22631 = build_function_type_list (void_type_node,
22632 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22633 tree void_ftype_pfloat_v4sf
22634 = build_function_type_list (void_type_node,
22635 pfloat_type_node, V4SF_type_node, NULL_TREE);
22636 tree void_ftype_pdi_di
22637 = build_function_type_list (void_type_node,
22638 pdi_type_node, long_long_unsigned_type_node,
22640 tree void_ftype_pv2di_v2di
22641 = build_function_type_list (void_type_node,
22642 pv2di_type_node, V2DI_type_node, NULL_TREE);
22643 /* Normal vector unops. */
22644 tree v4sf_ftype_v4sf
22645 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22646 tree v16qi_ftype_v16qi
22647 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22648 tree v8hi_ftype_v8hi
22649 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22650 tree v4si_ftype_v4si
22651 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22652 tree v8qi_ftype_v8qi
22653 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22654 tree v4hi_ftype_v4hi
22655 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22657 /* Normal vector binops. */
22658 tree v4sf_ftype_v4sf_v4sf
22659 = build_function_type_list (V4SF_type_node,
22660 V4SF_type_node, V4SF_type_node, NULL_TREE);
22661 tree v8qi_ftype_v8qi_v8qi
22662 = build_function_type_list (V8QI_type_node,
22663 V8QI_type_node, V8QI_type_node, NULL_TREE);
22664 tree v4hi_ftype_v4hi_v4hi
22665 = build_function_type_list (V4HI_type_node,
22666 V4HI_type_node, V4HI_type_node, NULL_TREE);
22667 tree v2si_ftype_v2si_v2si
22668 = build_function_type_list (V2SI_type_node,
22669 V2SI_type_node, V2SI_type_node, NULL_TREE);
22670 tree v1di_ftype_v1di_v1di
22671 = build_function_type_list (V1DI_type_node,
22672 V1DI_type_node, V1DI_type_node, NULL_TREE);
22673 tree v1di_ftype_v1di_v1di_int
22674 = build_function_type_list (V1DI_type_node,
22675 V1DI_type_node, V1DI_type_node,
22676 integer_type_node, NULL_TREE);
22677 tree v2si_ftype_v2sf
22678 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22679 tree v2sf_ftype_v2si
22680 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22681 tree v2si_ftype_v2si
22682 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22683 tree v2sf_ftype_v2sf
22684 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22685 tree v2sf_ftype_v2sf_v2sf
22686 = build_function_type_list (V2SF_type_node,
22687 V2SF_type_node, V2SF_type_node, NULL_TREE);
22688 tree v2si_ftype_v2sf_v2sf
22689 = build_function_type_list (V2SI_type_node,
22690 V2SF_type_node, V2SF_type_node, NULL_TREE);
22691 tree pint_type_node = build_pointer_type (integer_type_node);
22692 tree pdouble_type_node = build_pointer_type (double_type_node);
22693 tree pcdouble_type_node = build_pointer_type (
22694 build_type_variant (double_type_node, 1, 0));
22695 tree int_ftype_v2df_v2df
22696 = build_function_type_list (integer_type_node,
22697 V2DF_type_node, V2DF_type_node, NULL_TREE);
22699 tree void_ftype_pcvoid
22700 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22701 tree v4sf_ftype_v4si
22702 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22703 tree v4si_ftype_v4sf
22704 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22705 tree v2df_ftype_v4si
22706 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22707 tree v4si_ftype_v2df
22708 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22709 tree v4si_ftype_v2df_v2df
22710 = build_function_type_list (V4SI_type_node,
22711 V2DF_type_node, V2DF_type_node, NULL_TREE);
22712 tree v2si_ftype_v2df
22713 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22714 tree v4sf_ftype_v2df
22715 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22716 tree v2df_ftype_v2si
22717 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22718 tree v2df_ftype_v4sf
22719 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22720 tree int_ftype_v2df
22721 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22722 tree int64_ftype_v2df
22723 = build_function_type_list (long_long_integer_type_node,
22724 V2DF_type_node, NULL_TREE);
22725 tree v2df_ftype_v2df_int
22726 = build_function_type_list (V2DF_type_node,
22727 V2DF_type_node, integer_type_node, NULL_TREE);
22728 tree v2df_ftype_v2df_int64
22729 = build_function_type_list (V2DF_type_node,
22730 V2DF_type_node, long_long_integer_type_node,
22732 tree v4sf_ftype_v4sf_v2df
22733 = build_function_type_list (V4SF_type_node,
22734 V4SF_type_node, V2DF_type_node, NULL_TREE);
22735 tree v2df_ftype_v2df_v4sf
22736 = build_function_type_list (V2DF_type_node,
22737 V2DF_type_node, V4SF_type_node, NULL_TREE);
22738 tree v2df_ftype_v2df_v2df_int
22739 = build_function_type_list (V2DF_type_node,
22740 V2DF_type_node, V2DF_type_node,
22743 tree v2df_ftype_v2df_pcdouble
22744 = build_function_type_list (V2DF_type_node,
22745 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22746 tree void_ftype_pdouble_v2df
22747 = build_function_type_list (void_type_node,
22748 pdouble_type_node, V2DF_type_node, NULL_TREE);
22749 tree void_ftype_pint_int
22750 = build_function_type_list (void_type_node,
22751 pint_type_node, integer_type_node, NULL_TREE);
22752 tree void_ftype_v16qi_v16qi_pchar
22753 = build_function_type_list (void_type_node,
22754 V16QI_type_node, V16QI_type_node,
22755 pchar_type_node, NULL_TREE);
22756 tree v2df_ftype_pcdouble
22757 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22758 tree v2df_ftype_v2df_v2df
22759 = build_function_type_list (V2DF_type_node,
22760 V2DF_type_node, V2DF_type_node, NULL_TREE);
22761 tree v16qi_ftype_v16qi_v16qi
22762 = build_function_type_list (V16QI_type_node,
22763 V16QI_type_node, V16QI_type_node, NULL_TREE);
22764 tree v8hi_ftype_v8hi_v8hi
22765 = build_function_type_list (V8HI_type_node,
22766 V8HI_type_node, V8HI_type_node, NULL_TREE);
22767 tree v4si_ftype_v4si_v4si
22768 = build_function_type_list (V4SI_type_node,
22769 V4SI_type_node, V4SI_type_node, NULL_TREE);
22770 tree v2di_ftype_v2di_v2di
22771 = build_function_type_list (V2DI_type_node,
22772 V2DI_type_node, V2DI_type_node, NULL_TREE);
22773 tree v2di_ftype_v2df_v2df
22774 = build_function_type_list (V2DI_type_node,
22775 V2DF_type_node, V2DF_type_node, NULL_TREE);
22776 tree v2df_ftype_v2df
22777 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22778 tree v2di_ftype_v2di_int
22779 = build_function_type_list (V2DI_type_node,
22780 V2DI_type_node, integer_type_node, NULL_TREE);
22781 tree v2di_ftype_v2di_v2di_int
22782 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22783 V2DI_type_node, integer_type_node, NULL_TREE);
22784 tree v4si_ftype_v4si_int
22785 = build_function_type_list (V4SI_type_node,
22786 V4SI_type_node, integer_type_node, NULL_TREE);
22787 tree v8hi_ftype_v8hi_int
22788 = build_function_type_list (V8HI_type_node,
22789 V8HI_type_node, integer_type_node, NULL_TREE);
22790 tree v4si_ftype_v8hi_v8hi
22791 = build_function_type_list (V4SI_type_node,
22792 V8HI_type_node, V8HI_type_node, NULL_TREE);
22793 tree v1di_ftype_v8qi_v8qi
22794 = build_function_type_list (V1DI_type_node,
22795 V8QI_type_node, V8QI_type_node, NULL_TREE);
22796 tree v1di_ftype_v2si_v2si
22797 = build_function_type_list (V1DI_type_node,
22798 V2SI_type_node, V2SI_type_node, NULL_TREE);
22799 tree v2di_ftype_v16qi_v16qi
22800 = build_function_type_list (V2DI_type_node,
22801 V16QI_type_node, V16QI_type_node, NULL_TREE);
22802 tree v2di_ftype_v4si_v4si
22803 = build_function_type_list (V2DI_type_node,
22804 V4SI_type_node, V4SI_type_node, NULL_TREE);
22805 tree int_ftype_v16qi
22806 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22807 tree v16qi_ftype_pcchar
22808 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22809 tree void_ftype_pchar_v16qi
22810 = build_function_type_list (void_type_node,
22811 pchar_type_node, V16QI_type_node, NULL_TREE);
22813 tree v2di_ftype_v2di_unsigned_unsigned
22814 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22815 unsigned_type_node, unsigned_type_node,
22817 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22818 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22819 unsigned_type_node, unsigned_type_node,
22821 tree v2di_ftype_v2di_v16qi
22822 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22824 tree v2df_ftype_v2df_v2df_v2df
22825 = build_function_type_list (V2DF_type_node,
22826 V2DF_type_node, V2DF_type_node,
22827 V2DF_type_node, NULL_TREE);
22828 tree v4sf_ftype_v4sf_v4sf_v4sf
22829 = build_function_type_list (V4SF_type_node,
22830 V4SF_type_node, V4SF_type_node,
22831 V4SF_type_node, NULL_TREE);
22832 tree v8hi_ftype_v16qi
22833 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22835 tree v4si_ftype_v16qi
22836 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22838 tree v2di_ftype_v16qi
22839 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22841 tree v4si_ftype_v8hi
22842 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22844 tree v2di_ftype_v8hi
22845 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22847 tree v2di_ftype_v4si
22848 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22850 tree v2di_ftype_pv2di
22851 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22853 tree v16qi_ftype_v16qi_v16qi_int
22854 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22855 V16QI_type_node, integer_type_node,
22857 tree v16qi_ftype_v16qi_v16qi_v16qi
22858 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22859 V16QI_type_node, V16QI_type_node,
22861 tree v8hi_ftype_v8hi_v8hi_int
22862 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22863 V8HI_type_node, integer_type_node,
22865 tree v4si_ftype_v4si_v4si_int
22866 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22867 V4SI_type_node, integer_type_node,
22869 tree int_ftype_v2di_v2di
22870 = build_function_type_list (integer_type_node,
22871 V2DI_type_node, V2DI_type_node,
22873 tree int_ftype_v16qi_int_v16qi_int_int
22874 = build_function_type_list (integer_type_node,
22881 tree v16qi_ftype_v16qi_int_v16qi_int_int
22882 = build_function_type_list (V16QI_type_node,
22889 tree int_ftype_v16qi_v16qi_int
22890 = build_function_type_list (integer_type_node,
22896 /* SSE5 instructions */
22897 tree v2di_ftype_v2di_v2di_v2di
22898 = build_function_type_list (V2DI_type_node,
22904 tree v4si_ftype_v4si_v4si_v4si
22905 = build_function_type_list (V4SI_type_node,
22911 tree v4si_ftype_v4si_v4si_v2di
22912 = build_function_type_list (V4SI_type_node,
22918 tree v8hi_ftype_v8hi_v8hi_v8hi
22919 = build_function_type_list (V8HI_type_node,
22925 tree v8hi_ftype_v8hi_v8hi_v4si
22926 = build_function_type_list (V8HI_type_node,
22932 tree v2df_ftype_v2df_v2df_v16qi
22933 = build_function_type_list (V2DF_type_node,
22939 tree v4sf_ftype_v4sf_v4sf_v16qi
22940 = build_function_type_list (V4SF_type_node,
22946 tree v2di_ftype_v2di_si
22947 = build_function_type_list (V2DI_type_node,
22952 tree v4si_ftype_v4si_si
22953 = build_function_type_list (V4SI_type_node,
22958 tree v8hi_ftype_v8hi_si
22959 = build_function_type_list (V8HI_type_node,
22964 tree v16qi_ftype_v16qi_si
22965 = build_function_type_list (V16QI_type_node,
22969 tree v4sf_ftype_v4hi
22970 = build_function_type_list (V4SF_type_node,
22974 tree v4hi_ftype_v4sf
22975 = build_function_type_list (V4HI_type_node,
22979 tree v2di_ftype_v2di
22980 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22982 tree v16qi_ftype_v8hi_v8hi
22983 = build_function_type_list (V16QI_type_node,
22984 V8HI_type_node, V8HI_type_node,
22986 tree v8hi_ftype_v4si_v4si
22987 = build_function_type_list (V8HI_type_node,
22988 V4SI_type_node, V4SI_type_node,
22990 tree v8hi_ftype_v16qi_v16qi
22991 = build_function_type_list (V8HI_type_node,
22992 V16QI_type_node, V16QI_type_node,
22994 tree v4hi_ftype_v8qi_v8qi
22995 = build_function_type_list (V4HI_type_node,
22996 V8QI_type_node, V8QI_type_node,
22998 tree unsigned_ftype_unsigned_uchar
22999 = build_function_type_list (unsigned_type_node,
23000 unsigned_type_node,
23001 unsigned_char_type_node,
23003 tree unsigned_ftype_unsigned_ushort
23004 = build_function_type_list (unsigned_type_node,
23005 unsigned_type_node,
23006 short_unsigned_type_node,
23008 tree unsigned_ftype_unsigned_unsigned
23009 = build_function_type_list (unsigned_type_node,
23010 unsigned_type_node,
23011 unsigned_type_node,
23013 tree uint64_ftype_uint64_uint64
23014 = build_function_type_list (long_long_unsigned_type_node,
23015 long_long_unsigned_type_node,
23016 long_long_unsigned_type_node,
23018 tree float_ftype_float
23019 = build_function_type_list (float_type_node,
23024 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
23026 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
23028 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
23030 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
23032 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
23034 tree v8sf_ftype_v8sf
23035 = build_function_type_list (V8SF_type_node,
23038 tree v8si_ftype_v8sf
23039 = build_function_type_list (V8SI_type_node,
23042 tree v8sf_ftype_v8si
23043 = build_function_type_list (V8SF_type_node,
23046 tree v4si_ftype_v4df
23047 = build_function_type_list (V4SI_type_node,
23050 tree v4df_ftype_v4df
23051 = build_function_type_list (V4DF_type_node,
23054 tree v4df_ftype_v4si
23055 = build_function_type_list (V4DF_type_node,
23058 tree v4df_ftype_v4sf
23059 = build_function_type_list (V4DF_type_node,
23062 tree v4sf_ftype_v4df
23063 = build_function_type_list (V4SF_type_node,
23066 tree v8sf_ftype_v8sf_v8sf
23067 = build_function_type_list (V8SF_type_node,
23068 V8SF_type_node, V8SF_type_node,
23070 tree v4df_ftype_v4df_v4df
23071 = build_function_type_list (V4DF_type_node,
23072 V4DF_type_node, V4DF_type_node,
23074 tree v8sf_ftype_v8sf_int
23075 = build_function_type_list (V8SF_type_node,
23076 V8SF_type_node, integer_type_node,
23078 tree v4si_ftype_v8si_int
23079 = build_function_type_list (V4SI_type_node,
23080 V8SI_type_node, integer_type_node,
23082 tree v4df_ftype_v4df_int
23083 = build_function_type_list (V4DF_type_node,
23084 V4DF_type_node, integer_type_node,
23086 tree v4sf_ftype_v8sf_int
23087 = build_function_type_list (V4SF_type_node,
23088 V8SF_type_node, integer_type_node,
23090 tree v2df_ftype_v4df_int
23091 = build_function_type_list (V2DF_type_node,
23092 V4DF_type_node, integer_type_node,
23094 tree v8sf_ftype_v8sf_v8sf_int
23095 = build_function_type_list (V8SF_type_node,
23096 V8SF_type_node, V8SF_type_node,
23099 tree v8sf_ftype_v8sf_v8sf_v8sf
23100 = build_function_type_list (V8SF_type_node,
23101 V8SF_type_node, V8SF_type_node,
23104 tree v4df_ftype_v4df_v4df_v4df
23105 = build_function_type_list (V4DF_type_node,
23106 V4DF_type_node, V4DF_type_node,
23109 tree v8si_ftype_v8si_v8si_int
23110 = build_function_type_list (V8SI_type_node,
23111 V8SI_type_node, V8SI_type_node,
23114 tree v4df_ftype_v4df_v4df_int
23115 = build_function_type_list (V4DF_type_node,
23116 V4DF_type_node, V4DF_type_node,
23119 tree v8sf_ftype_pcfloat
23120 = build_function_type_list (V8SF_type_node,
23123 tree v4df_ftype_pcdouble
23124 = build_function_type_list (V4DF_type_node,
23125 pcdouble_type_node,
23127 tree pcv4sf_type_node
23128 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
23129 tree pcv2df_type_node
23130 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
23131 tree v8sf_ftype_pcv4sf
23132 = build_function_type_list (V8SF_type_node,
23135 tree v4df_ftype_pcv2df
23136 = build_function_type_list (V4DF_type_node,
23139 tree v32qi_ftype_pcchar
23140 = build_function_type_list (V32QI_type_node,
23143 tree void_ftype_pchar_v32qi
23144 = build_function_type_list (void_type_node,
23145 pchar_type_node, V32QI_type_node,
23147 tree v8si_ftype_v8si_v4si_int
23148 = build_function_type_list (V8SI_type_node,
23149 V8SI_type_node, V4SI_type_node,
23152 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
23153 tree void_ftype_pv4di_v4di
23154 = build_function_type_list (void_type_node,
23155 pv4di_type_node, V4DI_type_node,
23157 tree v8sf_ftype_v8sf_v4sf_int
23158 = build_function_type_list (V8SF_type_node,
23159 V8SF_type_node, V4SF_type_node,
23162 tree v4df_ftype_v4df_v2df_int
23163 = build_function_type_list (V4DF_type_node,
23164 V4DF_type_node, V2DF_type_node,
23167 tree void_ftype_pfloat_v8sf
23168 = build_function_type_list (void_type_node,
23169 pfloat_type_node, V8SF_type_node,
23171 tree void_ftype_pdouble_v4df
23172 = build_function_type_list (void_type_node,
23173 pdouble_type_node, V4DF_type_node,
23175 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
23176 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
23177 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
23178 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
23179 tree pcv8sf_type_node
23180 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
23181 tree pcv4df_type_node
23182 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
23183 tree v8sf_ftype_pcv8sf_v8sf
23184 = build_function_type_list (V8SF_type_node,
23185 pcv8sf_type_node, V8SF_type_node,
23187 tree v4df_ftype_pcv4df_v4df
23188 = build_function_type_list (V4DF_type_node,
23189 pcv4df_type_node, V4DF_type_node,
23191 tree v4sf_ftype_pcv4sf_v4sf
23192 = build_function_type_list (V4SF_type_node,
23193 pcv4sf_type_node, V4SF_type_node,
23195 tree v2df_ftype_pcv2df_v2df
23196 = build_function_type_list (V2DF_type_node,
23197 pcv2df_type_node, V2DF_type_node,
23199 tree void_ftype_pv8sf_v8sf_v8sf
23200 = build_function_type_list (void_type_node,
23201 pv8sf_type_node, V8SF_type_node,
23204 tree void_ftype_pv4df_v4df_v4df
23205 = build_function_type_list (void_type_node,
23206 pv4df_type_node, V4DF_type_node,
23209 tree void_ftype_pv4sf_v4sf_v4sf
23210 = build_function_type_list (void_type_node,
23211 pv4sf_type_node, V4SF_type_node,
23214 tree void_ftype_pv2df_v2df_v2df
23215 = build_function_type_list (void_type_node,
23216 pv2df_type_node, V2DF_type_node,
23219 tree v4df_ftype_v2df
23220 = build_function_type_list (V4DF_type_node,
23223 tree v8sf_ftype_v4sf
23224 = build_function_type_list (V8SF_type_node,
23227 tree v8si_ftype_v4si
23228 = build_function_type_list (V8SI_type_node,
23231 tree v2df_ftype_v4df
23232 = build_function_type_list (V2DF_type_node,
23235 tree v4sf_ftype_v8sf
23236 = build_function_type_list (V4SF_type_node,
23239 tree v4si_ftype_v8si
23240 = build_function_type_list (V4SI_type_node,
23243 tree int_ftype_v4df
23244 = build_function_type_list (integer_type_node,
23247 tree int_ftype_v8sf
23248 = build_function_type_list (integer_type_node,
23251 tree int_ftype_v8sf_v8sf
23252 = build_function_type_list (integer_type_node,
23253 V8SF_type_node, V8SF_type_node,
23255 tree int_ftype_v4di_v4di
23256 = build_function_type_list (integer_type_node,
23257 V4DI_type_node, V4DI_type_node,
23259 tree int_ftype_v4df_v4df
23260 = build_function_type_list (integer_type_node,
23261 V4DF_type_node, V4DF_type_node,
23263 tree v8sf_ftype_v8sf_v8si
23264 = build_function_type_list (V8SF_type_node,
23265 V8SF_type_node, V8SI_type_node,
23267 tree v4df_ftype_v4df_v4di
23268 = build_function_type_list (V4DF_type_node,
23269 V4DF_type_node, V4DI_type_node,
23271 tree v4sf_ftype_v4sf_v4si
23272 = build_function_type_list (V4SF_type_node,
23273 V4SF_type_node, V4SI_type_node, NULL_TREE);
23274 tree v2df_ftype_v2df_v2di
23275 = build_function_type_list (V2DF_type_node,
23276 V2DF_type_node, V2DI_type_node, NULL_TREE);
23280 /* Add all special builtins with variable number of operands. */
23281 for (i = 0, d = bdesc_special_args;
23282 i < ARRAY_SIZE (bdesc_special_args);
23290 switch ((enum ix86_special_builtin_type) d->flag)
23292 case VOID_FTYPE_VOID:
23293 type = void_ftype_void;
23295 case V32QI_FTYPE_PCCHAR:
23296 type = v32qi_ftype_pcchar;
23298 case V16QI_FTYPE_PCCHAR:
23299 type = v16qi_ftype_pcchar;
23301 case V8SF_FTYPE_PCV4SF:
23302 type = v8sf_ftype_pcv4sf;
23304 case V8SF_FTYPE_PCFLOAT:
23305 type = v8sf_ftype_pcfloat;
23307 case V4DF_FTYPE_PCV2DF:
23308 type = v4df_ftype_pcv2df;
23310 case V4DF_FTYPE_PCDOUBLE:
23311 type = v4df_ftype_pcdouble;
23313 case V4SF_FTYPE_PCFLOAT:
23314 type = v4sf_ftype_pcfloat;
23316 case V2DI_FTYPE_PV2DI:
23317 type = v2di_ftype_pv2di;
23319 case V2DF_FTYPE_PCDOUBLE:
23320 type = v2df_ftype_pcdouble;
23322 case V8SF_FTYPE_PCV8SF_V8SF:
23323 type = v8sf_ftype_pcv8sf_v8sf;
23325 case V4DF_FTYPE_PCV4DF_V4DF:
23326 type = v4df_ftype_pcv4df_v4df;
23328 case V4SF_FTYPE_V4SF_PCV2SF:
23329 type = v4sf_ftype_v4sf_pcv2sf;
23331 case V4SF_FTYPE_PCV4SF_V4SF:
23332 type = v4sf_ftype_pcv4sf_v4sf;
23334 case V2DF_FTYPE_V2DF_PCDOUBLE:
23335 type = v2df_ftype_v2df_pcdouble;
23337 case V2DF_FTYPE_PCV2DF_V2DF:
23338 type = v2df_ftype_pcv2df_v2df;
23340 case VOID_FTYPE_PV2SF_V4SF:
23341 type = void_ftype_pv2sf_v4sf;
23343 case VOID_FTYPE_PV4DI_V4DI:
23344 type = void_ftype_pv4di_v4di;
23346 case VOID_FTYPE_PV2DI_V2DI:
23347 type = void_ftype_pv2di_v2di;
23349 case VOID_FTYPE_PCHAR_V32QI:
23350 type = void_ftype_pchar_v32qi;
23352 case VOID_FTYPE_PCHAR_V16QI:
23353 type = void_ftype_pchar_v16qi;
23355 case VOID_FTYPE_PFLOAT_V8SF:
23356 type = void_ftype_pfloat_v8sf;
23358 case VOID_FTYPE_PFLOAT_V4SF:
23359 type = void_ftype_pfloat_v4sf;
23361 case VOID_FTYPE_PDOUBLE_V4DF:
23362 type = void_ftype_pdouble_v4df;
23364 case VOID_FTYPE_PDOUBLE_V2DF:
23365 type = void_ftype_pdouble_v2df;
23367 case VOID_FTYPE_PDI_DI:
23368 type = void_ftype_pdi_di;
23370 case VOID_FTYPE_PINT_INT:
23371 type = void_ftype_pint_int;
23373 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23374 type = void_ftype_pv8sf_v8sf_v8sf;
23376 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23377 type = void_ftype_pv4df_v4df_v4df;
23379 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23380 type = void_ftype_pv4sf_v4sf_v4sf;
23382 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23383 type = void_ftype_pv2df_v2df_v2df;
23386 gcc_unreachable ();
23389 def_builtin (d->mask, d->name, type, d->code);
23392 /* Add all builtins with variable number of operands. */
23393 for (i = 0, d = bdesc_args;
23394 i < ARRAY_SIZE (bdesc_args);
23402 switch ((enum ix86_builtin_type) d->flag)
23404 case FLOAT_FTYPE_FLOAT:
23405 type = float_ftype_float;
23407 case INT_FTYPE_V8SF_V8SF_PTEST:
23408 type = int_ftype_v8sf_v8sf;
23410 case INT_FTYPE_V4DI_V4DI_PTEST:
23411 type = int_ftype_v4di_v4di;
23413 case INT_FTYPE_V4DF_V4DF_PTEST:
23414 type = int_ftype_v4df_v4df;
23416 case INT_FTYPE_V4SF_V4SF_PTEST:
23417 type = int_ftype_v4sf_v4sf;
23419 case INT_FTYPE_V2DI_V2DI_PTEST:
23420 type = int_ftype_v2di_v2di;
23422 case INT_FTYPE_V2DF_V2DF_PTEST:
23423 type = int_ftype_v2df_v2df;
23425 case INT64_FTYPE_V4SF:
23426 type = int64_ftype_v4sf;
23428 case INT64_FTYPE_V2DF:
23429 type = int64_ftype_v2df;
23431 case INT_FTYPE_V16QI:
23432 type = int_ftype_v16qi;
23434 case INT_FTYPE_V8QI:
23435 type = int_ftype_v8qi;
23437 case INT_FTYPE_V8SF:
23438 type = int_ftype_v8sf;
23440 case INT_FTYPE_V4DF:
23441 type = int_ftype_v4df;
23443 case INT_FTYPE_V4SF:
23444 type = int_ftype_v4sf;
23446 case INT_FTYPE_V2DF:
23447 type = int_ftype_v2df;
23449 case V16QI_FTYPE_V16QI:
23450 type = v16qi_ftype_v16qi;
23452 case V8SI_FTYPE_V8SF:
23453 type = v8si_ftype_v8sf;
23455 case V8SI_FTYPE_V4SI:
23456 type = v8si_ftype_v4si;
23458 case V8HI_FTYPE_V8HI:
23459 type = v8hi_ftype_v8hi;
23461 case V8HI_FTYPE_V16QI:
23462 type = v8hi_ftype_v16qi;
23464 case V8QI_FTYPE_V8QI:
23465 type = v8qi_ftype_v8qi;
23467 case V8SF_FTYPE_V8SF:
23468 type = v8sf_ftype_v8sf;
23470 case V8SF_FTYPE_V8SI:
23471 type = v8sf_ftype_v8si;
23473 case V8SF_FTYPE_V4SF:
23474 type = v8sf_ftype_v4sf;
23476 case V4SI_FTYPE_V4DF:
23477 type = v4si_ftype_v4df;
23479 case V4SI_FTYPE_V4SI:
23480 type = v4si_ftype_v4si;
23482 case V4SI_FTYPE_V16QI:
23483 type = v4si_ftype_v16qi;
23485 case V4SI_FTYPE_V8SI:
23486 type = v4si_ftype_v8si;
23488 case V4SI_FTYPE_V8HI:
23489 type = v4si_ftype_v8hi;
23491 case V4SI_FTYPE_V4SF:
23492 type = v4si_ftype_v4sf;
23494 case V4SI_FTYPE_V2DF:
23495 type = v4si_ftype_v2df;
23497 case V4HI_FTYPE_V4HI:
23498 type = v4hi_ftype_v4hi;
23500 case V4DF_FTYPE_V4DF:
23501 type = v4df_ftype_v4df;
23503 case V4DF_FTYPE_V4SI:
23504 type = v4df_ftype_v4si;
23506 case V4DF_FTYPE_V4SF:
23507 type = v4df_ftype_v4sf;
23509 case V4DF_FTYPE_V2DF:
23510 type = v4df_ftype_v2df;
23512 case V4SF_FTYPE_V4SF:
23513 case V4SF_FTYPE_V4SF_VEC_MERGE:
23514 type = v4sf_ftype_v4sf;
23516 case V4SF_FTYPE_V8SF:
23517 type = v4sf_ftype_v8sf;
23519 case V4SF_FTYPE_V4SI:
23520 type = v4sf_ftype_v4si;
23522 case V4SF_FTYPE_V4DF:
23523 type = v4sf_ftype_v4df;
23525 case V4SF_FTYPE_V2DF:
23526 type = v4sf_ftype_v2df;
23528 case V2DI_FTYPE_V2DI:
23529 type = v2di_ftype_v2di;
23531 case V2DI_FTYPE_V16QI:
23532 type = v2di_ftype_v16qi;
23534 case V2DI_FTYPE_V8HI:
23535 type = v2di_ftype_v8hi;
23537 case V2DI_FTYPE_V4SI:
23538 type = v2di_ftype_v4si;
23540 case V2SI_FTYPE_V2SI:
23541 type = v2si_ftype_v2si;
23543 case V2SI_FTYPE_V4SF:
23544 type = v2si_ftype_v4sf;
23546 case V2SI_FTYPE_V2DF:
23547 type = v2si_ftype_v2df;
23549 case V2SI_FTYPE_V2SF:
23550 type = v2si_ftype_v2sf;
23552 case V2DF_FTYPE_V4DF:
23553 type = v2df_ftype_v4df;
23555 case V2DF_FTYPE_V4SF:
23556 type = v2df_ftype_v4sf;
23558 case V2DF_FTYPE_V2DF:
23559 case V2DF_FTYPE_V2DF_VEC_MERGE:
23560 type = v2df_ftype_v2df;
23562 case V2DF_FTYPE_V2SI:
23563 type = v2df_ftype_v2si;
23565 case V2DF_FTYPE_V4SI:
23566 type = v2df_ftype_v4si;
23568 case V2SF_FTYPE_V2SF:
23569 type = v2sf_ftype_v2sf;
23571 case V2SF_FTYPE_V2SI:
23572 type = v2sf_ftype_v2si;
23574 case V16QI_FTYPE_V16QI_V16QI:
23575 type = v16qi_ftype_v16qi_v16qi;
23577 case V16QI_FTYPE_V8HI_V8HI:
23578 type = v16qi_ftype_v8hi_v8hi;
23580 case V8QI_FTYPE_V8QI_V8QI:
23581 type = v8qi_ftype_v8qi_v8qi;
23583 case V8QI_FTYPE_V4HI_V4HI:
23584 type = v8qi_ftype_v4hi_v4hi;
23586 case V8HI_FTYPE_V8HI_V8HI:
23587 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23588 type = v8hi_ftype_v8hi_v8hi;
23590 case V8HI_FTYPE_V16QI_V16QI:
23591 type = v8hi_ftype_v16qi_v16qi;
23593 case V8HI_FTYPE_V4SI_V4SI:
23594 type = v8hi_ftype_v4si_v4si;
23596 case V8HI_FTYPE_V8HI_SI_COUNT:
23597 type = v8hi_ftype_v8hi_int;
23599 case V8SF_FTYPE_V8SF_V8SF:
23600 type = v8sf_ftype_v8sf_v8sf;
23602 case V8SF_FTYPE_V8SF_V8SI:
23603 type = v8sf_ftype_v8sf_v8si;
23605 case V4SI_FTYPE_V4SI_V4SI:
23606 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23607 type = v4si_ftype_v4si_v4si;
23609 case V4SI_FTYPE_V8HI_V8HI:
23610 type = v4si_ftype_v8hi_v8hi;
23612 case V4SI_FTYPE_V4SF_V4SF:
23613 type = v4si_ftype_v4sf_v4sf;
23615 case V4SI_FTYPE_V2DF_V2DF:
23616 type = v4si_ftype_v2df_v2df;
23618 case V4SI_FTYPE_V4SI_SI_COUNT:
23619 type = v4si_ftype_v4si_int;
23621 case V4HI_FTYPE_V4HI_V4HI:
23622 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23623 type = v4hi_ftype_v4hi_v4hi;
23625 case V4HI_FTYPE_V8QI_V8QI:
23626 type = v4hi_ftype_v8qi_v8qi;
23628 case V4HI_FTYPE_V2SI_V2SI:
23629 type = v4hi_ftype_v2si_v2si;
23631 case V4HI_FTYPE_V4HI_SI_COUNT:
23632 type = v4hi_ftype_v4hi_int;
23634 case V4DF_FTYPE_V4DF_V4DF:
23635 type = v4df_ftype_v4df_v4df;
23637 case V4DF_FTYPE_V4DF_V4DI:
23638 type = v4df_ftype_v4df_v4di;
23640 case V4SF_FTYPE_V4SF_V4SF:
23641 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23642 type = v4sf_ftype_v4sf_v4sf;
23644 case V4SF_FTYPE_V4SF_V4SI:
23645 type = v4sf_ftype_v4sf_v4si;
23647 case V4SF_FTYPE_V4SF_V2SI:
23648 type = v4sf_ftype_v4sf_v2si;
23650 case V4SF_FTYPE_V4SF_V2DF:
23651 type = v4sf_ftype_v4sf_v2df;
23653 case V4SF_FTYPE_V4SF_DI:
23654 type = v4sf_ftype_v4sf_int64;
23656 case V4SF_FTYPE_V4SF_SI:
23657 type = v4sf_ftype_v4sf_int;
23659 case V2DI_FTYPE_V2DI_V2DI:
23660 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23661 type = v2di_ftype_v2di_v2di;
23663 case V2DI_FTYPE_V16QI_V16QI:
23664 type = v2di_ftype_v16qi_v16qi;
23666 case V2DI_FTYPE_V4SI_V4SI:
23667 type = v2di_ftype_v4si_v4si;
23669 case V2DI_FTYPE_V2DI_V16QI:
23670 type = v2di_ftype_v2di_v16qi;
23672 case V2DI_FTYPE_V2DF_V2DF:
23673 type = v2di_ftype_v2df_v2df;
23675 case V2DI_FTYPE_V2DI_SI_COUNT:
23676 type = v2di_ftype_v2di_int;
23678 case V2SI_FTYPE_V2SI_V2SI:
23679 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23680 type = v2si_ftype_v2si_v2si;
23682 case V2SI_FTYPE_V4HI_V4HI:
23683 type = v2si_ftype_v4hi_v4hi;
23685 case V2SI_FTYPE_V2SF_V2SF:
23686 type = v2si_ftype_v2sf_v2sf;
23688 case V2SI_FTYPE_V2SI_SI_COUNT:
23689 type = v2si_ftype_v2si_int;
23691 case V2DF_FTYPE_V2DF_V2DF:
23692 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23693 type = v2df_ftype_v2df_v2df;
23695 case V2DF_FTYPE_V2DF_V4SF:
23696 type = v2df_ftype_v2df_v4sf;
23698 case V2DF_FTYPE_V2DF_V2DI:
23699 type = v2df_ftype_v2df_v2di;
23701 case V2DF_FTYPE_V2DF_DI:
23702 type = v2df_ftype_v2df_int64;
23704 case V2DF_FTYPE_V2DF_SI:
23705 type = v2df_ftype_v2df_int;
23707 case V2SF_FTYPE_V2SF_V2SF:
23708 type = v2sf_ftype_v2sf_v2sf;
23710 case V1DI_FTYPE_V1DI_V1DI:
23711 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23712 type = v1di_ftype_v1di_v1di;
23714 case V1DI_FTYPE_V8QI_V8QI:
23715 type = v1di_ftype_v8qi_v8qi;
23717 case V1DI_FTYPE_V2SI_V2SI:
23718 type = v1di_ftype_v2si_v2si;
23720 case V1DI_FTYPE_V1DI_SI_COUNT:
23721 type = v1di_ftype_v1di_int;
23723 case UINT64_FTYPE_UINT64_UINT64:
23724 type = uint64_ftype_uint64_uint64;
23726 case UINT_FTYPE_UINT_UINT:
23727 type = unsigned_ftype_unsigned_unsigned;
23729 case UINT_FTYPE_UINT_USHORT:
23730 type = unsigned_ftype_unsigned_ushort;
23732 case UINT_FTYPE_UINT_UCHAR:
23733 type = unsigned_ftype_unsigned_uchar;
23735 case V8HI_FTYPE_V8HI_INT:
23736 type = v8hi_ftype_v8hi_int;
23738 case V8SF_FTYPE_V8SF_INT:
23739 type = v8sf_ftype_v8sf_int;
23741 case V4SI_FTYPE_V4SI_INT:
23742 type = v4si_ftype_v4si_int;
23744 case V4SI_FTYPE_V8SI_INT:
23745 type = v4si_ftype_v8si_int;
23747 case V4HI_FTYPE_V4HI_INT:
23748 type = v4hi_ftype_v4hi_int;
23750 case V4DF_FTYPE_V4DF_INT:
23751 type = v4df_ftype_v4df_int;
23753 case V4SF_FTYPE_V4SF_INT:
23754 type = v4sf_ftype_v4sf_int;
23756 case V4SF_FTYPE_V8SF_INT:
23757 type = v4sf_ftype_v8sf_int;
23759 case V2DI_FTYPE_V2DI_INT:
23760 case V2DI2TI_FTYPE_V2DI_INT:
23761 type = v2di_ftype_v2di_int;
23763 case V2DF_FTYPE_V2DF_INT:
23764 type = v2df_ftype_v2df_int;
23766 case V2DF_FTYPE_V4DF_INT:
23767 type = v2df_ftype_v4df_int;
23769 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23770 type = v16qi_ftype_v16qi_v16qi_v16qi;
23772 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23773 type = v8sf_ftype_v8sf_v8sf_v8sf;
23775 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23776 type = v4df_ftype_v4df_v4df_v4df;
23778 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23779 type = v4sf_ftype_v4sf_v4sf_v4sf;
23781 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23782 type = v2df_ftype_v2df_v2df_v2df;
23784 case V16QI_FTYPE_V16QI_V16QI_INT:
23785 type = v16qi_ftype_v16qi_v16qi_int;
23787 case V8SI_FTYPE_V8SI_V8SI_INT:
23788 type = v8si_ftype_v8si_v8si_int;
23790 case V8SI_FTYPE_V8SI_V4SI_INT:
23791 type = v8si_ftype_v8si_v4si_int;
23793 case V8HI_FTYPE_V8HI_V8HI_INT:
23794 type = v8hi_ftype_v8hi_v8hi_int;
23796 case V8SF_FTYPE_V8SF_V8SF_INT:
23797 type = v8sf_ftype_v8sf_v8sf_int;
23799 case V8SF_FTYPE_V8SF_V4SF_INT:
23800 type = v8sf_ftype_v8sf_v4sf_int;
23802 case V4SI_FTYPE_V4SI_V4SI_INT:
23803 type = v4si_ftype_v4si_v4si_int;
23805 case V4DF_FTYPE_V4DF_V4DF_INT:
23806 type = v4df_ftype_v4df_v4df_int;
23808 case V4DF_FTYPE_V4DF_V2DF_INT:
23809 type = v4df_ftype_v4df_v2df_int;
23811 case V4SF_FTYPE_V4SF_V4SF_INT:
23812 type = v4sf_ftype_v4sf_v4sf_int;
23814 case V2DI_FTYPE_V2DI_V2DI_INT:
23815 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23816 type = v2di_ftype_v2di_v2di_int;
23818 case V2DF_FTYPE_V2DF_V2DF_INT:
23819 type = v2df_ftype_v2df_v2df_int;
23821 case V2DI_FTYPE_V2DI_UINT_UINT:
23822 type = v2di_ftype_v2di_unsigned_unsigned;
23824 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23825 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23827 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23828 type = v1di_ftype_v1di_v1di_int;
23831 gcc_unreachable ();
23834 def_builtin_const (d->mask, d->name, type, d->code);
23837 /* pcmpestr[im] insns. */
23838 for (i = 0, d = bdesc_pcmpestr;
23839 i < ARRAY_SIZE (bdesc_pcmpestr);
23842 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23843 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23845 ftype = int_ftype_v16qi_int_v16qi_int_int;
23846 def_builtin_const (d->mask, d->name, ftype, d->code);
23849 /* pcmpistr[im] insns. */
23850 for (i = 0, d = bdesc_pcmpistr;
23851 i < ARRAY_SIZE (bdesc_pcmpistr);
23854 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23855 ftype = v16qi_ftype_v16qi_v16qi_int;
23857 ftype = int_ftype_v16qi_v16qi_int;
23858 def_builtin_const (d->mask, d->name, ftype, d->code);
23861 /* comi/ucomi insns. */
23862 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23863 if (d->mask == OPTION_MASK_ISA_SSE2)
23864 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23866 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23869 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23870 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23872 /* SSE or 3DNow!A */
23873 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23876 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23878 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23879 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23882 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23883 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23886 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23887 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23888 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23889 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23890 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23891 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23894 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23897 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23898 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23900 /* Access to the vec_init patterns. */
23901 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23902 integer_type_node, NULL_TREE);
23903 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23905 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23906 short_integer_type_node,
23907 short_integer_type_node,
23908 short_integer_type_node, NULL_TREE);
23909 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23911 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23912 char_type_node, char_type_node,
23913 char_type_node, char_type_node,
23914 char_type_node, char_type_node,
23915 char_type_node, NULL_TREE);
23916 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23918 /* Access to the vec_extract patterns. */
23919 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23920 integer_type_node, NULL_TREE);
23921 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23923 ftype = build_function_type_list (long_long_integer_type_node,
23924 V2DI_type_node, integer_type_node,
23926 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23928 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23929 integer_type_node, NULL_TREE);
23930 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23932 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23933 integer_type_node, NULL_TREE);
23934 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23936 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23937 integer_type_node, NULL_TREE);
23938 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23940 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23941 integer_type_node, NULL_TREE);
23942 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23944 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23945 integer_type_node, NULL_TREE);
23946 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23948 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23949 integer_type_node, NULL_TREE);
23950 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23952 /* Access to the vec_set patterns. */
23953 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23955 integer_type_node, NULL_TREE);
23956 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23958 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23960 integer_type_node, NULL_TREE);
23961 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23963 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23965 integer_type_node, NULL_TREE);
23966 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23968 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23970 integer_type_node, NULL_TREE);
23971 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23973 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23975 integer_type_node, NULL_TREE);
23976 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23978 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23980 integer_type_node, NULL_TREE);
23981 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23983 /* Add SSE5 multi-arg argument instructions */
23984 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23986 tree mtype = NULL_TREE;
23991 switch ((enum multi_arg_type)d->flag)
23993 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23994 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23995 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23996 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23997 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23998 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23999 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
24000 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
24001 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
24002 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
24003 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
24004 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
24005 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
24006 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
24007 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
24008 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
24009 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
24010 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
24011 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
24012 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
24013 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
24014 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
24015 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
24016 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
24017 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
24018 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
24019 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
24020 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
24021 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
24022 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
24023 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
24024 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
24025 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
24026 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
24027 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
24028 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
24029 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
24030 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
24031 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
24032 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
24033 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
24034 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
24035 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
24036 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
24037 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
24038 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
24039 case MULTI_ARG_UNKNOWN:
24041 gcc_unreachable ();
24045 def_builtin_const (d->mask, d->name, mtype, d->code);
24049 /* Internal method for ix86_init_builtins. */
/* Registers the explicit-ABI stdarg builtins __builtin_ms_va_{start,end,copy}
   and __builtin_sysv_va_{start,end,copy}.  Each declaration is tagged with the
   matching "ms_abi" / "sysv_abi" function attribute so the va_list layout of
   that specific ABI is used regardless of the target's default calling
   convention.
   NOTE(review): this listing has elided lines (the embedded line numbers are
   non-contiguous), so guards and some argument lists are not visible.  */
24052 ix86_init_builtins_va_builtins_abi (void)
24054 tree ms_va_ref, sysv_va_ref;
24055 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
24056 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
24057 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
24058 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Build the attribute lists and the reference/pointer types that the
   builtins' signatures are constructed from.  */
24062 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
24063 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
24064 ms_va_ref = build_reference_type (ms_va_list_type_node);
24066 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Function types: va_end/va_copy are fixed-arity; va_start is varargs.  */
24069 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24070 fnvoid_va_start_ms =
24071 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24072 fnvoid_va_end_sysv =
24073 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
24074 fnvoid_va_start_sysv =
24075 build_varargs_function_type_list (void_type_node, sysv_va_ref,
24077 fnvoid_va_copy_ms =
24078 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
24080 fnvoid_va_copy_sysv =
24081 build_function_type_list (void_type_node, sysv_va_ref,
24082 sysv_va_ref, NULL_TREE);
/* Register all six builtins, reusing the generic BUILT_IN_VA_* codes but
   attaching the ABI attribute so each expands with the right va_list.  */
24084 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
24085 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
24086 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
24087 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
24088 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
24089 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
24090 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
24091 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24092 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24093 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24094 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24095 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level builtin initialization for the i386 back end: registers the
   extended float types (__float80, __float128) and the TFmode builtins
   (__builtin_infq, __builtin_huge_valq, __builtin_fabsq,
   __builtin_copysignq), then delegates to ix86_init_mmx_sse_builtins and
   ix86_init_builtins_va_builtins_abi.
   NOTE(review): lines are elided in this listing (declarations of `ftype`
   and `decl`, some string arguments, and conditional structure around the
   __float80 registration are not visible).  */
24099 ix86_init_builtins (void)
24101 tree float128_type_node = make_node (REAL_TYPE);
24104 /* The __float80 type. */
/* If long double is already the 80-bit extended type (XFmode), register it
   directly under the __float80 name; otherwise build a fresh 80-bit
   REAL_TYPE below.  */
24105 if (TYPE_MODE (long_double_type_node) == XFmode)
24106 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
24110 /* The __float80 type. */
24111 tree float80_type_node = make_node (REAL_TYPE);
24113 TYPE_PRECISION (float80_type_node) = 80;
24114 layout_type (float80_type_node);
24115 (*lang_hooks.types.register_builtin_type) (float80_type_node,
24119 /* The __float128 type. */
24120 TYPE_PRECISION (float128_type_node) = 128;
24121 layout_type (float128_type_node);
24122 (*lang_hooks.types.register_builtin_type) (float128_type_node,
24125 /* TFmode support builtins. */
24126 ftype = build_function_type (float128_type_node, void_list_node);
24127 decl = add_builtin_function ("__builtin_infq", ftype,
24128 IX86_BUILTIN_INFQ, BUILT_IN_MD,
/* Remember the decl so ix86_expand_builtin can find it by code.  */
24130 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
24132 decl = add_builtin_function ("__builtin_huge_valq", ftype,
24133 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
24135 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
24137 /* We will expand them to normal call if SSE2 isn't available since
24138 they are used by libgcc. */
24139 ftype = build_function_type_list (float128_type_node,
24140 float128_type_node,
/* __builtin_fabsq falls back to the libgcc routine __fabstf2.  */
24142 decl = add_builtin_function ("__builtin_fabsq", ftype,
24143 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
24144 "__fabstf2", NULL_TREE);
24145 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
/* Pure function: no side effects, result depends only on arguments.  */
24146 TREE_READONLY (decl) = 1;
24148 ftype = build_function_type_list (float128_type_node,
24149 float128_type_node,
24150 float128_type_node,
/* __builtin_copysignq falls back to the libgcc routine __copysigntf3.  */
24152 decl = add_builtin_function ("__builtin_copysignq", ftype,
24153 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
24154 "__copysigntf3", NULL_TREE);
24155 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
24156 TREE_READONLY (decl) = 1;
24158 ix86_init_mmx_sse_builtins ();
24160 ix86_init_builtins_va_builtins_abi ();
24163 /* Errors in the source file can cause expand_expr to return const0_rtx
24164 where we expect a vector. To avoid crashing, use one of the vector
24165 clear instructions. */
/* If X is the scalar const0_rtx, substitute the zero-vector constant of
   MODE so downstream vector predicates accept it.
   NOTE(review): the trailing `return x;` is elided from this listing.  */
24167 safe_vector_operand (rtx x, enum machine_mode mode)
24169 if (x == const0_rtx)
24170 x = CONST0_RTX (mode);
24174 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expands a two-operand builtin call EXP through insn pattern ICODE,
   placing the result in TARGET (or a fresh register when TARGET is
   unsuitable).  Operand modes are read from insn_data[icode].
   NOTE(review): elided lines include the `rtx pat;` declaration, the
   emit/return tail, and some block braces.  */
24177 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24180 tree arg0 = CALL_EXPR_ARG (exp, 0);
24181 tree arg1 = CALL_EXPR_ARG (exp, 1);
24182 rtx op0 = expand_normal (arg0);
24183 rtx op1 = expand_normal (arg1);
24184 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24185 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24186 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Replace an erroneous scalar zero with a proper zero vector.  */
24188 if (VECTOR_MODE_P (mode0))
24189 op0 = safe_vector_operand (op0, mode0);
24190 if (VECTOR_MODE_P (mode1))
24191 op1 = safe_vector_operand (op1, mode1);
/* Use a new pseudo when optimizing or when TARGET's mode/predicate
   doesn't match the pattern's output operand.  */
24193 if (optimize || !target
24194 || GET_MODE (target) != tmode
24195 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24196 target = gen_reg_rtx (tmode);
/* Special case: an SImode source feeding a TImode operand is widened by
   loading it into a V4SI register (movd) and taking the TImode lowpart.  */
24198 if (GET_MODE (op1) == SImode && mode1 == TImode)
24200 rtx x = gen_reg_rtx (V4SImode);
24201 emit_insn (gen_sse2_loadd (x, op1));
24202 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when the pattern's predicates reject
   them as-is.  */
24205 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24206 op0 = copy_to_mode_reg (mode0, op0);
24207 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24208 op1 = copy_to_mode_reg (mode1, op1);
24210 pat = GEN_FCN (icode) (target, op0, op1);
24219 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expands an SSE5 multi-argument builtin: M_TYPE selects arity and operand
   layout; SUB_CODE is the rtx comparison/sub-operation code used by the
   _CMP and immediate-coded variants.
   NOTE(review): this listing elides the local declarations (`nargs`, `pat`,
   the `args[]` array), the switch statement header, `break`s, and several
   brace/condition lines; comments describe only the visible code.  */
24222 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24223 enum multi_arg_type m_type,
24224 enum rtx_code sub_code)
24229 bool comparison_p = false;
24231 bool last_arg_constant = false;
24232 int num_memory = 0;
24235 enum machine_mode mode;
24238 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Three-operand forms.  */
24242 case MULTI_ARG_3_SF:
24243 case MULTI_ARG_3_DF:
24244 case MULTI_ARG_3_DI:
24245 case MULTI_ARG_3_SI:
24246 case MULTI_ARG_3_SI_DI:
24247 case MULTI_ARG_3_HI:
24248 case MULTI_ARG_3_HI_SI:
24249 case MULTI_ARG_3_QI:
24250 case MULTI_ARG_3_PERMPS:
24251 case MULTI_ARG_3_PERMPD:
/* Plain two-operand forms.  */
24255 case MULTI_ARG_2_SF:
24256 case MULTI_ARG_2_DF:
24257 case MULTI_ARG_2_DI:
24258 case MULTI_ARG_2_SI:
24259 case MULTI_ARG_2_HI:
24260 case MULTI_ARG_2_QI:
/* Two operands where the last must be a compile-time immediate.  */
24264 case MULTI_ARG_2_DI_IMM:
24265 case MULTI_ARG_2_SI_IMM:
24266 case MULTI_ARG_2_HI_IMM:
24267 case MULTI_ARG_2_QI_IMM:
24269 last_arg_constant = true;
/* Single-operand (conversion/unary) forms.  */
24272 case MULTI_ARG_1_SF:
24273 case MULTI_ARG_1_DF:
24274 case MULTI_ARG_1_DI:
24275 case MULTI_ARG_1_SI:
24276 case MULTI_ARG_1_HI:
24277 case MULTI_ARG_1_QI:
24278 case MULTI_ARG_1_SI_DI:
24279 case MULTI_ARG_1_HI_DI:
24280 case MULTI_ARG_1_HI_SI:
24281 case MULTI_ARG_1_QI_DI:
24282 case MULTI_ARG_1_QI_SI:
24283 case MULTI_ARG_1_QI_HI:
24284 case MULTI_ARG_1_PH2PS:
24285 case MULTI_ARG_1_PS2PH:
/* Comparison forms: SUB_CODE is emitted as an explicit comparison rtx.  */
24289 case MULTI_ARG_2_SF_CMP:
24290 case MULTI_ARG_2_DF_CMP:
24291 case MULTI_ARG_2_DI_CMP:
24292 case MULTI_ARG_2_SI_CMP:
24293 case MULTI_ARG_2_HI_CMP:
24294 case MULTI_ARG_2_QI_CMP:
24296 comparison_p = true;
/* "TF" forms: SUB_CODE is passed as an integer operand (see the
   GEN_INT ((int)sub_code) call below).  */
24299 case MULTI_ARG_2_SF_TF:
24300 case MULTI_ARG_2_DF_TF:
24301 case MULTI_ARG_2_DI_TF:
24302 case MULTI_ARG_2_SI_TF:
24303 case MULTI_ARG_2_HI_TF:
24304 case MULTI_ARG_2_QI_TF:
24309 case MULTI_ARG_UNKNOWN:
24311 gcc_unreachable ();
/* Pick or allocate the output register.  */
24314 if (optimize || !target
24315 || GET_MODE (target) != tmode
24316 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24317 target = gen_reg_rtx (tmode);
24319 gcc_assert (nargs <= 4);
/* Expand and legitimize each call argument.  */
24321 for (i = 0; i < nargs; i++)
24323 tree arg = CALL_EXPR_ARG (exp, i);
24324 rtx op = expand_normal (arg);
/* For comparison patterns operand 1 is the comparison rtx, so argument
   i maps to insn operand i+2 instead of i+1.  */
24325 int adjust = (comparison_p) ? 1 : 0;
24326 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24328 if (last_arg_constant && i == nargs-1)
24330 if (!CONST_INT_P (op))
24332 error ("last argument must be an immediate");
/* Return a dummy register so expansion can continue after the error.  */
24333 return gen_reg_rtx (tmode);
24338 if (VECTOR_MODE_P (mode))
24339 op = safe_vector_operand (op, mode);
24341 /* If we aren't optimizing, only allow one memory operand to be
24343 if (memory_operand (op, mode))
24346 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24349 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24351 op = force_reg (mode, op);
24355 args[i].mode = mode;
/* Emit the pattern with the operand count selected by M_TYPE.  */
24361 pat = GEN_FCN (icode) (target, args[0].op);
24366 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24367 GEN_INT ((int)sub_code));
24368 else if (! comparison_p)
24369 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24372 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24376 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24381 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24385 gcc_unreachable ();
24395 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24396    insns with vec_merge. */
/* Expands a one-argument builtin through pattern ICODE where the insn
   takes the source twice (op0 also supplies the merged-through lanes).
   NOTE(review): elided lines include the `rtx target` parameter line, the
   assignment of op1, and the emit/return tail.  */
24399 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24403 tree arg0 = CALL_EXPR_ARG (exp, 0);
24404 rtx op1, op0 = expand_normal (arg0);
24405 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24406 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Pick or allocate the output register.  */
24408 if (optimize || !target
24409 || GET_MODE (target) != tmode
24410 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24411 target = gen_reg_rtx (tmode);
24413 if (VECTOR_MODE_P (mode0))
24414 op0 = safe_vector_operand (op0, mode0);
/* Legitimize operands against the pattern's predicates.  */
24416 if ((optimize && !register_operand (op0, mode0))
24417 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24418 op0 = copy_to_mode_reg (mode0, op0);
24421 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24422 op1 = copy_to_mode_reg (mode0, op1);
24424 pat = GEN_FCN (icode) (target, op0, op1);
24431 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands an SSE compare builtin described by D.  SWAP requests operand
   exchange for comparisons only available in the reversed form; the
   comparison code itself comes from d->comparison and is emitted as an
   explicit rtx operand of the pattern.
   NOTE(review): elided lines include `rtx pat, op2;` declarations, part of
   the swap logic, and the emit/return tail.  */
24434 ix86_expand_sse_compare (const struct builtin_description *d,
24435 tree exp, rtx target, bool swap)
24438 tree arg0 = CALL_EXPR_ARG (exp, 0);
24439 tree arg1 = CALL_EXPR_ARG (exp, 1);
24440 rtx op0 = expand_normal (arg0);
24441 rtx op1 = expand_normal (arg1);
24443 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24444 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24445 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24446 enum rtx_code comparison = d->comparison;
24448 if (VECTOR_MODE_P (mode0))
24449 op0 = safe_vector_operand (op0, mode0);
24450 if (VECTOR_MODE_P (mode1))
24451 op1 = safe_vector_operand (op1, mode1);
24453 /* Swap operands if we have a comparison that isn't available in
/* (visible part of the swap: copy op1 into a fresh register first).  */
24457 rtx tmp = gen_reg_rtx (mode1);
24458 emit_move_insn (tmp, op1);
/* Pick or allocate the output register.  */
24463 if (optimize || !target
24464 || GET_MODE (target) != tmode
24465 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24466 target = gen_reg_rtx (tmode);
/* Legitimize operands against the pattern's predicates.  */
24468 if ((optimize && !register_operand (op0, mode0))
24469 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24470 op0 = copy_to_mode_reg (mode0, op0);
24471 if ((optimize && !register_operand (op1, mode1))
24472 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24473 op1 = copy_to_mode_reg (mode1, op1);
/* Operand 3 of the pattern carries the comparison code.  */
24475 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24476 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24483 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands a comiss/comisd-style builtin: the insn sets the flags, and the
   0/1 result is materialized by storing a setcc-style comparison of the
   flags into the low byte of a zeroed SImode register, which is returned.
   NOTE(review): elided lines include `rtx pat;`, the swap body, the
   emit_insn (pat) call, and the flags-register operand of the final
   comparison rtx.  */
24486 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24490 tree arg0 = CALL_EXPR_ARG (exp, 0);
24491 tree arg1 = CALL_EXPR_ARG (exp, 1);
24492 rtx op0 = expand_normal (arg0);
24493 rtx op1 = expand_normal (arg1);
24494 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24495 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24496 enum rtx_code comparison = d->comparison;
24498 if (VECTOR_MODE_P (mode0))
24499 op0 = safe_vector_operand (op0, mode0);
24500 if (VECTOR_MODE_P (mode1))
24501 op1 = safe_vector_operand (op1, mode1);
24503 /* Swap operands if we have a comparison that isn't available in
24505 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Result register: zero SImode pseudo, written through its QImode low
   byte so only bits 0-7 are set by the comparison.  */
24512 target = gen_reg_rtx (SImode);
24513 emit_move_insn (target, const0_rtx);
24514 target = gen_rtx_SUBREG (QImode, target, 0);
24516 if ((optimize && !register_operand (op0, mode0))
24517 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24518 op0 = copy_to_mode_reg (mode0, op0);
24519 if ((optimize && !register_operand (op1, mode1))
24520 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24521 op1 = copy_to_mode_reg (mode1, op1);
24523 pat = GEN_FCN (d->icode) (op0, op1);
/* setcc: store the flag comparison into the low byte only.  */
24527 emit_insn (gen_rtx_SET (VOIDmode,
24528 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24529 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode pseudo underlying the QImode subreg.  */
24533 return SUBREG_REG (target);
24536 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expands a ptest-style builtin: like ix86_expand_sse_comi, the insn only
   sets flags, and the integer result is produced by a setcc-style write
   into the low byte of a zeroed SImode pseudo.
   NOTE(review): elided lines include `rtx pat;`, the `rtx target`
   parameter line, the emit_insn (pat) call, and the flags operand of the
   final comparison rtx.  */
24539 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24543 tree arg0 = CALL_EXPR_ARG (exp, 0);
24544 tree arg1 = CALL_EXPR_ARG (exp, 1);
24545 rtx op0 = expand_normal (arg0);
24546 rtx op1 = expand_normal (arg1);
24547 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24548 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24549 enum rtx_code comparison = d->comparison;
24551 if (VECTOR_MODE_P (mode0))
24552 op0 = safe_vector_operand (op0, mode0);
24553 if (VECTOR_MODE_P (mode1))
24554 op1 = safe_vector_operand (op1, mode1);
/* Zeroed SImode result, written through its QImode low byte.  */
24556 target = gen_reg_rtx (SImode);
24557 emit_move_insn (target, const0_rtx);
24558 target = gen_rtx_SUBREG (QImode, target, 0);
24560 if ((optimize && !register_operand (op0, mode0))
24561 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24562 op0 = copy_to_mode_reg (mode0, op0);
24563 if ((optimize && !register_operand (op1, mode1))
24564 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24565 op1 = copy_to_mode_reg (mode1, op1);
24567 pat = GEN_FCN (d->icode) (op0, op1);
24571 emit_insn (gen_rtx_SET (VOIDmode,
24572 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24573 gen_rtx_fmt_ee (comparison, QImode,
24577 return SUBREG_REG (target);
24580 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expands the SSE4.2 explicit-length string-compare builtins.  The five
   call arguments are: vector, its length, vector, its length, and an
   8-bit immediate control byte.  PCMPESTRI returns the index result,
   PCMPESTRM the mask result; all other variants return a flag bit read
   from the flags register (selected by d->flag).
   NOTE(review): this listing elides `rtx pat;`, some return statements and
   braces, the emit_insn (pat) calls, and part of the final flags-compare
   rtx.  Also note the visible error string says "a 8-bit" — should read
   "an 8-bit" (cannot be changed in a comments-only edit).  */
24583 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24584 tree exp, rtx target)
24587 tree arg0 = CALL_EXPR_ARG (exp, 0);
24588 tree arg1 = CALL_EXPR_ARG (exp, 1);
24589 tree arg2 = CALL_EXPR_ARG (exp, 2);
24590 tree arg3 = CALL_EXPR_ARG (exp, 3);
24591 tree arg4 = CALL_EXPR_ARG (exp, 4);
24592 rtx scratch0, scratch1;
24593 rtx op0 = expand_normal (arg0);
24594 rtx op1 = expand_normal (arg1);
24595 rtx op2 = expand_normal (arg2);
24596 rtx op3 = expand_normal (arg3);
24597 rtx op4 = expand_normal (arg4);
24598 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Operand modes: 0/1 are the two outputs (index and mask), 2/4 the vector
   inputs, 3/5 the lengths, 6 the immediate.  */
24600 tmode0 = insn_data[d->icode].operand[0].mode;
24601 tmode1 = insn_data[d->icode].operand[1].mode;
24602 modev2 = insn_data[d->icode].operand[2].mode;
24603 modei3 = insn_data[d->icode].operand[3].mode;
24604 modev4 = insn_data[d->icode].operand[4].mode;
24605 modei5 = insn_data[d->icode].operand[5].mode;
24606 modeimm = insn_data[d->icode].operand[6].mode;
24608 if (VECTOR_MODE_P (modev2))
24609 op0 = safe_vector_operand (op0, modev2);
24610 if (VECTOR_MODE_P (modev4))
24611 op2 = safe_vector_operand (op2, modev4);
/* Legitimize each operand against its predicate.  */
24613 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24614 op0 = copy_to_mode_reg (modev2, op0);
24615 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24616 op1 = copy_to_mode_reg (modei3, op1);
24617 if ((optimize && !register_operand (op2, modev4))
24618 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24619 op2 = copy_to_mode_reg (modev4, op2);
24620 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24621 op3 = copy_to_mode_reg (modei5, op3);
24623 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24625 error ("the fifth argument must be a 8-bit immediate");
/* PCMPESTRI: the index output is the result; the mask goes to scratch.  */
24629 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24631 if (optimize || !target
24632 || GET_MODE (target) != tmode0
24633 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24634 target = gen_reg_rtx (tmode0);
24636 scratch1 = gen_reg_rtx (tmode1);
24638 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* PCMPESTRM: the mask output is the result; the index goes to scratch.  */
24640 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24642 if (optimize || !target
24643 || GET_MODE (target) != tmode1
24644 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24645 target = gen_reg_rtx (tmode1);
24647 scratch0 = gen_reg_rtx (tmode0);
24649 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag variants: both outputs go to scratches; result comes from flags.  */
24653 gcc_assert (d->flag);
24655 scratch0 = gen_reg_rtx (tmode0);
24656 scratch1 = gen_reg_rtx (tmode1);
24658 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* setcc the selected flag into the low byte of a zeroed SImode pseudo.  */
24668 target = gen_reg_rtx (SImode);
24669 emit_move_insn (target, const0_rtx);
24670 target = gen_rtx_SUBREG (QImode, target, 0);
24673 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24674 gen_rtx_fmt_ee (EQ, QImode,
24675 gen_rtx_REG ((enum machine_mode) d->flag,
24678 return SUBREG_REG (target);
24685 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expands the SSE4.2 implicit-length string-compare builtins.  The three
   call arguments are: vector, vector, and an 8-bit immediate control byte.
   Structure parallels ix86_expand_sse_pcmpestr, minus the length operands.
   NOTE(review): this listing elides `rtx pat;`, some return statements and
   braces, the emit_insn (pat) calls, and part of the final flags-compare
   rtx.  The visible error string says "a 8-bit" — should read "an 8-bit"
   (cannot be changed in a comments-only edit).  */
24688 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24689 tree exp, rtx target)
24692 tree arg0 = CALL_EXPR_ARG (exp, 0);
24693 tree arg1 = CALL_EXPR_ARG (exp, 1);
24694 tree arg2 = CALL_EXPR_ARG (exp, 2);
24695 rtx scratch0, scratch1;
24696 rtx op0 = expand_normal (arg0);
24697 rtx op1 = expand_normal (arg1);
24698 rtx op2 = expand_normal (arg2);
24699 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Operand modes: 0/1 are the two outputs, 2/3 the vector inputs, 4 the
   immediate.  */
24701 tmode0 = insn_data[d->icode].operand[0].mode;
24702 tmode1 = insn_data[d->icode].operand[1].mode;
24703 modev2 = insn_data[d->icode].operand[2].mode;
24704 modev3 = insn_data[d->icode].operand[3].mode;
24705 modeimm = insn_data[d->icode].operand[4].mode;
24707 if (VECTOR_MODE_P (modev2))
24708 op0 = safe_vector_operand (op0, modev2);
24709 if (VECTOR_MODE_P (modev3))
24710 op1 = safe_vector_operand (op1, modev3);
24712 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24713 op0 = copy_to_mode_reg (modev2, op0);
24714 if ((optimize && !register_operand (op1, modev3))
24715 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24716 op1 = copy_to_mode_reg (modev3, op1);
24718 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24720 error ("the third argument must be a 8-bit immediate");
/* PCMPISTRI: index output is the result; mask goes to scratch.  */
24724 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24726 if (optimize || !target
24727 || GET_MODE (target) != tmode0
24728 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24729 target = gen_reg_rtx (tmode0);
24731 scratch1 = gen_reg_rtx (tmode1);
24733 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* PCMPISTRM: mask output is the result; index goes to scratch.  */
24735 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24737 if (optimize || !target
24738 || GET_MODE (target) != tmode1
24739 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24740 target = gen_reg_rtx (tmode1);
24742 scratch0 = gen_reg_rtx (tmode0);
24744 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag variants: both outputs go to scratches; result comes from flags.  */
24748 gcc_assert (d->flag);
24750 scratch0 = gen_reg_rtx (tmode0);
24751 scratch1 = gen_reg_rtx (tmode1);
24753 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* setcc the selected flag into the low byte of a zeroed SImode pseudo.  */
24763 target = gen_reg_rtx (SImode);
24764 emit_move_insn (target, const0_rtx);
24765 target = gen_rtx_SUBREG (QImode, target, 0);
24768 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24769 gen_rtx_fmt_ee (EQ, QImode,
24770 gen_rtx_REG ((enum machine_mode) d->flag,
24773 return SUBREG_REG (target);
24779 /* Subroutine of ix86_expand_builtin to take care of insns with
24780 variable number of operands. */
24783 ix86_expand_args_builtin (const struct builtin_description *d,
24784 tree exp, rtx target)
24786 rtx pat, real_target;
24787 unsigned int i, nargs;
24788 unsigned int nargs_constant = 0;
24789 int num_memory = 0;
24793 enum machine_mode mode;
24795 bool last_arg_count = false;
24796 enum insn_code icode = d->icode;
24797 const struct insn_data *insn_p = &insn_data[icode];
24798 enum machine_mode tmode = insn_p->operand[0].mode;
24799 enum machine_mode rmode = VOIDmode;
24801 enum rtx_code comparison = d->comparison;
24803 switch ((enum ix86_builtin_type) d->flag)
24805 case INT_FTYPE_V8SF_V8SF_PTEST:
24806 case INT_FTYPE_V4DI_V4DI_PTEST:
24807 case INT_FTYPE_V4DF_V4DF_PTEST:
24808 case INT_FTYPE_V4SF_V4SF_PTEST:
24809 case INT_FTYPE_V2DI_V2DI_PTEST:
24810 case INT_FTYPE_V2DF_V2DF_PTEST:
24811 return ix86_expand_sse_ptest (d, exp, target);
24812 case FLOAT128_FTYPE_FLOAT128:
24813 case FLOAT_FTYPE_FLOAT:
24814 case INT64_FTYPE_V4SF:
24815 case INT64_FTYPE_V2DF:
24816 case INT_FTYPE_V16QI:
24817 case INT_FTYPE_V8QI:
24818 case INT_FTYPE_V8SF:
24819 case INT_FTYPE_V4DF:
24820 case INT_FTYPE_V4SF:
24821 case INT_FTYPE_V2DF:
24822 case V16QI_FTYPE_V16QI:
24823 case V8SI_FTYPE_V8SF:
24824 case V8SI_FTYPE_V4SI:
24825 case V8HI_FTYPE_V8HI:
24826 case V8HI_FTYPE_V16QI:
24827 case V8QI_FTYPE_V8QI:
24828 case V8SF_FTYPE_V8SF:
24829 case V8SF_FTYPE_V8SI:
24830 case V8SF_FTYPE_V4SF:
24831 case V4SI_FTYPE_V4SI:
24832 case V4SI_FTYPE_V16QI:
24833 case V4SI_FTYPE_V4SF:
24834 case V4SI_FTYPE_V8SI:
24835 case V4SI_FTYPE_V8HI:
24836 case V4SI_FTYPE_V4DF:
24837 case V4SI_FTYPE_V2DF:
24838 case V4HI_FTYPE_V4HI:
24839 case V4DF_FTYPE_V4DF:
24840 case V4DF_FTYPE_V4SI:
24841 case V4DF_FTYPE_V4SF:
24842 case V4DF_FTYPE_V2DF:
24843 case V4SF_FTYPE_V4SF:
24844 case V4SF_FTYPE_V4SI:
24845 case V4SF_FTYPE_V8SF:
24846 case V4SF_FTYPE_V4DF:
24847 case V4SF_FTYPE_V2DF:
24848 case V2DI_FTYPE_V2DI:
24849 case V2DI_FTYPE_V16QI:
24850 case V2DI_FTYPE_V8HI:
24851 case V2DI_FTYPE_V4SI:
24852 case V2DF_FTYPE_V2DF:
24853 case V2DF_FTYPE_V4SI:
24854 case V2DF_FTYPE_V4DF:
24855 case V2DF_FTYPE_V4SF:
24856 case V2DF_FTYPE_V2SI:
24857 case V2SI_FTYPE_V2SI:
24858 case V2SI_FTYPE_V4SF:
24859 case V2SI_FTYPE_V2SF:
24860 case V2SI_FTYPE_V2DF:
24861 case V2SF_FTYPE_V2SF:
24862 case V2SF_FTYPE_V2SI:
24865 case V4SF_FTYPE_V4SF_VEC_MERGE:
24866 case V2DF_FTYPE_V2DF_VEC_MERGE:
24867 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24868 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24869 case V16QI_FTYPE_V16QI_V16QI:
24870 case V16QI_FTYPE_V8HI_V8HI:
24871 case V8QI_FTYPE_V8QI_V8QI:
24872 case V8QI_FTYPE_V4HI_V4HI:
24873 case V8HI_FTYPE_V8HI_V8HI:
24874 case V8HI_FTYPE_V16QI_V16QI:
24875 case V8HI_FTYPE_V4SI_V4SI:
24876 case V8SF_FTYPE_V8SF_V8SF:
24877 case V8SF_FTYPE_V8SF_V8SI:
24878 case V4SI_FTYPE_V4SI_V4SI:
24879 case V4SI_FTYPE_V8HI_V8HI:
24880 case V4SI_FTYPE_V4SF_V4SF:
24881 case V4SI_FTYPE_V2DF_V2DF:
24882 case V4HI_FTYPE_V4HI_V4HI:
24883 case V4HI_FTYPE_V8QI_V8QI:
24884 case V4HI_FTYPE_V2SI_V2SI:
24885 case V4DF_FTYPE_V4DF_V4DF:
24886 case V4DF_FTYPE_V4DF_V4DI:
24887 case V4SF_FTYPE_V4SF_V4SF:
24888 case V4SF_FTYPE_V4SF_V4SI:
24889 case V4SF_FTYPE_V4SF_V2SI:
24890 case V4SF_FTYPE_V4SF_V2DF:
24891 case V4SF_FTYPE_V4SF_DI:
24892 case V4SF_FTYPE_V4SF_SI:
24893 case V2DI_FTYPE_V2DI_V2DI:
24894 case V2DI_FTYPE_V16QI_V16QI:
24895 case V2DI_FTYPE_V4SI_V4SI:
24896 case V2DI_FTYPE_V2DI_V16QI:
24897 case V2DI_FTYPE_V2DF_V2DF:
24898 case V2SI_FTYPE_V2SI_V2SI:
24899 case V2SI_FTYPE_V4HI_V4HI:
24900 case V2SI_FTYPE_V2SF_V2SF:
24901 case V2DF_FTYPE_V2DF_V2DF:
24902 case V2DF_FTYPE_V2DF_V4SF:
24903 case V2DF_FTYPE_V2DF_V2DI:
24904 case V2DF_FTYPE_V2DF_DI:
24905 case V2DF_FTYPE_V2DF_SI:
24906 case V2SF_FTYPE_V2SF_V2SF:
24907 case V1DI_FTYPE_V1DI_V1DI:
24908 case V1DI_FTYPE_V8QI_V8QI:
24909 case V1DI_FTYPE_V2SI_V2SI:
24910 if (comparison == UNKNOWN)
24911 return ix86_expand_binop_builtin (icode, exp, target);
24914 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24915 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24916 gcc_assert (comparison != UNKNOWN);
24920 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24921 case V8HI_FTYPE_V8HI_SI_COUNT:
24922 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24923 case V4SI_FTYPE_V4SI_SI_COUNT:
24924 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24925 case V4HI_FTYPE_V4HI_SI_COUNT:
24926 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24927 case V2DI_FTYPE_V2DI_SI_COUNT:
24928 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24929 case V2SI_FTYPE_V2SI_SI_COUNT:
24930 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24931 case V1DI_FTYPE_V1DI_SI_COUNT:
24933 last_arg_count = true;
24935 case UINT64_FTYPE_UINT64_UINT64:
24936 case UINT_FTYPE_UINT_UINT:
24937 case UINT_FTYPE_UINT_USHORT:
24938 case UINT_FTYPE_UINT_UCHAR:
24941 case V2DI2TI_FTYPE_V2DI_INT:
24944 nargs_constant = 1;
24946 case V8HI_FTYPE_V8HI_INT:
24947 case V8SF_FTYPE_V8SF_INT:
24948 case V4SI_FTYPE_V4SI_INT:
24949 case V4SI_FTYPE_V8SI_INT:
24950 case V4HI_FTYPE_V4HI_INT:
24951 case V4DF_FTYPE_V4DF_INT:
24952 case V4SF_FTYPE_V4SF_INT:
24953 case V4SF_FTYPE_V8SF_INT:
24954 case V2DI_FTYPE_V2DI_INT:
24955 case V2DF_FTYPE_V2DF_INT:
24956 case V2DF_FTYPE_V4DF_INT:
24958 nargs_constant = 1;
24960 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24961 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24962 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24963 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24964 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24967 case V16QI_FTYPE_V16QI_V16QI_INT:
24968 case V8HI_FTYPE_V8HI_V8HI_INT:
24969 case V8SI_FTYPE_V8SI_V8SI_INT:
24970 case V8SI_FTYPE_V8SI_V4SI_INT:
24971 case V8SF_FTYPE_V8SF_V8SF_INT:
24972 case V8SF_FTYPE_V8SF_V4SF_INT:
24973 case V4SI_FTYPE_V4SI_V4SI_INT:
24974 case V4DF_FTYPE_V4DF_V4DF_INT:
24975 case V4DF_FTYPE_V4DF_V2DF_INT:
24976 case V4SF_FTYPE_V4SF_V4SF_INT:
24977 case V2DI_FTYPE_V2DI_V2DI_INT:
24978 case V2DF_FTYPE_V2DF_V2DF_INT:
24980 nargs_constant = 1;
24982 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24985 nargs_constant = 1;
24987 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24990 nargs_constant = 1;
24992 case V2DI_FTYPE_V2DI_UINT_UINT:
24994 nargs_constant = 2;
24996 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24998 nargs_constant = 2;
25001 gcc_unreachable ();
25004 gcc_assert (nargs <= ARRAY_SIZE (args));
25006 if (comparison != UNKNOWN)
25008 gcc_assert (nargs == 2);
25009 return ix86_expand_sse_compare (d, exp, target, swap);
25012 if (rmode == VOIDmode || rmode == tmode)
25016 || GET_MODE (target) != tmode
25017 || ! (*insn_p->operand[0].predicate) (target, tmode))
25018 target = gen_reg_rtx (tmode);
25019 real_target = target;
25023 target = gen_reg_rtx (rmode);
25024 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
25027 for (i = 0; i < nargs; i++)
25029 tree arg = CALL_EXPR_ARG (exp, i);
25030 rtx op = expand_normal (arg);
25031 enum machine_mode mode = insn_p->operand[i + 1].mode;
25032 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
25034 if (last_arg_count && (i + 1) == nargs)
25036 /* SIMD shift insns take either an 8-bit immediate or
25037 register as count. But builtin functions take int as
25038 count. If count doesn't match, we put it in register. */
25041 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25042 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
25043 op = copy_to_reg (op);
25046 else if ((nargs - i) <= nargs_constant)
25051 case CODE_FOR_sse4_1_roundpd:
25052 case CODE_FOR_sse4_1_roundps:
25053 case CODE_FOR_sse4_1_roundsd:
25054 case CODE_FOR_sse4_1_roundss:
25055 case CODE_FOR_sse4_1_blendps:
25056 case CODE_FOR_avx_blendpd256:
25057 case CODE_FOR_avx_vpermilv4df:
25058 case CODE_FOR_avx_roundpd256:
25059 case CODE_FOR_avx_roundps256:
25060 error ("the last argument must be a 4-bit immediate");
25063 case CODE_FOR_sse4_1_blendpd:
25064 case CODE_FOR_avx_vpermilv2df:
25065 error ("the last argument must be a 2-bit immediate");
25068 case CODE_FOR_avx_vextractf128v4df:
25069 case CODE_FOR_avx_vextractf128v8sf:
25070 case CODE_FOR_avx_vextractf128v8si:
25071 case CODE_FOR_avx_vinsertf128v4df:
25072 case CODE_FOR_avx_vinsertf128v8sf:
25073 case CODE_FOR_avx_vinsertf128v8si:
25074 error ("the last argument must be a 1-bit immediate");
25077 case CODE_FOR_avx_cmpsdv2df3:
25078 case CODE_FOR_avx_cmpssv4sf3:
25079 case CODE_FOR_avx_cmppdv2df3:
25080 case CODE_FOR_avx_cmppsv4sf3:
25081 case CODE_FOR_avx_cmppdv4df3:
25082 case CODE_FOR_avx_cmppsv8sf3:
25083 error ("the last argument must be a 5-bit immediate");
25087 switch (nargs_constant)
25090 if ((nargs - i) == nargs_constant)
25092 error ("the next to last argument must be an 8-bit immediate");
25096 error ("the last argument must be an 8-bit immediate");
25099 gcc_unreachable ();
25106 if (VECTOR_MODE_P (mode))
25107 op = safe_vector_operand (op, mode);
25109 /* If we aren't optimizing, only allow one memory operand to
25111 if (memory_operand (op, mode))
25114 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25116 if (optimize || !match || num_memory > 1)
25117 op = copy_to_mode_reg (mode, op);
25121 op = copy_to_reg (op);
25122 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25127 args[i].mode = mode;
25133 pat = GEN_FCN (icode) (real_target, args[0].op);
25136 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25139 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25143 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25144 args[2].op, args[3].op);
25147 gcc_unreachable ();
25157 /* Subroutine of ix86_expand_builtin to take care of special insns
25158 with variable number of operands. */
/* NOTE(review): this is a numbered partial listing; the embedded original
   line numbers jump (25158 -> 25161 etc.), so braces, `break;`s, returns
   and some assignments are not visible here.  Comments below describe only
   what the visible lines establish.  */
25161 ix86_expand_special_args_builtin (const struct builtin_description *d,
25162 tree exp, rtx target)
25166 unsigned int i, nargs, arg_adjust, memory;
25170 enum machine_mode mode;
25172 enum insn_code icode = d->icode;
25173 bool last_arg_constant = false;
/* insn_data entry for the chosen pattern; tmode is the mode of operand 0.  */
25174 const struct insn_data *insn_p = &insn_data[icode];
25175 enum machine_mode tmode = insn_p->operand[0].mode;
/* klass distinguishes loads (builtin produces a value) from stores (first
   call argument is the memory destination).  */
25176 enum { load, store } klass;
/* Classify the builtin by its prototype code; the per-case setup of
   nargs/klass/arg_adjust is largely missing from this listing.  */
25178 switch ((enum ix86_special_builtin_type) d->flag)
25180 case VOID_FTYPE_VOID:
25181 emit_insn (GEN_FCN (icode) (target));
25183 case V2DI_FTYPE_PV2DI:
25184 case V32QI_FTYPE_PCCHAR:
25185 case V16QI_FTYPE_PCCHAR:
25186 case V8SF_FTYPE_PCV4SF:
25187 case V8SF_FTYPE_PCFLOAT:
25188 case V4SF_FTYPE_PCFLOAT:
25189 case V4DF_FTYPE_PCV2DF:
25190 case V4DF_FTYPE_PCDOUBLE:
25191 case V2DF_FTYPE_PCDOUBLE:
25196 case VOID_FTYPE_PV2SF_V4SF:
25197 case VOID_FTYPE_PV4DI_V4DI:
25198 case VOID_FTYPE_PV2DI_V2DI:
25199 case VOID_FTYPE_PCHAR_V32QI:
25200 case VOID_FTYPE_PCHAR_V16QI:
25201 case VOID_FTYPE_PFLOAT_V8SF:
25202 case VOID_FTYPE_PFLOAT_V4SF:
25203 case VOID_FTYPE_PDOUBLE_V4DF:
25204 case VOID_FTYPE_PDOUBLE_V2DF:
25205 case VOID_FTYPE_PDI_DI:
25206 case VOID_FTYPE_PINT_INT:
25209 /* Reserve memory operand for target. */
25210 memory = ARRAY_SIZE (args);
25212 case V4SF_FTYPE_V4SF_PCV2SF:
25213 case V2DF_FTYPE_V2DF_PCDOUBLE:
25218 case V8SF_FTYPE_PCV8SF_V8SF:
25219 case V4DF_FTYPE_PCV4DF_V4DF:
25220 case V4SF_FTYPE_PCV4SF_V4SF:
25221 case V2DF_FTYPE_PCV2DF_V2DF:
25226 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25227 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25228 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25229 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25232 /* Reserve memory operand for target. */
25233 memory = ARRAY_SIZE (args);
25236 gcc_unreachable ();
25239 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, the first call argument is an address: wrap it in a MEM of
   the insn's operand-0 mode and use that MEM as the insn target.  */
25241 if (klass == store)
25243 arg = CALL_EXPR_ARG (exp, 0);
25244 op = expand_normal (arg);
25245 gcc_assert (target == 0);
25246 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25254 || GET_MODE (target) != tmode
25255 || ! (*insn_p->operand[0].predicate) (target, tmode))
25256 target = gen_reg_rtx (tmode);
/* Expand each remaining call argument and coerce it into a form accepted
   by the corresponding insn operand predicate.  */
25259 for (i = 0; i < nargs; i++)
25261 enum machine_mode mode = insn_p->operand[i + 1].mode;
25264 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25265 op = expand_normal (arg);
25266 match = (*insn_p->operand[i + 1].predicate) (op, mode);
25268 if (last_arg_constant && (i + 1) == nargs)
25274 error ("the last argument must be an 8-bit immediate");
25282 /* This must be the memory operand. */
25283 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25284 gcc_assert (GET_MODE (op) == mode
25285 || GET_MODE (op) == VOIDmode);
25289 /* This must be register. */
25290 if (VECTOR_MODE_P (mode))
25291 op = safe_vector_operand (op, mode);
25293 gcc_assert (GET_MODE (op) == mode
25294 || GET_MODE (op) == VOIDmode);
25295 op = copy_to_mode_reg (mode, op);
25300 args[i].mode = mode;
/* Emit the pattern with the collected operands (arity dispatch; the
   enclosing switch on nargs is not visible in this listing).  */
25306 pat = GEN_FCN (icode) (target, args[0].op);
25309 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25312 gcc_unreachable ();
/* Stores produce no usable rtx value; loads return the target.  */
25318 return klass == store ? 0 : target;
25321 /* Return the integer constant in ARG. Constrain it to be in the range
25322 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): partial listing — the opening/closing braces and the
   success/failure return statements of this function are not visible.  */
25325 get_element_number (tree vec_type, tree arg)
/* max is the highest valid lane index: TYPE_VECTOR_SUBPARTS - 1.  */
25327 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constant selectors and constants above max; host_integerp /
   tree_low_cst with pos=1 require a non-negative integer constant.  */
25329 if (!host_integerp (arg, 1)
25330 || (elt = tree_low_cst (arg, 1), elt > max))
25332 error ("selector must be an integer constant in the range 0..%wi", max);
25339 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25340 ix86_expand_vector_init. We DO have language-level syntax for this, in
25341 the form of (type){ init-list }. Except that since we can't place emms
25342 instructions from inside the compiler, we can't allow the use of MMX
25343 registers unless the user explicitly asks for it. So we do *not* define
25344 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25345 we have builtins invoked by mmintrin.h that gives us license to emit
25346 these sorts of instructions. */
/* NOTE(review): partial listing — function braces and the final
   `return target;` are not visible here.  */
25349 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25351 enum machine_mode tmode = TYPE_MODE (type);
25352 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25353 int i, n_elt = GET_MODE_NUNITS (tmode);
25354 rtvec v = rtvec_alloc (n_elt);
/* The builtin must supply exactly one argument per vector element.  */
25356 gcc_assert (VECTOR_MODE_P (tmode));
25357 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each argument and narrow it to the element mode.  */
25359 for (i = 0; i < n_elt; ++i)
25361 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25362 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
/* Make sure the destination is a register of the vector mode.  */
25365 if (!target || !register_operand (target, tmode))
25366 target = gen_reg_rtx (tmode);
25368 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25372 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25373 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25374 had a language-level syntax for referencing vector elements. */
/* NOTE(review): partial listing — braces, some declarations (arg0/arg1/
   op0/elt) and the final return are not visible here.  */
25377 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25379 enum machine_mode tmode, mode0;
/* arg0 is the vector, arg1 the constant lane selector.  */
25384 arg0 = CALL_EXPR_ARG (exp, 0);
25385 arg1 = CALL_EXPR_ARG (exp, 1);
25387 op0 = expand_normal (arg0);
25388 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode = element mode, mode0 = vector mode of arg0.  */
25390 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25391 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25392 gcc_assert (VECTOR_MODE_P (mode0));
25394 op0 = force_reg (mode0, op0);
25396 if (optimize || !target || !register_operand (target, tmode))
25397 target = gen_reg_rtx (tmode);
25399 ix86_expand_vector_extract (true, target, op0, elt);
25404 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25405 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25406 a language-level syntax for referencing vector elements. */
/* NOTE(review): partial listing — braces, the declaration of `elt` and the
   final `return target;` are not visible here.  */
25409 ix86_expand_vec_set_builtin (tree exp)
25411 enum machine_mode tmode, mode1;
25412 tree arg0, arg1, arg2;
25414 rtx op0, op1, target;
/* arg0 = vector, arg1 = new element value, arg2 = constant lane index.  */
25416 arg0 = CALL_EXPR_ARG (exp, 0);
25417 arg1 = CALL_EXPR_ARG (exp, 1);
25418 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode = vector mode, mode1 = element mode.  */
25420 tmode = TYPE_MODE (TREE_TYPE (arg0));
25421 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25422 gcc_assert (VECTOR_MODE_P (tmode));
25424 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25425 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25426 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the scalar to the element mode if it came out differently.  */
25428 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25429 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25431 op0 = force_reg (tmode, op0);
25432 op1 = force_reg (mode1, op1);
25434 /* OP0 is the source of these builtin functions and shouldn't be
25435 modified. Create a copy, use it and return it as target. */
25436 target = gen_reg_rtx (tmode);
25437 emit_move_insn (target, op0);
25438 ix86_expand_vector_set (true, target, op1, elt);
25443 /* Expand an expression EXP that calls a built-in function,
25444 with result going to TARGET if that's convenient
25445 (and in mode MODE if that's convenient).
25446 SUBTARGET may be used as the target for computing one of EXP's operands.
25447 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): partial listing — the embedded original line numbers jump,
   so the enclosing `switch (fcode)`, `break;`s, braces and several returns
   are missing from this view.  Comments describe only visible lines.  */
25450 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25451 enum machine_mode mode ATTRIBUTE_UNUSED,
25452 int ignore ATTRIBUTE_UNUSED)
25454 const struct builtin_description *d;
25456 enum insn_code icode;
25457 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25458 tree arg0, arg1, arg2;
25459 rtx op0, op1, op2, pat;
25460 enum machine_mode mode0, mode1, mode2;
25461 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25463 /* Determine whether the builtin function is available under the current ISA.
25464 Originally the builtin was not created if it wasn't applicable to the
25465 current ISA based on the command line switches. With function specific
25466 options, we need to check in the context of the function making the call
25467 whether it is supported. */
25468 if (ix86_builtins_isa[fcode].isa
25469 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
/* Build a printable option string for the diagnostic below.  */
25471 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25472 NULL, NULL, false);
25475 error ("%qE needs unknown isa option", fndecl);
25478 gcc_assert (opts != NULL);
25479 error ("%qE needs isa option %s", fndecl, opts);
/* Builtins that need hand-written expansion (part of a switch on fcode
   whose header is not visible in this listing).  */
25487 case IX86_BUILTIN_MASKMOVQ:
25488 case IX86_BUILTIN_MASKMOVDQU:
25489 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25490 ? CODE_FOR_mmx_maskmovq
25491 : CODE_FOR_sse2_maskmovdqu);
25492 /* Note the arg order is different from the operand order. */
25493 arg1 = CALL_EXPR_ARG (exp, 0);
25494 arg2 = CALL_EXPR_ARG (exp, 1);
25495 arg0 = CALL_EXPR_ARG (exp, 2);
25496 op0 = expand_normal (arg0);
25497 op1 = expand_normal (arg1);
25498 op2 = expand_normal (arg2);
25499 mode0 = insn_data[icode].operand[0].mode;
25500 mode1 = insn_data[icode].operand[1].mode;
25501 mode2 = insn_data[icode].operand[2].mode;
/* op0 is the destination address; turn it into a MEM.  */
25503 op0 = force_reg (Pmode, op0);
25504 op0 = gen_rtx_MEM (mode1, op0);
25506 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25507 op0 = copy_to_mode_reg (mode0, op0);
25508 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25509 op1 = copy_to_mode_reg (mode1, op1);
25510 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25511 op2 = copy_to_mode_reg (mode2, op2);
25512 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a virtual stack slot.  */
25518 case IX86_BUILTIN_LDMXCSR:
25519 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25520 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25521 emit_move_insn (target, op0);
25522 emit_insn (gen_sse_ldmxcsr (target));
25525 case IX86_BUILTIN_STMXCSR:
25526 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25527 emit_insn (gen_sse_stmxcsr (target));
25528 return copy_to_mode_reg (SImode, target);
25530 case IX86_BUILTIN_CLFLUSH:
25531 arg0 = CALL_EXPR_ARG (exp, 0);
25532 op0 = expand_normal (arg0);
25533 icode = CODE_FOR_sse2_clflush;
25534 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25535 op0 = copy_to_mode_reg (Pmode, op0);
25537 emit_insn (gen_sse2_clflush (op0));
25540 case IX86_BUILTIN_MONITOR:
25541 arg0 = CALL_EXPR_ARG (exp, 0);
25542 arg1 = CALL_EXPR_ARG (exp, 1);
25543 arg2 = CALL_EXPR_ARG (exp, 2);
25544 op0 = expand_normal (arg0);
25545 op1 = expand_normal (arg1);
25546 op2 = expand_normal (arg2);
25548 op0 = copy_to_mode_reg (Pmode, op0);
25550 op1 = copy_to_mode_reg (SImode, op1);
25552 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor is an indirect generator selected per target.  */
25553 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25556 case IX86_BUILTIN_MWAIT:
25557 arg0 = CALL_EXPR_ARG (exp, 0);
25558 arg1 = CALL_EXPR_ARG (exp, 1);
25559 op0 = expand_normal (arg0);
25560 op1 = expand_normal (arg1);
25562 op0 = copy_to_mode_reg (SImode, op0);
25564 op1 = copy_to_mode_reg (SImode, op1);
25565 emit_insn (gen_sse3_mwait (op0, op1));
/* Vector init/extract/set builtins delegate to their helpers.  */
25568 case IX86_BUILTIN_VEC_INIT_V2SI:
25569 case IX86_BUILTIN_VEC_INIT_V4HI:
25570 case IX86_BUILTIN_VEC_INIT_V8QI:
25571 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25573 case IX86_BUILTIN_VEC_EXT_V2DF:
25574 case IX86_BUILTIN_VEC_EXT_V2DI:
25575 case IX86_BUILTIN_VEC_EXT_V4SF:
25576 case IX86_BUILTIN_VEC_EXT_V4SI:
25577 case IX86_BUILTIN_VEC_EXT_V8HI:
25578 case IX86_BUILTIN_VEC_EXT_V2SI:
25579 case IX86_BUILTIN_VEC_EXT_V4HI:
25580 case IX86_BUILTIN_VEC_EXT_V16QI:
25581 return ix86_expand_vec_ext_builtin (exp, target);
25583 case IX86_BUILTIN_VEC_SET_V2DI:
25584 case IX86_BUILTIN_VEC_SET_V4SF:
25585 case IX86_BUILTIN_VEC_SET_V4SI:
25586 case IX86_BUILTIN_VEC_SET_V8HI:
25587 case IX86_BUILTIN_VEC_SET_V4HI:
25588 case IX86_BUILTIN_VEC_SET_V16QI:
25589 return ix86_expand_vec_set_builtin (exp);
/* __float128 infinity: materialize the constant through memory.  */
25591 case IX86_BUILTIN_INFQ:
25592 case IX86_BUILTIN_HUGE_VALQ:
25594 REAL_VALUE_TYPE inf;
25598 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25600 tmp = validize_mem (force_const_mem (mode, tmp));
25603 target = gen_reg_rtx (mode);
25605 emit_move_insn (target, tmp);
/* Not a hand-written case: scan the generic descriptor tables and
   dispatch to the matching table-driven expander.  */
25613 for (i = 0, d = bdesc_special_args;
25614 i < ARRAY_SIZE (bdesc_special_args);
25616 if (d->code == fcode)
25617 return ix86_expand_special_args_builtin (d, exp, target);
25619 for (i = 0, d = bdesc_args;
25620 i < ARRAY_SIZE (bdesc_args);
25622 if (d->code == fcode)
25625 case IX86_BUILTIN_FABSQ:
25626 case IX86_BUILTIN_COPYSIGNQ:
25628 /* Emit a normal call if SSE2 isn't available. */
25629 return expand_call (exp, target, ignore);
25631 return ix86_expand_args_builtin (d, exp, target);
25634 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25635 if (d->code == fcode)
25636 return ix86_expand_sse_comi (d, exp, target);
25638 for (i = 0, d = bdesc_pcmpestr;
25639 i < ARRAY_SIZE (bdesc_pcmpestr);
25641 if (d->code == fcode)
25642 return ix86_expand_sse_pcmpestr (d, exp, target);
25644 for (i = 0, d = bdesc_pcmpistr;
25645 i < ARRAY_SIZE (bdesc_pcmpistr);
25647 if (d->code == fcode)
25648 return ix86_expand_sse_pcmpistr (d, exp, target);
25650 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25651 if (d->code == fcode)
25652 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25653 (enum multi_arg_type)d->flag,
/* Every valid fcode must have matched one of the tables above.  */
25656 gcc_unreachable ();
25659 /* Returns a function decl for a vectorized version of the builtin function
25660 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25661 if it is not available. */
/* NOTE(review): partial listing — braces, the `switch (fn)` header,
   `break;`s and the NULL_TREE returns are not visible here.  */
25664 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
/* Both sides of the mapping must be vector types.  */
25667 enum machine_mode in_mode, out_mode;
25670 if (TREE_CODE (type_out) != VECTOR_TYPE
25671 || TREE_CODE (type_in) != VECTOR_TYPE)
/* Element modes and lane counts of the requested in/out vector types.  */
25674 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25675 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25676 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25677 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Scalar builtins with a direct SSE vector counterpart.  */
25681 case BUILT_IN_SQRT:
25682 if (out_mode == DFmode && out_n == 2
25683 && in_mode == DFmode && in_n == 2)
25684 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25687 case BUILT_IN_SQRTF:
25688 if (out_mode == SFmode && out_n == 4
25689 && in_mode == SFmode && in_n == 4)
25690 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25693 case BUILT_IN_LRINT:
25694 if (out_mode == SImode && out_n == 4
25695 && in_mode == DFmode && in_n == 2)
25696 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25699 case BUILT_IN_LRINTF:
25700 if (out_mode == SImode && out_n == 4
25701 && in_mode == SFmode && in_n == 4)
25702 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25709 /* Dispatch to a handler for a vectorization library. */
25710 if (ix86_veclib_handler)
25711 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25717 /* Handler for an SVML-style interface to
25718 a library with vectorized intrinsics. */
/* NOTE(review): partial listing — braces, `break;`s, the NULL_TREE
   bail-outs and the final `return new_fndecl;` are not visible here.  */
25721 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25724 tree fntype, new_fndecl, args;
25727 enum machine_mode el_mode, in_mode;
25730 /* The SVML is suitable for unsafe math only. */
25731 if (!flag_unsafe_math_optimizations)
/* Element modes and lane counts of the requested in/out vector types.  */
25734 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25735 n = TYPE_VECTOR_SUBPARTS (type_out);
25736 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25737 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25738 if (el_mode != in_mode
/* Double variants: SVML provides them as 2-lane DFmode vectors.  */
25746 case BUILT_IN_LOG10:
25748 case BUILT_IN_TANH:
25750 case BUILT_IN_ATAN:
25751 case BUILT_IN_ATAN2:
25752 case BUILT_IN_ATANH:
25753 case BUILT_IN_CBRT:
25754 case BUILT_IN_SINH:
25756 case BUILT_IN_ASINH:
25757 case BUILT_IN_ASIN:
25758 case BUILT_IN_COSH:
25760 case BUILT_IN_ACOSH:
25761 case BUILT_IN_ACOS:
25762 if (el_mode != DFmode || n != 2)
/* Float variants: SVML provides them as 4-lane SFmode vectors.  */
25766 case BUILT_IN_EXPF:
25767 case BUILT_IN_LOGF:
25768 case BUILT_IN_LOG10F:
25769 case BUILT_IN_POWF:
25770 case BUILT_IN_TANHF:
25771 case BUILT_IN_TANF:
25772 case BUILT_IN_ATANF:
25773 case BUILT_IN_ATAN2F:
25774 case BUILT_IN_ATANHF:
25775 case BUILT_IN_CBRTF:
25776 case BUILT_IN_SINHF:
25777 case BUILT_IN_SINF:
25778 case BUILT_IN_ASINHF:
25779 case BUILT_IN_ASINF:
25780 case BUILT_IN_COSHF:
25781 case BUILT_IN_COSF:
25782 case BUILT_IN_ACOSHF:
25783 case BUILT_IN_ACOSF:
25784 if (el_mode != SFmode || n != 4)
/* Derive the SVML entry-point name from the scalar builtin's name;
   bname+10 skips the "__builtin_" prefix.  log/logf get special names.  */
25792 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25794 if (fn == BUILT_IN_LOGF)
25795 strcpy (name, "vmlsLn4");
25796 else if (fn == BUILT_IN_LOG)
25797 strcpy (name, "vmldLn2");
25800 sprintf (name, "vmls%s", bname+10);
25801 name[strlen (name)-1] = '4';
25804 sprintf (name, "vmld%s2", bname+10);
25806 /* Convert to uppercase. */
/* Build a vector function type with one operand per scalar argument.  */
25810 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25811 args = TREE_CHAIN (args))
25815 fntype = build_function_type_list (type_out, type_in, NULL);
25817 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25819 /* Build a function declaration for the vectorized function. */
25820 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25821 TREE_PUBLIC (new_fndecl) = 1;
25822 DECL_EXTERNAL (new_fndecl) = 1;
25823 DECL_IS_NOVOPS (new_fndecl) = 1;
25824 TREE_READONLY (new_fndecl) = 1;
25829 /* Handler for an ACML-style interface to
25830 a library with vectorized intrinsics. */
/* NOTE(review): partial listing — braces, `break;`s, the NULL_TREE
   bail-outs and the final `return new_fndecl;` are not visible here.  */
25833 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template: the ".." is patched below per double/float variant.  */
25835 char name[20] = "__vr.._";
25836 tree fntype, new_fndecl, args;
25839 enum machine_mode el_mode, in_mode;
25842 /* The ACML is 64bits only and suitable for unsafe math only as
25843 it does not correctly support parts of IEEE with the required
25844 precision such as denormals. */
25846 || !flag_unsafe_math_optimizations)
/* Element modes and lane counts of the requested in/out vector types.  */
25849 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25850 n = TYPE_VECTOR_SUBPARTS (type_out);
25851 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25852 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25853 if (el_mode != in_mode
/* Double variants supported by ACML.  */
25863 case BUILT_IN_LOG2:
25864 case BUILT_IN_LOG10:
25867 if (el_mode != DFmode
/* Float variants supported by ACML.  */
25872 case BUILT_IN_SINF:
25873 case BUILT_IN_COSF:
25874 case BUILT_IN_EXPF:
25875 case BUILT_IN_POWF:
25876 case BUILT_IN_LOGF:
25877 case BUILT_IN_LOG2F:
25878 case BUILT_IN_LOG10F:
25881 if (el_mode != SFmode
/* Append the scalar builtin's name (bname+10 skips "__builtin_")
   after the 7-character "__vr.._" template prefix.  */
25890 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25891 sprintf (name + 7, "%s", bname+10);
/* Build a vector function type with one operand per scalar argument.  */
25894 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25895 args = TREE_CHAIN (args))
25899 fntype = build_function_type_list (type_out, type_in, NULL);
25901 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25903 /* Build a function declaration for the vectorized function. */
25904 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25905 TREE_PUBLIC (new_fndecl) = 1;
25906 DECL_EXTERNAL (new_fndecl) = 1;
25907 DECL_IS_NOVOPS (new_fndecl) = 1;
25908 TREE_READONLY (new_fndecl) = 1;
25914 /* Returns a decl of a function that implements conversion of an integer vector
25915 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25916 side of the conversion.
25917 Return NULL_TREE if it is not available. */
/* NOTE(review): partial listing — the `switch (code)` header, its
   FLOAT_EXPR case label, defaults and returns are not visible here.  */
25920 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25922 if (TREE_CODE (type) != VECTOR_TYPE
25923 /* There are only conversions from/to signed integers. */
25924 || TYPE_UNSIGNED (TREE_TYPE (type)))
/* int -> float direction: V4SI -> V4SF uses cvtdq2ps.  */
25930 switch (TYPE_MODE (type))
25933 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int (truncating) direction: V4SF -> V4SI uses cvttps2dq.  */
25938 case FIX_TRUNC_EXPR:
25939 switch (TYPE_MODE (type))
25942 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25952 /* Returns a code for a target-specific builtin that implements
25953 reciprocal of the function, or NULL_TREE if not available. */
/* NOTE(review): partial listing — the md_fn branch structure, switch
   headers and the NULL_TREE returns are not visible here.  */
25956 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25957 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only valid under unsafe/finite math with
   -mrecip and SSE math, and not when optimizing for size.  */
25959 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25960 && flag_finite_math_only && !flag_trapping_math
25961 && flag_unsafe_math_optimizations))
25965 /* Machine dependent builtins. */
25968 /* Vectorized version of sqrt to rsqrt conversion. */
25969 case IX86_BUILTIN_SQRTPS_NR:
25970 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25976 /* Normal builtins. */
25979 /* Sqrt to rsqrt conversion. */
25980 case BUILT_IN_SQRTF:
25981 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25988 /* Store OPERAND to the memory after reload is completed. This means
25989 that we can't easily use assign_stack_local. */
/* NOTE(review): partial listing — braces, the switch-on-mode headers,
   several emit_insn wrappers and the final `return result;` are not
   visible here.  */
25991 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25995 gcc_assert (reload_completed);
/* With a red zone (and not MS ABI) we can store below the stack pointer
   without adjusting it.  */
25996 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25998 result = gen_rtx_MEM (mode,
25999 gen_rtx_PLUS (Pmode,
26001 GEN_INT (-RED_ZONE_SIZE)))
26002 emit_move_insn (result, operand);
/* 64-bit without red zone: push the value via PRE_DEC of the stack
   pointer, widening to DImode first.  */
26004 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
26010 operand = gen_lowpart (DImode, operand);
26014 gen_rtx_SET (VOIDmode,
26015 gen_rtx_MEM (DImode,
26016 gen_rtx_PRE_DEC (DImode,
26017 stack_pointer_rtx)),
26021 gcc_unreachable ();
26023 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode pushes.  */
26032 split_di (&operand, 1, operands, operands + 1);
26034 gen_rtx_SET (VOIDmode,
26035 gen_rtx_MEM (SImode,
26036 gen_rtx_PRE_DEC (Pmode,
26037 stack_pointer_rtx)),
26040 gen_rtx_SET (VOIDmode,
26041 gen_rtx_MEM (SImode,
26042 gen_rtx_PRE_DEC (Pmode,
26043 stack_pointer_rtx)),
26048 /* Store HImodes as SImodes. */
26049 operand = gen_lowpart (SImode, operand);
26053 gen_rtx_SET (VOIDmode,
26054 gen_rtx_MEM (GET_MODE (operand),
26055 gen_rtx_PRE_DEC (SImode,
26056 stack_pointer_rtx)),
26060 gcc_unreachable ();
26062 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26067 /* Free operand from the memory. */
/* NOTE(review): partial listing — braces, the `size` computation and the
   closing of the emit_insn call are not visible here.  Counterpart of
   ix86_force_to_memory: pops the temporary pushed on the stack (nothing
   to do when the red zone was used).  */
26069 ix86_free_from_memory (enum machine_mode mode)
26071 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
26075 if (mode == DImode || TARGET_64BIT)
26079 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26080 to pop or add instruction if registers are available. */
26081 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26082 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26087 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26088 QImode must go into class Q_REGS.
26089 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26090 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): partial listing — braces and most of the return
   statements (NO_REGS / regclass / specific classes) are not visible.  */
26092 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26094 enum machine_mode mode = GET_MODE (x);
26096 /* We're only allowed to return a subclass of CLASS. Many of the
26097 following checks fail for NO_REGS, so eliminate that early. */
26098 if (regclass == NO_REGS)
26101 /* All classes can load zeros. */
26102 if (x == CONST0_RTX (mode))
26105 /* Force constants into memory if we are loading a (nonzero) constant into
26106 an MMX or SSE register. This is because there are no MMX/SSE instructions
26107 to load from a constant. */
26109 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26112 /* Prefer SSE regs only, if we can use them for math. */
26113 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26114 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26116 /* Floating-point constants need more complex checks. */
26117 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26119 /* General regs can load everything. */
26120 if (reg_class_subset_p (regclass, GENERAL_REGS))
26123 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26124 zero above. We only want to wind up preferring 80387 registers if
26125 we plan on doing computation with them. */
26127 && standard_80387_constant_p (x))
26129 /* Limit class to non-sse. */
26130 if (regclass == FLOAT_SSE_REGS)
26132 if (regclass == FP_TOP_SSE_REGS)
26134 if (regclass == FP_SECOND_SSE_REGS)
26135 return FP_SECOND_REG;
26136 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26143 /* Generally when we see PLUS here, it's the function invariant
26144 (plus soft-fp const_int). Which can only be computed into general
26146 if (GET_CODE (x) == PLUS)
26147 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26149 /* QImode constants are easy to load, but non-constant QImode data
26150 must go into Q_REGS. */
26151 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26153 if (reg_class_subset_p (regclass, Q_REGS))
26155 if (reg_class_subset_p (Q_REGS, regclass))
26163 /* Discourage putting floating-point values in SSE registers unless
26164 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): partial listing — braces and some returns are not
   visible.  Also: `mode` is initialized from GET_MODE (x) at its
   declaration and re-assigned identically just below (original line
   26173) — the second assignment looks redundant; confirm against the
   full source before removing.  */
26166 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26168 enum machine_mode mode = GET_MODE (x);
26170 /* Restrict the output reload class to the register bank that we are doing
26171 math on. If we would like not to return a subset of CLASS, reject this
26172 alternative: if reload cannot do this, it will still use its choice. */
26173 mode = GET_MODE (x);
26174 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26175 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* x87 modes: narrow mixed classes to their pure-x87 counterparts.  */
26177 if (X87_FLOAT_MODE_P (mode))
26179 if (regclass == FP_TOP_SSE_REGS)
26181 else if (regclass == FP_SECOND_SSE_REGS)
26182 return FP_SECOND_REG;
26184 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Target hook: return the register class needed as an intermediate for a
   reload, or NO_REGS.  NOTE(review): partial listing — braces, the
   declaration of `regno` and the return statements are not visible.  */
26190 static enum reg_class
26191 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
26192 enum machine_mode mode,
26193 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26195 /* QImode spills from non-QI registers require
26196 intermediate register on 32bit targets. */
26197 if (!in_p && mode == QImode && !TARGET_64BIT
26198 && (rclass == GENERAL_REGS
26199 || rclass == LEGACY_REGS
26200 || rclass == INDEX_REGS))
/* Resolve pseudos/subregs to a hard register number where possible.  */
26209 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26210 regno = true_regnum (x);
26212 /* Return Q_REGS if the operand is in memory. */
26220 /* If we are copying between general and FP registers, we need a memory
26221 location. The same is true for SSE and MMX registers.
26223 To optimize register_move_cost performance, allow inline variant.
26225 The macro can't work reliably when one of the CLASSES is class containing
26226 registers from multiple units (SSE, MMX, integer). We avoid this by never
26227 combining those units in single alternative in the machine description.
26228 Ensure that this constraint holds to avoid unexpected surprises.
26230 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26231 enforce these sanity checks. */
/* NOTE(review): partial listing — braces and the true/false return
   statements after each test are not visible here.  */
26234 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26235 enum machine_mode mode, int strict)
/* Sanity check: neither class may mix units (float/SSE/MMX with other
   registers); only asserted under STRICT.  */
26237 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26238 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26239 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26240 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26241 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26242 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26244 gcc_assert (!strict);
/* Crossing into/out of the x87 unit always goes through memory.  */
26248 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26251 /* ??? This is a lie. We do have moves between mmx/general, and for
26252 mmx/sse2. But by saying we need secondary memory we discourage the
26253 register allocator from using the mmx registers unless needed. */
26254 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26257 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26259 /* SSE1 doesn't have any direct moves from other classes. */
26263 /* If the target says that inter-unit moves are more expensive
26264 than moving through memory, then don't generate them. */
26265 if (!TARGET_INTER_UNIT_MOVES)
26268 /* Between SSE and general, we have moves no larger than word size. */
26269 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed, used where
   the inline variant is not wanted (e.g. from the target macro).
   NOTE(review): partial listing — the return-type line and braces are
   not visible here.  */
26277 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26278 enum machine_mode mode, int strict)
26280 return inline_secondary_memory_needed (class1, class2, mode, strict);
26283 /* Return true if the registers in CLASS cannot represent the change from
26284 modes FROM to TO. */
26287 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26288 enum reg_class regclass)
26293 /* x87 registers can't do subreg at all, as all values are reformatted
26294 to extended precision. */
26295 if (MAYBE_FLOAT_CLASS_P (regclass))
26298 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26300 /* Vector registers do not support QI or HImode loads. If we don't
26301 disallow a change to these modes, reload will assume it's ok to
26302 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26303 the vec_dupv4hi pattern. */
26304 if (GET_MODE_SIZE (from) < 4)
26307 /* Vector registers do not support subreg with nonzero offsets, which
26308 are otherwise valid for integer registers. Since we can't see
26309 whether we have a nonzero offset from here, prohibit all
26310 nonparadoxical subregs changing size. */
26311 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26318 /* Return the cost of moving data of mode M between a
26319 register and memory. A value of 2 is the default; this cost is
26320 relative to those in `REGISTER_MOVE_COST'.
26322 This function is used extensively by register_move_cost that is used to
26323 build tables at startup. Make it inline in this case.
26324 When IN is 2, return maximum of in and out move cost.
26326 If moving between registers and memory is more expensive than
26327 between two registers, you should define this macro to express the
26330 Model also increased moving costs of QImode registers in non
26334 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26338 if (FLOAT_CLASS_P (regclass))
26356 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26357 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26359 if (SSE_CLASS_P (regclass))
26362 switch (GET_MODE_SIZE (mode))
26377 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26378 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26380 if (MMX_CLASS_P (regclass))
26383 switch (GET_MODE_SIZE (mode))
26395 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26396 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26398 switch (GET_MODE_SIZE (mode))
26401 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26404 return ix86_cost->int_store[0];
26405 if (TARGET_PARTIAL_REG_DEPENDENCY
26406 && optimize_function_for_speed_p (cfun))
26407 cost = ix86_cost->movzbl_load;
26409 cost = ix86_cost->int_load[0];
26411 return MAX (cost, ix86_cost->int_store[0]);
26417 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26419 return ix86_cost->movzbl_load;
26421 return ix86_cost->int_store[0] + 4;
26426 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26427 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26429 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26430 if (mode == TFmode)
26433 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26435 cost = ix86_cost->int_load[2];
26437 cost = ix86_cost->int_store[2];
26438 return (cost * (((int) GET_MODE_SIZE (mode)
26439 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26444 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26446 return inline_memory_move_cost (mode, regclass, in);
26450 /* Return the cost of moving data from a register in class CLASS1 to
26451 one in class CLASS2.
26453 It is not required that the cost always equal 2 when FROM is the same as TO;
26454 on some machines it is expensive to move between registers if they are not
26455 general registers. */
26458 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26459 enum reg_class class2)
26461 /* In case we require secondary memory, compute cost of the store followed
26462 by load. In order to avoid bad register allocation choices, we need
26463 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26465 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26469 cost += inline_memory_move_cost (mode, class1, 2);
26470 cost += inline_memory_move_cost (mode, class2, 2);
26472 /* In case of copying from general_purpose_register we may emit multiple
26473 stores followed by single load causing memory size mismatch stall.
26474 Count this as arbitrarily high cost of 20. */
26475 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26478 /* In the case of FP/MMX moves, the registers actually overlap, and we
26479 have to switch modes in order to treat them differently. */
26480 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26481 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26487 /* Moves between SSE/MMX and integer unit are expensive. */
26488 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26489 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26491 /* ??? By keeping returned value relatively high, we limit the number
26492 of moves between integer and MMX/SSE registers for all targets.
26493 Additionally, high value prevents problem with x86_modes_tieable_p(),
26494 where integer modes in MMX/SSE registers are not tieable
26495 because of missing QImode and HImode moves to, from or between
26496 MMX/SSE registers. */
26497 return MAX (8, ix86_cost->mmxsse_to_integer);
26499 if (MAYBE_FLOAT_CLASS_P (class1))
26500 return ix86_cost->fp_move;
26501 if (MAYBE_SSE_CLASS_P (class1))
26502 return ix86_cost->sse_move;
26503 if (MAYBE_MMX_CLASS_P (class1))
26504 return ix86_cost->mmx_move;
26508 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26511 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26513 /* Flags and only flags can only hold CCmode values. */
26514 if (CC_REGNO_P (regno))
26515 return GET_MODE_CLASS (mode) == MODE_CC;
26516 if (GET_MODE_CLASS (mode) == MODE_CC
26517 || GET_MODE_CLASS (mode) == MODE_RANDOM
26518 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26520 if (FP_REGNO_P (regno))
26521 return VALID_FP_MODE_P (mode);
26522 if (SSE_REGNO_P (regno))
26524 /* We implement the move patterns for all vector modes into and
26525 out of SSE registers, even when no operation instructions
26526 are available. OImode move is available only when AVX is
26528 return ((TARGET_AVX && mode == OImode)
26529 || VALID_AVX256_REG_MODE (mode)
26530 || VALID_SSE_REG_MODE (mode)
26531 || VALID_SSE2_REG_MODE (mode)
26532 || VALID_MMX_REG_MODE (mode)
26533 || VALID_MMX_REG_MODE_3DNOW (mode));
26535 if (MMX_REGNO_P (regno))
26537 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26538 so if the register is available at all, then we can move data of
26539 the given mode into or out of it. */
26540 return (VALID_MMX_REG_MODE (mode)
26541 || VALID_MMX_REG_MODE_3DNOW (mode));
26544 if (mode == QImode)
26546 /* Take care for QImode values - they can be in non-QI regs,
26547 but then they do cause partial register stalls. */
26548 if (regno <= BX_REG || TARGET_64BIT)
26550 if (!TARGET_PARTIAL_REG_STALL)
26552 return reload_in_progress || reload_completed;
26554 /* We handle both integer and floats in the general purpose registers. */
26555 else if (VALID_INT_MODE_P (mode))
26557 else if (VALID_FP_MODE_P (mode))
26559 else if (VALID_DFP_MODE_P (mode))
26561 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26562 on to use that value in smaller contexts, this can easily force a
26563 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26564 supporting DImode, allow it. */
26565 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26571 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26572 tieable integer mode. */
26575 ix86_tieable_integer_mode_p (enum machine_mode mode)
26584 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26587 return TARGET_64BIT;
26594 /* Return true if MODE1 is accessible in a register that can hold MODE2
26595 without copying. That is, all register classes that can hold MODE2
26596 can also hold MODE1. */
26599 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26601 if (mode1 == mode2)
26604 if (ix86_tieable_integer_mode_p (mode1)
26605 && ix86_tieable_integer_mode_p (mode2))
26608 /* MODE2 being XFmode implies fp stack or general regs, which means we
26609 can tie any smaller floating point modes to it. Note that we do not
26610 tie this with TFmode. */
26611 if (mode2 == XFmode)
26612 return mode1 == SFmode || mode1 == DFmode;
26614 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26615 that we can tie it with SFmode. */
26616 if (mode2 == DFmode)
26617 return mode1 == SFmode;
26619 /* If MODE2 is only appropriate for an SSE register, then tie with
26620 any other mode acceptable to SSE registers. */
26621 if (GET_MODE_SIZE (mode2) == 16
26622 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26623 return (GET_MODE_SIZE (mode1) == 16
26624 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26626 /* If MODE2 is appropriate for an MMX register, then tie
26627 with any other mode acceptable to MMX registers. */
26628 if (GET_MODE_SIZE (mode2) == 8
26629 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26630 return (GET_MODE_SIZE (mode1) == 8
26631 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26636 /* Compute a (partial) cost for rtx X. Return true if the complete
26637 cost has been computed, and false if subexpressions should be
26638 scanned. In either case, *TOTAL contains the cost result. */
26641 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26643 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26644 enum machine_mode mode = GET_MODE (x);
26645 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26653 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26655 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26657 else if (flag_pic && SYMBOLIC_CONST (x)
26659 || (!GET_CODE (x) != LABEL_REF
26660 && (GET_CODE (x) != SYMBOL_REF
26661 || !SYMBOL_REF_LOCAL_P (x)))))
26668 if (mode == VOIDmode)
26671 switch (standard_80387_constant_p (x))
26676 default: /* Other constants */
26681 /* Start with (MEM (SYMBOL_REF)), since that's where
26682 it'll probably end up. Add a penalty for size. */
26683 *total = (COSTS_N_INSNS (1)
26684 + (flag_pic != 0 && !TARGET_64BIT)
26685 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26691 /* The zero extensions is often completely free on x86_64, so make
26692 it as cheap as possible. */
26693 if (TARGET_64BIT && mode == DImode
26694 && GET_MODE (XEXP (x, 0)) == SImode)
26696 else if (TARGET_ZERO_EXTEND_WITH_AND)
26697 *total = cost->add;
26699 *total = cost->movzx;
26703 *total = cost->movsx;
26707 if (CONST_INT_P (XEXP (x, 1))
26708 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26710 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26713 *total = cost->add;
26716 if ((value == 2 || value == 3)
26717 && cost->lea <= cost->shift_const)
26719 *total = cost->lea;
26729 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26731 if (CONST_INT_P (XEXP (x, 1)))
26733 if (INTVAL (XEXP (x, 1)) > 32)
26734 *total = cost->shift_const + COSTS_N_INSNS (2);
26736 *total = cost->shift_const * 2;
26740 if (GET_CODE (XEXP (x, 1)) == AND)
26741 *total = cost->shift_var * 2;
26743 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26748 if (CONST_INT_P (XEXP (x, 1)))
26749 *total = cost->shift_const;
26751 *total = cost->shift_var;
26756 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26758 /* ??? SSE scalar cost should be used here. */
26759 *total = cost->fmul;
26762 else if (X87_FLOAT_MODE_P (mode))
26764 *total = cost->fmul;
26767 else if (FLOAT_MODE_P (mode))
26769 /* ??? SSE vector cost should be used here. */
26770 *total = cost->fmul;
26775 rtx op0 = XEXP (x, 0);
26776 rtx op1 = XEXP (x, 1);
26778 if (CONST_INT_P (XEXP (x, 1)))
26780 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26781 for (nbits = 0; value != 0; value &= value - 1)
26785 /* This is arbitrary. */
26788 /* Compute costs correctly for widening multiplication. */
26789 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26790 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26791 == GET_MODE_SIZE (mode))
26793 int is_mulwiden = 0;
26794 enum machine_mode inner_mode = GET_MODE (op0);
26796 if (GET_CODE (op0) == GET_CODE (op1))
26797 is_mulwiden = 1, op1 = XEXP (op1, 0);
26798 else if (CONST_INT_P (op1))
26800 if (GET_CODE (op0) == SIGN_EXTEND)
26801 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26804 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26808 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26811 *total = (cost->mult_init[MODE_INDEX (mode)]
26812 + nbits * cost->mult_bit
26813 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26822 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26823 /* ??? SSE cost should be used here. */
26824 *total = cost->fdiv;
26825 else if (X87_FLOAT_MODE_P (mode))
26826 *total = cost->fdiv;
26827 else if (FLOAT_MODE_P (mode))
26828 /* ??? SSE vector cost should be used here. */
26829 *total = cost->fdiv;
26831 *total = cost->divide[MODE_INDEX (mode)];
26835 if (GET_MODE_CLASS (mode) == MODE_INT
26836 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26838 if (GET_CODE (XEXP (x, 0)) == PLUS
26839 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26840 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26841 && CONSTANT_P (XEXP (x, 1)))
26843 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26844 if (val == 2 || val == 4 || val == 8)
26846 *total = cost->lea;
26847 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26848 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26849 outer_code, speed);
26850 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26854 else if (GET_CODE (XEXP (x, 0)) == MULT
26855 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26857 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26858 if (val == 2 || val == 4 || val == 8)
26860 *total = cost->lea;
26861 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26862 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26866 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26868 *total = cost->lea;
26869 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26870 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26871 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26878 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26880 /* ??? SSE cost should be used here. */
26881 *total = cost->fadd;
26884 else if (X87_FLOAT_MODE_P (mode))
26886 *total = cost->fadd;
26889 else if (FLOAT_MODE_P (mode))
26891 /* ??? SSE vector cost should be used here. */
26892 *total = cost->fadd;
26900 if (!TARGET_64BIT && mode == DImode)
26902 *total = (cost->add * 2
26903 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26904 << (GET_MODE (XEXP (x, 0)) != DImode))
26905 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26906 << (GET_MODE (XEXP (x, 1)) != DImode)));
26912 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26914 /* ??? SSE cost should be used here. */
26915 *total = cost->fchs;
26918 else if (X87_FLOAT_MODE_P (mode))
26920 *total = cost->fchs;
26923 else if (FLOAT_MODE_P (mode))
26925 /* ??? SSE vector cost should be used here. */
26926 *total = cost->fchs;
26932 if (!TARGET_64BIT && mode == DImode)
26933 *total = cost->add * 2;
26935 *total = cost->add;
26939 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26940 && XEXP (XEXP (x, 0), 1) == const1_rtx
26941 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26942 && XEXP (x, 1) == const0_rtx)
26944 /* This kind of construct is implemented using test[bwl].
26945 Treat it as if we had an AND. */
26946 *total = (cost->add
26947 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26948 + rtx_cost (const1_rtx, outer_code, speed));
26954 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26959 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26960 /* ??? SSE cost should be used here. */
26961 *total = cost->fabs;
26962 else if (X87_FLOAT_MODE_P (mode))
26963 *total = cost->fabs;
26964 else if (FLOAT_MODE_P (mode))
26965 /* ??? SSE vector cost should be used here. */
26966 *total = cost->fabs;
26970 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26971 /* ??? SSE cost should be used here. */
26972 *total = cost->fsqrt;
26973 else if (X87_FLOAT_MODE_P (mode))
26974 *total = cost->fsqrt;
26975 else if (FLOAT_MODE_P (mode))
26976 /* ??? SSE vector cost should be used here. */
26977 *total = cost->fsqrt;
26981 if (XINT (x, 1) == UNSPEC_TP)
26992 static int current_machopic_label_num;
26994 /* Given a symbol name and its associated stub, write out the
26995 definition of the stub. */
26998 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27000 unsigned int length;
27001 char *binder_name, *symbol_name, lazy_ptr_name[32];
27002 int label = ++current_machopic_label_num;
27004 /* For 64-bit we shouldn't get here. */
27005 gcc_assert (!TARGET_64BIT);
27007 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27008 symb = (*targetm.strip_name_encoding) (symb);
27010 length = strlen (stub);
27011 binder_name = XALLOCAVEC (char, length + 32);
27012 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27014 length = strlen (symb);
27015 symbol_name = XALLOCAVEC (char, length + 32);
27016 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27018 sprintf (lazy_ptr_name, "L%d$lz", label);
27021 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27023 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27025 fprintf (file, "%s:\n", stub);
27026 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27030 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27031 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27032 fprintf (file, "\tjmp\t*%%edx\n");
27035 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27037 fprintf (file, "%s:\n", binder_name);
27041 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27042 fprintf (file, "\tpushl\t%%eax\n");
27045 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27047 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
27049 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27050 fprintf (file, "%s:\n", lazy_ptr_name);
27051 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27052 fprintf (file, "\t.long %s\n", binder_name);
27056 darwin_x86_file_end (void)
27058 darwin_file_end ();
27061 #endif /* TARGET_MACHO */
27063 /* Order the registers for register allocator. */
27066 x86_order_regs_for_local_alloc (void)
27071 /* First allocate the local general purpose registers. */
27072 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27073 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27074 reg_alloc_order [pos++] = i;
27076 /* Global general purpose registers. */
27077 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27078 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27079 reg_alloc_order [pos++] = i;
27081 /* x87 registers come first in case we are doing FP math
27083 if (!TARGET_SSE_MATH)
27084 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27085 reg_alloc_order [pos++] = i;
27087 /* SSE registers. */
27088 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27089 reg_alloc_order [pos++] = i;
27090 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27091 reg_alloc_order [pos++] = i;
27093 /* x87 registers. */
27094 if (TARGET_SSE_MATH)
27095 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27096 reg_alloc_order [pos++] = i;
27098 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27099 reg_alloc_order [pos++] = i;
27101 /* Initialize the rest of array as we do not allocate some registers
27103 while (pos < FIRST_PSEUDO_REGISTER)
27104 reg_alloc_order [pos++] = 0;
27107 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27108 struct attribute_spec.handler. */
27110 ix86_handle_abi_attribute (tree *node, tree name,
27111 tree args ATTRIBUTE_UNUSED,
27112 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27114 if (TREE_CODE (*node) != FUNCTION_TYPE
27115 && TREE_CODE (*node) != METHOD_TYPE
27116 && TREE_CODE (*node) != FIELD_DECL
27117 && TREE_CODE (*node) != TYPE_DECL)
27119 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27121 *no_add_attrs = true;
27126 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27128 *no_add_attrs = true;
27132 /* Can combine regparm with all attributes but fastcall. */
27133 if (is_attribute_p ("ms_abi", name))
27135 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27137 error ("ms_abi and sysv_abi attributes are not compatible");
27142 else if (is_attribute_p ("sysv_abi", name))
27144 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27146 error ("ms_abi and sysv_abi attributes are not compatible");
27155 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27156 struct attribute_spec.handler. */
27158 ix86_handle_struct_attribute (tree *node, tree name,
27159 tree args ATTRIBUTE_UNUSED,
27160 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27163 if (DECL_P (*node))
27165 if (TREE_CODE (*node) == TYPE_DECL)
27166 type = &TREE_TYPE (*node);
27171 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27172 || TREE_CODE (*type) == UNION_TYPE)))
27174 warning (OPT_Wattributes, "%qE attribute ignored",
27176 *no_add_attrs = true;
27179 else if ((is_attribute_p ("ms_struct", name)
27180 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27181 || ((is_attribute_p ("gcc_struct", name)
27182 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27184 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27186 *no_add_attrs = true;
27193 ix86_ms_bitfield_layout_p (const_tree record_type)
27195 return (TARGET_MS_BITFIELD_LAYOUT &&
27196 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27197 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27200 /* Returns an expression indicating where the this parameter is
27201 located on entry to the FUNCTION. */
27204 x86_this_parameter (tree function)
27206 tree type = TREE_TYPE (function);
27207 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27212 const int *parm_regs;
27214 if (ix86_function_type_abi (type) == MS_ABI)
27215 parm_regs = x86_64_ms_abi_int_parameter_registers;
27217 parm_regs = x86_64_int_parameter_registers;
27218 return gen_rtx_REG (DImode, parm_regs[aggr]);
27221 nregs = ix86_function_regparm (type, function);
27223 if (nregs > 0 && !stdarg_p (type))
27227 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27228 regno = aggr ? DX_REG : CX_REG;
27236 return gen_rtx_MEM (SImode,
27237 plus_constant (stack_pointer_rtx, 4));
27240 return gen_rtx_REG (SImode, regno);
27243 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27246 /* Determine whether x86_output_mi_thunk can succeed. */
27249 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27250 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27251 HOST_WIDE_INT vcall_offset, const_tree function)
27253 /* 64-bit can handle anything. */
27257 /* For 32-bit, everything's fine if we have one free register. */
27258 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27261 /* Need a free register for vcall_offset. */
27265 /* Need a free register for GOT references. */
27266 if (flag_pic && !(*targetm.binds_local_p) (function))
27269 /* Otherwise ok. */
27273 /* Output the assembler code for a thunk function. THUNK_DECL is the
27274 declaration for the thunk function itself, FUNCTION is the decl for
27275 the target function. DELTA is an immediate constant offset to be
27276 added to THIS. If VCALL_OFFSET is nonzero, the word at
27277 *(*this + vcall_offset) should be added to THIS. */
27280 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27281 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27282 HOST_WIDE_INT vcall_offset, tree function)
27285 rtx this_param = x86_this_parameter (function);
27288 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27289 pull it in now and let DELTA benefit. */
27290 if (REG_P (this_param))
27291 this_reg = this_param;
27292 else if (vcall_offset)
27294 /* Put the this parameter into %eax. */
27295 xops[0] = this_param;
27296 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27297 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27300 this_reg = NULL_RTX;
27302 /* Adjust the this parameter by a fixed constant. */
27305 xops[0] = GEN_INT (delta);
27306 xops[1] = this_reg ? this_reg : this_param;
27309 if (!x86_64_general_operand (xops[0], DImode))
27311 tmp = gen_rtx_REG (DImode, R10_REG);
27313 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27315 xops[1] = this_param;
27317 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27320 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27323 /* Adjust the this parameter by a value stored in the vtable. */
27327 tmp = gen_rtx_REG (DImode, R10_REG);
27330 int tmp_regno = CX_REG;
27331 if (lookup_attribute ("fastcall",
27332 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27333 tmp_regno = AX_REG;
27334 tmp = gen_rtx_REG (SImode, tmp_regno);
27337 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27339 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27341 /* Adjust the this parameter. */
27342 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27343 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27345 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27346 xops[0] = GEN_INT (vcall_offset);
27348 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27349 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27351 xops[1] = this_reg;
27352 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27355 /* If necessary, drop THIS back to its stack slot. */
27356 if (this_reg && this_reg != this_param)
27358 xops[0] = this_reg;
27359 xops[1] = this_param;
27360 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27363 xops[0] = XEXP (DECL_RTL (function), 0);
27366 if (!flag_pic || (*targetm.binds_local_p) (function))
27367 output_asm_insn ("jmp\t%P0", xops);
27368 /* All thunks should be in the same object as their target,
27369 and thus binds_local_p should be true. */
27370 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27371 gcc_unreachable ();
27374 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27375 tmp = gen_rtx_CONST (Pmode, tmp);
27376 tmp = gen_rtx_MEM (QImode, tmp);
27378 output_asm_insn ("jmp\t%A0", xops);
27383 if (!flag_pic || (*targetm.binds_local_p) (function))
27384 output_asm_insn ("jmp\t%P0", xops);
27389 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27390 tmp = (gen_rtx_SYMBOL_REF
27392 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27393 tmp = gen_rtx_MEM (QImode, tmp);
27395 output_asm_insn ("jmp\t%0", xops);
27398 #endif /* TARGET_MACHO */
27400 tmp = gen_rtx_REG (SImode, CX_REG);
27401 output_set_got (tmp, NULL_RTX);
27404 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27405 output_asm_insn ("jmp\t{*}%1", xops);
27411 x86_file_start (void)
27413 default_file_start ();
27415 darwin_file_start ();
27417 if (X86_FILE_START_VERSION_DIRECTIVE)
27418 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27419 if (X86_FILE_START_FLTUSED)
27420 fputs ("\t.global\t__fltused\n", asm_out_file);
27421 if (ix86_asm_dialect == ASM_INTEL)
27422 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27426 x86_field_alignment (tree field, int computed)
27428 enum machine_mode mode;
27429 tree type = TREE_TYPE (field);
27431 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27433 mode = TYPE_MODE (strip_array_types (type));
27434 if (mode == DFmode || mode == DCmode
27435 || GET_MODE_CLASS (mode) == MODE_INT
27436 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27437 return MIN (32, computed);
27441 /* Output assembler code to FILE to increment profiler label # LABELNO
27442 for profiling a function entry. */
27444 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27448 #ifndef NO_PROFILE_COUNTERS
27449 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27452 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27453 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27455 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27459 #ifndef NO_PROFILE_COUNTERS
27460 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27461 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27463 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
27467 #ifndef NO_PROFILE_COUNTERS
27468 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27469 PROFILE_COUNT_REGISTER);
27471 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27475 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27476 /* We don't have exact information about the insn sizes, but we may assume
27477 quite safely that we are informed about all 1 byte insns and memory
27478 address sizes. This is enough to eliminate unnecessary padding in
27482 min_insn_size (rtx insn)
27486 if (!INSN_P (insn) || !active_insn_p (insn))
27489 /* Discard alignments we've emit and jump instructions. */
27490 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27491 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27493 if (JUMP_TABLE_DATA_P(insn))
27496 /* Important case - calls are always 5 bytes.
27497 It is common to have many calls in the row. */
27499 && symbolic_reference_mentioned_p (PATTERN (insn))
27500 && !SIBLING_CALL_P (insn))
27502 if (get_attr_length (insn) <= 1)
27505 /* For normal instructions we may rely on the sizes of addresses
27506 and the presence of symbol to require 4 bytes of encoding.
27507 This is not the case for jumps where references are PC relative. */
27508 if (!JUMP_P (insn))
27510 l = get_attr_length_address (insn);
27511 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27520 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
27524 ix86_avoid_jump_mispredicts (void)
27526 rtx insn, start = get_insns ();
27527 int nbytes = 0, njumps = 0;
27530 /* Look for all minimal intervals of instructions containing 4 jumps.
27531 The intervals are bounded by START and INSN. NBYTES is the total
27532 size of instructions in the interval including INSN and not including
27533 START. When the NBYTES is smaller than 16 bytes, it is possible
27534 that the end of START and INSN ends up in the same 16byte page.
27536 The smallest offset in the page INSN can start is the case where START
27537 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27538 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
27540 for (insn = start; insn; insn = NEXT_INSN (insn))
27544 if (LABEL_P (insn))
27546 int align = label_to_alignment (insn);
27547 int max_skip = label_to_max_skip (insn);
27551 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27552 already in the current 16 byte page, because otherwise
27553 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27554 bytes to reach 16 byte boundary. */
27556 || (align <= 3 && max_skip != (1 << align) - 1))
27559 fprintf (dump_file, "Label %i with max_skip %i\n",
27560 INSN_UID (insn), max_skip);
27563 while (nbytes + max_skip >= 16)
27565 start = NEXT_INSN (start);
27566 if ((JUMP_P (start)
27567 && GET_CODE (PATTERN (start)) != ADDR_VEC
27568 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27570 njumps--, isjump = 1;
27573 nbytes -= min_insn_size (start);
27579 min_size = min_insn_size (insn);
27580 nbytes += min_size;
27582 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27583 INSN_UID (insn), min_size);
27585 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27586 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27594 start = NEXT_INSN (start);
27595 if ((JUMP_P (start)
27596 && GET_CODE (PATTERN (start)) != ADDR_VEC
27597 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27599 njumps--, isjump = 1;
27602 nbytes -= min_insn_size (start);
27604 gcc_assert (njumps >= 0);
27606 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27607 INSN_UID (start), INSN_UID (insn), nbytes);
27609 if (njumps == 3 && isjump && nbytes < 16)
27611 int padsize = 15 - nbytes + min_insn_size (insn);
27614 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27615 INSN_UID (insn), padsize);
27616 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27622 /* AMD Athlon works faster
27623 when RET is not destination of conditional jump or directly preceded
27624 by other jump instruction. We avoid the penalty by inserting NOP just
27625 before the RET instructions in such cases. */
27627 ix86_pad_returns (void)
27632 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27634 basic_block bb = e->src;
27635 rtx ret = BB_END (bb);
27637 bool replace = false;
27639 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27640 || optimize_bb_for_size_p (bb))
27642 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27643 if (active_insn_p (prev) || LABEL_P (prev))
27645 if (prev && LABEL_P (prev))
27650 FOR_EACH_EDGE (e, ei, bb->preds)
27651 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27652 && !(e->flags & EDGE_FALLTHRU))
27657 prev = prev_active_insn (ret);
27659 && ((JUMP_P (prev) && any_condjump_p (prev))
27662 /* Empty functions get branch mispredict even when the jump destination
27663 is not visible to us. */
27664 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27669 emit_insn_before (gen_return_internal_long (), ret);
27675 /* Implement machine specific optimizations. We implement padding of returns
27676 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
27680 if (optimize && optimize_function_for_speed_p (cfun))
27682 if (TARGET_PAD_RETURNS)
27683 ix86_pad_returns ();
27684 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27685 if (TARGET_FOUR_JUMP_LIMIT)
27686 ix86_avoid_jump_mispredicts ();
27691 /* Return nonzero when QImode register that must be represented via REX prefix
27694 x86_extended_QIreg_mentioned_p (rtx insn)
27697 extract_insn_cached (insn);
27698 for (i = 0; i < recog_data.n_operands; i++)
27699 if (REG_P (recog_data.operand[i])
27700 && REGNO (recog_data.operand[i]) > BX_REG)
27705 /* Return nonzero when P points to register encoded via REX prefix.
27706 Called via for_each_rtx. */
27708 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27710 unsigned int regno;
27713 regno = REGNO (*p);
27714 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27717 /* Return true when INSN mentions register that must be encoded using REX
27720 x86_extended_reg_mentioned_p (rtx insn)
27722 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27723 extended_reg_mentioned_1, NULL);
27726 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27727 optabs would emit if we didn't have TFmode patterns. */
27730 x86_emit_floatuns (rtx operands[2])
27732 rtx neglab, donelab, i0, i1, f0, in, out;
27733 enum machine_mode mode, inmode;
27735 inmode = GET_MODE (operands[1]);
27736 gcc_assert (inmode == SImode || inmode == DImode);
27739 in = force_reg (inmode, operands[1]);
27740 mode = GET_MODE (out);
27741 neglab = gen_label_rtx ();
27742 donelab = gen_label_rtx ();
27743 f0 = gen_reg_rtx (mode);
27745 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27747 expand_float (out, in, 0);
27749 emit_jump_insn (gen_jump (donelab));
27752 emit_label (neglab);
27754 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27756 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27758 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27760 expand_float (f0, i0, 0);
27762 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27764 emit_label (donelab);
27767 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27768 with all elements equal to VAR. Return true if successful. */
27771 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27772 rtx target, rtx val)
27774 enum machine_mode hmode, smode, wsmode, wvmode;
27789 val = force_reg (GET_MODE_INNER (mode), val);
27790 x = gen_rtx_VEC_DUPLICATE (mode, val);
27791 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27797 if (TARGET_SSE || TARGET_3DNOW_A)
27799 val = gen_lowpart (SImode, val);
27800 x = gen_rtx_TRUNCATE (HImode, val);
27801 x = gen_rtx_VEC_DUPLICATE (mode, x);
27802 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27824 /* Extend HImode to SImode using a paradoxical SUBREG. */
27825 tmp1 = gen_reg_rtx (SImode);
27826 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27827 /* Insert the SImode value as low element of V4SImode vector. */
27828 tmp2 = gen_reg_rtx (V4SImode);
27829 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27830 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27831 CONST0_RTX (V4SImode),
27833 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27834 /* Cast the V4SImode vector back to a V8HImode vector. */
27835 tmp1 = gen_reg_rtx (V8HImode);
27836 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27837 /* Duplicate the low short through the whole low SImode word. */
27838 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27839 /* Cast the V8HImode vector back to a V4SImode vector. */
27840 tmp2 = gen_reg_rtx (V4SImode);
27841 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27842 /* Replicate the low element of the V4SImode vector. */
27843 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27844 /* Cast the V2SImode back to V8HImode, and store in target. */
27845 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27856 /* Extend QImode to SImode using a paradoxical SUBREG. */
27857 tmp1 = gen_reg_rtx (SImode);
27858 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27859 /* Insert the SImode value as low element of V4SImode vector. */
27860 tmp2 = gen_reg_rtx (V4SImode);
27861 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27862 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27863 CONST0_RTX (V4SImode),
27865 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27866 /* Cast the V4SImode vector back to a V16QImode vector. */
27867 tmp1 = gen_reg_rtx (V16QImode);
27868 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27869 /* Duplicate the low byte through the whole low SImode word. */
27870 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27871 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27872 /* Cast the V16QImode vector back to a V4SImode vector. */
27873 tmp2 = gen_reg_rtx (V4SImode);
27874 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27875 /* Replicate the low element of the V4SImode vector. */
27876 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27877 /* Cast the V2SImode back to V16QImode, and store in target. */
27878 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27886 /* Replicate the value once into the next wider mode and recurse. */
27887 val = convert_modes (wsmode, smode, val, true);
27888 x = expand_simple_binop (wsmode, ASHIFT, val,
27889 GEN_INT (GET_MODE_BITSIZE (smode)),
27890 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27891 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27893 x = gen_reg_rtx (wvmode);
27894 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27895 gcc_unreachable ();
27896 emit_move_insn (target, gen_lowpart (mode, x));
27919 rtx tmp = gen_reg_rtx (hmode);
27920 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27921 emit_insn (gen_rtx_SET (VOIDmode, target,
27922 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27931 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27932 whose ONE_VAR element is VAR, and other elements are zero. Return true
27936 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27937 rtx target, rtx var, int one_var)
27939 enum machine_mode vsimode;
27942 bool use_vector_set = false;
27947 /* For SSE4.1, we normally use vector set. But if the second
27948 element is zero and inter-unit moves are OK, we use movq
27950 use_vector_set = (TARGET_64BIT
27952 && !(TARGET_INTER_UNIT_MOVES
27958 use_vector_set = TARGET_SSE4_1;
27961 use_vector_set = TARGET_SSE2;
27964 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27971 use_vector_set = TARGET_AVX;
27974 /* Use ix86_expand_vector_set in 64bit mode only. */
27975 use_vector_set = TARGET_AVX && TARGET_64BIT;
27981 if (use_vector_set)
27983 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27984 var = force_reg (GET_MODE_INNER (mode), var);
27985 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28001 var = force_reg (GET_MODE_INNER (mode), var);
28002 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28003 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28008 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28009 new_target = gen_reg_rtx (mode);
28011 new_target = target;
28012 var = force_reg (GET_MODE_INNER (mode), var);
28013 x = gen_rtx_VEC_DUPLICATE (mode, var);
28014 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28015 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28018 /* We need to shuffle the value to the correct position, so
28019 create a new pseudo to store the intermediate result. */
28021 /* With SSE2, we can use the integer shuffle insns. */
28022 if (mode != V4SFmode && TARGET_SSE2)
28024 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28026 GEN_INT (one_var == 1 ? 0 : 1),
28027 GEN_INT (one_var == 2 ? 0 : 1),
28028 GEN_INT (one_var == 3 ? 0 : 1)));
28029 if (target != new_target)
28030 emit_move_insn (target, new_target);
28034 /* Otherwise convert the intermediate result to V4SFmode and
28035 use the SSE1 shuffle instructions. */
28036 if (mode != V4SFmode)
28038 tmp = gen_reg_rtx (V4SFmode);
28039 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28044 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28046 GEN_INT (one_var == 1 ? 0 : 1),
28047 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28048 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28050 if (mode != V4SFmode)
28051 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28052 else if (tmp != target)
28053 emit_move_insn (target, tmp);
28055 else if (target != new_target)
28056 emit_move_insn (target, new_target);
28061 vsimode = V4SImode;
28067 vsimode = V2SImode;
28073 /* Zero extend the variable element to SImode and recurse. */
28074 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28076 x = gen_reg_rtx (vsimode);
28077 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28079 gcc_unreachable ();
28081 emit_move_insn (target, gen_lowpart (mode, x));
28089 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28090 consisting of the values in VALS. It is known that all elements
28091 except ONE_VAR are constants. Return true if successful. */
28094 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28095 rtx target, rtx vals, int one_var)
28097 rtx var = XVECEXP (vals, 0, one_var);
28098 enum machine_mode wmode;
28101 const_vec = copy_rtx (vals);
28102 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28103 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28111 /* For the two element vectors, it's just as easy to use
28112 the general case. */
28116 /* Use ix86_expand_vector_set in 64bit mode only. */
28139 /* There's no way to set one QImode entry easily. Combine
28140 the variable value with its adjacent constant value, and
28141 promote to an HImode set. */
28142 x = XVECEXP (vals, 0, one_var ^ 1);
28145 var = convert_modes (HImode, QImode, var, true);
28146 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28147 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28148 x = GEN_INT (INTVAL (x) & 0xff);
28152 var = convert_modes (HImode, QImode, var, true);
28153 x = gen_int_mode (INTVAL (x) << 8, HImode);
28155 if (x != const0_rtx)
28156 var = expand_simple_binop (HImode, IOR, var, x, var,
28157 1, OPTAB_LIB_WIDEN);
28159 x = gen_reg_rtx (wmode);
28160 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28161 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28163 emit_move_insn (target, gen_lowpart (mode, x));
28170 emit_move_insn (target, const_vec);
28171 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28175 /* A subroutine of ix86_expand_vector_init_general. Use vector
28176 concatenate to handle the most general case: all values variable,
28177 and none identical. */
28180 ix86_expand_vector_init_concat (enum machine_mode mode,
28181 rtx target, rtx *ops, int n)
28183 enum machine_mode cmode, hmode = VOIDmode;
28184 rtx first[8], second[4];
28224 gcc_unreachable ();
28227 if (!register_operand (ops[1], cmode))
28228 ops[1] = force_reg (cmode, ops[1]);
28229 if (!register_operand (ops[0], cmode))
28230 ops[0] = force_reg (cmode, ops[0]);
28231 emit_insn (gen_rtx_SET (VOIDmode, target,
28232 gen_rtx_VEC_CONCAT (mode, ops[0],
28252 gcc_unreachable ();
28268 gcc_unreachable ();
28273 /* FIXME: We process inputs backward to help RA. PR 36222. */
28276 for (; i > 0; i -= 2, j--)
28278 first[j] = gen_reg_rtx (cmode);
28279 v = gen_rtvec (2, ops[i - 1], ops[i]);
28280 ix86_expand_vector_init (false, first[j],
28281 gen_rtx_PARALLEL (cmode, v));
28287 gcc_assert (hmode != VOIDmode);
28288 for (i = j = 0; i < n; i += 2, j++)
28290 second[j] = gen_reg_rtx (hmode);
28291 ix86_expand_vector_init_concat (hmode, second [j],
28295 ix86_expand_vector_init_concat (mode, target, second, n);
28298 ix86_expand_vector_init_concat (mode, target, first, n);
28302 gcc_unreachable ();
28306 /* A subroutine of ix86_expand_vector_init_general. Use vector
28307 interleave to handle the most general case: all values variable,
28308 and none identical. */
28311 ix86_expand_vector_init_interleave (enum machine_mode mode,
28312 rtx target, rtx *ops, int n)
28314 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28317 rtx (*gen_load_even) (rtx, rtx, rtx);
28318 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28319 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28324 gen_load_even = gen_vec_setv8hi;
28325 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28326 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28327 inner_mode = HImode;
28328 first_imode = V4SImode;
28329 second_imode = V2DImode;
28330 third_imode = VOIDmode;
28333 gen_load_even = gen_vec_setv16qi;
28334 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28335 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28336 inner_mode = QImode;
28337 first_imode = V8HImode;
28338 second_imode = V4SImode;
28339 third_imode = V2DImode;
28342 gcc_unreachable ();
28345 for (i = 0; i < n; i++)
28347 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
28348 op0 = gen_reg_rtx (SImode);
28349 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28351 /* Insert the SImode value as low element of V4SImode vector. */
28352 op1 = gen_reg_rtx (V4SImode);
28353 op0 = gen_rtx_VEC_MERGE (V4SImode,
28354 gen_rtx_VEC_DUPLICATE (V4SImode,
28356 CONST0_RTX (V4SImode),
28358 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28360 /* Cast the V4SImode vector back to a vector in orignal mode. */
28361 op0 = gen_reg_rtx (mode);
28362 emit_move_insn (op0, gen_lowpart (mode, op1));
28364 /* Load even elements into the second positon. */
28365 emit_insn ((*gen_load_even) (op0,
28366 force_reg (inner_mode,
28370 /* Cast vector to FIRST_IMODE vector. */
28371 ops[i] = gen_reg_rtx (first_imode);
28372 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28375 /* Interleave low FIRST_IMODE vectors. */
28376 for (i = j = 0; i < n; i += 2, j++)
28378 op0 = gen_reg_rtx (first_imode);
28379 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28381 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28382 ops[j] = gen_reg_rtx (second_imode);
28383 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28386 /* Interleave low SECOND_IMODE vectors. */
28387 switch (second_imode)
28390 for (i = j = 0; i < n / 2; i += 2, j++)
28392 op0 = gen_reg_rtx (second_imode);
28393 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28396 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28398 ops[j] = gen_reg_rtx (third_imode);
28399 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28401 second_imode = V2DImode;
28402 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28406 op0 = gen_reg_rtx (second_imode);
28407 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28410 /* Cast the SECOND_IMODE vector back to a vector on original
28412 emit_insn (gen_rtx_SET (VOIDmode, target,
28413 gen_lowpart (mode, op0)));
28417 gcc_unreachable ();
28421 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28422 all values variable, and none identical. */
28425 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28426 rtx target, rtx vals)
28428 rtx ops[32], op0, op1;
28429 enum machine_mode half_mode = VOIDmode;
28436 if (!mmx_ok && !TARGET_SSE)
28448 n = GET_MODE_NUNITS (mode);
28449 for (i = 0; i < n; i++)
28450 ops[i] = XVECEXP (vals, 0, i);
28451 ix86_expand_vector_init_concat (mode, target, ops, n);
28455 half_mode = V16QImode;
28459 half_mode = V8HImode;
28463 n = GET_MODE_NUNITS (mode);
28464 for (i = 0; i < n; i++)
28465 ops[i] = XVECEXP (vals, 0, i);
28466 op0 = gen_reg_rtx (half_mode);
28467 op1 = gen_reg_rtx (half_mode);
28468 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28470 ix86_expand_vector_init_interleave (half_mode, op1,
28471 &ops [n >> 1], n >> 2);
28472 emit_insn (gen_rtx_SET (VOIDmode, target,
28473 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28477 if (!TARGET_SSE4_1)
28485 /* Don't use ix86_expand_vector_init_interleave if we can't
28486 move from GPR to SSE register directly. */
28487 if (!TARGET_INTER_UNIT_MOVES)
28490 n = GET_MODE_NUNITS (mode);
28491 for (i = 0; i < n; i++)
28492 ops[i] = XVECEXP (vals, 0, i);
28493 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28501 gcc_unreachable ();
28505 int i, j, n_elts, n_words, n_elt_per_word;
28506 enum machine_mode inner_mode;
28507 rtx words[4], shift;
28509 inner_mode = GET_MODE_INNER (mode);
28510 n_elts = GET_MODE_NUNITS (mode);
28511 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28512 n_elt_per_word = n_elts / n_words;
28513 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28515 for (i = 0; i < n_words; ++i)
28517 rtx word = NULL_RTX;
28519 for (j = 0; j < n_elt_per_word; ++j)
28521 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28522 elt = convert_modes (word_mode, inner_mode, elt, true);
28528 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28529 word, 1, OPTAB_LIB_WIDEN);
28530 word = expand_simple_binop (word_mode, IOR, word, elt,
28531 word, 1, OPTAB_LIB_WIDEN);
28539 emit_move_insn (target, gen_lowpart (mode, words[0]));
28540 else if (n_words == 2)
28542 rtx tmp = gen_reg_rtx (mode);
28543 emit_clobber (tmp);
28544 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28545 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28546 emit_move_insn (target, tmp);
28548 else if (n_words == 4)
28550 rtx tmp = gen_reg_rtx (V4SImode);
28551 gcc_assert (word_mode == SImode);
28552 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28553 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28554 emit_move_insn (target, gen_lowpart (mode, tmp));
28557 gcc_unreachable ();
28561 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28562 instructions unless MMX_OK is true. */
28565 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28567 enum machine_mode mode = GET_MODE (target);
28568 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28569 int n_elts = GET_MODE_NUNITS (mode);
28570 int n_var = 0, one_var = -1;
28571 bool all_same = true, all_const_zero = true;
28575 for (i = 0; i < n_elts; ++i)
28577 x = XVECEXP (vals, 0, i);
28578 if (!(CONST_INT_P (x)
28579 || GET_CODE (x) == CONST_DOUBLE
28580 || GET_CODE (x) == CONST_FIXED))
28581 n_var++, one_var = i;
28582 else if (x != CONST0_RTX (inner_mode))
28583 all_const_zero = false;
28584 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28588 /* Constants are best loaded from the constant pool. */
28591 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28595 /* If all values are identical, broadcast the value. */
28597 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28598 XVECEXP (vals, 0, 0)))
28601 /* Values where only one field is non-constant are best loaded from
28602 the pool and overwritten via move later. */
28606 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28607 XVECEXP (vals, 0, one_var),
28611 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28615 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28619 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28621 enum machine_mode mode = GET_MODE (target);
28622 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28623 enum machine_mode half_mode;
28624 bool use_vec_merge = false;
28626 static rtx (*gen_extract[6][2]) (rtx, rtx)
28628 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28629 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28630 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28631 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28632 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28633 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28635 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28637 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28638 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28639 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28640 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28641 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28642 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28652 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28653 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28655 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28657 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28658 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28664 use_vec_merge = TARGET_SSE4_1;
28672 /* For the two element vectors, we implement a VEC_CONCAT with
28673 the extraction of the other element. */
28675 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28676 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28679 op0 = val, op1 = tmp;
28681 op0 = tmp, op1 = val;
28683 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28684 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28689 use_vec_merge = TARGET_SSE4_1;
28696 use_vec_merge = true;
28700 /* tmp = target = A B C D */
28701 tmp = copy_to_reg (target);
28702 /* target = A A B B */
28703 emit_insn (gen_sse_unpcklps (target, target, target));
28704 /* target = X A B B */
28705 ix86_expand_vector_set (false, target, val, 0);
28706 /* target = A X C D */
28707 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28708 GEN_INT (1), GEN_INT (0),
28709 GEN_INT (2+4), GEN_INT (3+4)));
28713 /* tmp = target = A B C D */
28714 tmp = copy_to_reg (target);
28715 /* tmp = X B C D */
28716 ix86_expand_vector_set (false, tmp, val, 0);
28717 /* target = A B X D */
28718 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28719 GEN_INT (0), GEN_INT (1),
28720 GEN_INT (0+4), GEN_INT (3+4)));
28724 /* tmp = target = A B C D */
28725 tmp = copy_to_reg (target);
28726 /* tmp = X B C D */
28727 ix86_expand_vector_set (false, tmp, val, 0);
28728 /* target = A B X D */
28729 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28730 GEN_INT (0), GEN_INT (1),
28731 GEN_INT (2+4), GEN_INT (0+4)));
28735 gcc_unreachable ();
28740 use_vec_merge = TARGET_SSE4_1;
28744 /* Element 0 handled by vec_merge below. */
28747 use_vec_merge = true;
28753 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28754 store into element 0, then shuffle them back. */
28758 order[0] = GEN_INT (elt);
28759 order[1] = const1_rtx;
28760 order[2] = const2_rtx;
28761 order[3] = GEN_INT (3);
28762 order[elt] = const0_rtx;
28764 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28765 order[1], order[2], order[3]));
28767 ix86_expand_vector_set (false, target, val, 0);
28769 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28770 order[1], order[2], order[3]));
28774 /* For SSE1, we have to reuse the V4SF code. */
28775 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28776 gen_lowpart (SFmode, val), elt);
28781 use_vec_merge = TARGET_SSE2;
28784 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28788 use_vec_merge = TARGET_SSE4_1;
28795 half_mode = V16QImode;
28801 half_mode = V8HImode;
28807 half_mode = V4SImode;
28813 half_mode = V2DImode;
28819 half_mode = V4SFmode;
28825 half_mode = V2DFmode;
28831 /* Compute offset. */
28835 gcc_assert (i <= 1);
28837 /* Extract the half. */
28838 tmp = gen_reg_rtx (half_mode);
28839 emit_insn ((*gen_extract[j][i]) (tmp, target));
28841 /* Put val in tmp at elt. */
28842 ix86_expand_vector_set (false, tmp, val, elt);
28845 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
28854 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28855 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28856 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28860 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28862 emit_move_insn (mem, target);
28864 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28865 emit_move_insn (tmp, val);
28867 emit_move_insn (target, mem);
28872 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28874 enum machine_mode mode = GET_MODE (vec);
28875 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28876 bool use_vec_extr = false;
28889 use_vec_extr = true;
28893 use_vec_extr = TARGET_SSE4_1;
28905 tmp = gen_reg_rtx (mode);
28906 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28907 GEN_INT (elt), GEN_INT (elt),
28908 GEN_INT (elt+4), GEN_INT (elt+4)));
28912 tmp = gen_reg_rtx (mode);
28913 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28917 gcc_unreachable ();
28920 use_vec_extr = true;
28925 use_vec_extr = TARGET_SSE4_1;
28939 tmp = gen_reg_rtx (mode);
28940 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28941 GEN_INT (elt), GEN_INT (elt),
28942 GEN_INT (elt), GEN_INT (elt)));
28946 tmp = gen_reg_rtx (mode);
28947 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28951 gcc_unreachable ();
28954 use_vec_extr = true;
28959 /* For SSE1, we have to reuse the V4SF code. */
28960 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28961 gen_lowpart (V4SFmode, vec), elt);
28967 use_vec_extr = TARGET_SSE2;
28970 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28974 use_vec_extr = TARGET_SSE4_1;
28978 /* ??? Could extract the appropriate HImode element and shift. */
28985 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28986 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28988 /* Let the rtl optimizers know about the zero extension performed. */
28989 if (inner_mode == QImode || inner_mode == HImode)
28991 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28992 target = gen_lowpart (SImode, target);
28995 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28999 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29001 emit_move_insn (mem, vec);
29003 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29004 emit_move_insn (target, tmp);
29008 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29009 pattern to reduce; DEST is the destination; IN is the input vector. */
29012 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29014 rtx tmp1, tmp2, tmp3;
29016 tmp1 = gen_reg_rtx (V4SFmode);
29017 tmp2 = gen_reg_rtx (V4SFmode);
29018 tmp3 = gen_reg_rtx (V4SFmode);
29020 emit_insn (gen_sse_movhlps (tmp1, in, in));
29021 emit_insn (fn (tmp2, tmp1, in));
29023 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29024 GEN_INT (1), GEN_INT (1),
29025 GEN_INT (1+4), GEN_INT (1+4)));
29026 emit_insn (fn (dest, tmp2, tmp3));
29029 /* Target hook for scalar_mode_supported_p. */
29031 ix86_scalar_mode_supported_p (enum machine_mode mode)
29033 if (DECIMAL_FLOAT_MODE_P (mode))
29035 else if (mode == TFmode)
29038 return default_scalar_mode_supported_p (mode);
29041 /* Implements target hook vector_mode_supported_p. */
29043 ix86_vector_mode_supported_p (enum machine_mode mode)
29045 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29047 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29049 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29051 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29053 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29058 /* Target hook for c_mode_for_suffix. */
29059 static enum machine_mode
29060 ix86_c_mode_for_suffix (char suffix)
29070 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29072 We do this in the new i386 backend to maintain source compatibility
29073 with the old cc0-based compiler. */
29076 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29077 tree inputs ATTRIBUTE_UNUSED,
29080 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29082 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29087 /* Implements target vector targetm.asm.encode_section_info. This
29088 is not used by netware. */
29090 static void ATTRIBUTE_UNUSED
29091 ix86_encode_section_info (tree decl, rtx rtl, int first)
29093 default_encode_section_info (decl, rtl, first);
29095 if (TREE_CODE (decl) == VAR_DECL
29096 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29097 && ix86_in_large_data_p (decl))
29098 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29101 /* Worker function for REVERSE_CONDITION. */
29104 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29106 return (mode != CCFPmode && mode != CCFPUmode
29107 ? reverse_condition (code)
29108 : reverse_condition_maybe_unordered (code));
29111 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29115 output_387_reg_move (rtx insn, rtx *operands)
29117 if (REG_P (operands[0]))
29119 if (REG_P (operands[1])
29120 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29122 if (REGNO (operands[0]) == FIRST_STACK_REG)
29123 return output_387_ffreep (operands, 0);
29124 return "fstp\t%y0";
29126 if (STACK_TOP_P (operands[0]))
29127 return "fld%Z1\t%y1";
29130 else if (MEM_P (operands[0]))
29132 gcc_assert (REG_P (operands[1]));
29133 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29134 return "fstp%Z0\t%y0";
29137 /* There is no non-popping store to memory for XFmode.
29138 So if we need one, follow the store with a load. */
29139 if (GET_MODE (operands[0]) == XFmode)
29140 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29142 return "fst%Z0\t%y0";
29149 /* Output code to perform a conditional jump to LABEL, if C2 flag in
29150 FP status register is set. */
29153 ix86_emit_fp_unordered_jump (rtx label)
29155 rtx reg = gen_reg_rtx (HImode);
29158 emit_insn (gen_x86_fnstsw_1 (reg));
29160 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29162 emit_insn (gen_x86_sahf_1 (reg));
29164 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29165 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29169 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29171 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29172 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29175 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29176 gen_rtx_LABEL_REF (VOIDmode, label),
29178 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29180 emit_jump_insn (temp);
29181 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29184 /* Output code to perform a log1p XFmode calculation. */
29186 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29188 rtx label1 = gen_label_rtx ();
29189 rtx label2 = gen_label_rtx ();
29191 rtx tmp = gen_reg_rtx (XFmode);
29192 rtx tmp2 = gen_reg_rtx (XFmode);
/* The threshold below is 1 - sqrt(2)/2.  For |op1| below it, fyl2xp1
   (which computes y * log2 (x + 1) accurately near zero) is used
   directly; above it, 1 + op1 is formed explicitly and fyl2x is used.  */
29195 emit_insn (gen_absxf2 (tmp, op1));
29196 test = gen_rtx_GE (VOIDmode, tmp,
29197 CONST_DOUBLE_FROM_REAL_VALUE (
29198 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29200 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29202 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29203 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29204 emit_jump (label2);
/* Large-magnitude path: op0 = ln2 * log2 (1 + op1).  */
29206 emit_label (label1);
29207 emit_move_insn (tmp, CONST1_RTX (XFmode));
29208 emit_insn (gen_addxf3 (tmp, op1, tmp));
29209 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29210 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29212 emit_label (label2);
29215 /* Output code to perform a Newton-Raphson approximation of a single precision
29216 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29218 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29220 rtx x0, x1, e0, e1, two;
29222 x0 = gen_reg_rtx (mode);
29223 e0 = gen_reg_rtx (mode);
29224 e1 = gen_reg_rtx (mode);
29225 x1 = gen_reg_rtx (mode);
29227 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
/* MODE may be a vector of SFmode; broadcast the constant 2.0 then.  */
29229 if (VECTOR_MODE_P (mode))
29230 two = ix86_build_const_vector (SFmode, true, two);
29232 two = force_reg (mode, two);
29234 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29236 /* x0 = rcp(b) estimate */
29237 emit_insn (gen_rtx_SET (VOIDmode, x0,
29238 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0; one Newton step refines the rcpss estimate.  */
29241 emit_insn (gen_rtx_SET (VOIDmode, e0,
29242 gen_rtx_MULT (mode, x0, b)));
29244 emit_insn (gen_rtx_SET (VOIDmode, e1,
29245 gen_rtx_MINUS (mode, two, e0)));
29247 emit_insn (gen_rtx_SET (VOIDmode, x1,
29248 gen_rtx_MULT (mode, x0, e1)));
29250 emit_insn (gen_rtx_SET (VOIDmode, res,
29251 gen_rtx_MULT (mode, a, x1)));
29254 /* Output code to perform a Newton-Raphson approximation of a
29255 single precision floating point [reciprocal] square root. */
29257 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29260 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29263 x0 = gen_reg_rtx (mode);
29264 e0 = gen_reg_rtx (mode);
29265 e1 = gen_reg_rtx (mode);
29266 e2 = gen_reg_rtx (mode);
29267 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 used by the refinement formula.  */
29269 real_from_integer (&r, VOIDmode, -3, -1, 0);
29270 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29272 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29273 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29275 if (VECTOR_MODE_P (mode))
29277 mthree = ix86_build_const_vector (SFmode, true, mthree);
29278 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29281 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29282 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29284 /* x0 = rsqrt(a) estimate */
29285 emit_insn (gen_rtx_SET (VOIDmode, x0,
29286 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29289 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
29294 zero = gen_reg_rtx (mode);
29295 mask = gen_reg_rtx (mode);
29297 zero = force_reg (mode, CONST0_RTX(mode));
29298 emit_insn (gen_rtx_SET (VOIDmode, mask,
29299 gen_rtx_NE (mode, zero, a)));
29301 emit_insn (gen_rtx_SET (VOIDmode, x0,
29302 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a; e1 = e0 * x0 = a * x0^2.  */
29306 emit_insn (gen_rtx_SET (VOIDmode, e0,
29307 gen_rtx_MULT (mode, x0, a)));
29309 emit_insn (gen_rtx_SET (VOIDmode, e1,
29310 gen_rtx_MULT (mode, e0, x0)));
/* e2 = a * x0^2 - 3.0.  */
29313 mthree = force_reg (mode, mthree);
29314 emit_insn (gen_rtx_SET (VOIDmode, e2,
29315 gen_rtx_PLUS (mode, e1, mthree)));
29317 mhalf = force_reg (mode, mhalf);
/* NOTE(review): e3 is stored twice below (-.5*x0 for the rsqrt form,
   -.5*e0 for the sqrt form); presumably these are selected by the
   recip argument in the full source — confirm against the original.  */
29319 /* e3 = -.5 * x0 */
29320 emit_insn (gen_rtx_SET (VOIDmode, e3,
29321 gen_rtx_MULT (mode, x0, mhalf)));
29323 /* e3 = -.5 * e0 */
29324 emit_insn (gen_rtx_SET (VOIDmode, e3,
29325 gen_rtx_MULT (mode, e0, mhalf)));
29326 /* ret = e2 * e3 */
29327 emit_insn (gen_rtx_SET (VOIDmode, res,
29328 gen_rtx_MULT (mode, e2, e3)));
29331 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29333 static void ATTRIBUTE_UNUSED
29334 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29337 /* With Binutils 2.15, the "@unwind" marker must be specified on
29338 every occurrence of the ".eh_frame" section, not just the first
29341 && strcmp (name, ".eh_frame") == 0)
/* Writable unwind sections get "aw", read-only ones "a".  */
29343 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29344 flags & SECTION_WRITE ? "aw" : "a");
/* Everything other than .eh_frame goes through the generic ELF hook.  */
29347 default_elf_asm_named_section (name, flags, decl);
29350 /* Return the mangling of TYPE if it is an extended fundamental type. */
29352 static const char *
29353 ix86_mangle_type (const_tree type)
29355 type = TYPE_MAIN_VARIANT (type);
/* Only scalar fundamental types can have a target-specific mangling.  */
29357 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29358 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29361 switch (TYPE_MODE (type))
29364 /* __float128 is "g". */
29367 /* "long double" or __float80 is "e". */
29374 /* For 32-bit code we can save PIC register setup by using
29375 __stack_chk_fail_local hidden function instead of calling
29376 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29377 register, so it is better to call __stack_chk_fail directly. */
29380 ix86_stack_protect_fail (void)
/* Returns the pattern for the stack-protector failure call.  */
29382 return TARGET_64BIT
29383 ? default_external_stack_protect_fail ()
29384 : default_hidden_stack_protect_fail ();
29387 /* Select a format to encode pointers in exception handling data. CODE
29388 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29389 true if the symbol may be affected by dynamic relocations.
29391 ??? All x86 object file formats are capable of representing this.
29392 After all, the relocation needed is the same as for the call insn.
29393 Whether or not a particular assembler allows us to enter such, I
29394 guess we'll have to see. */
29396 asm_preferred_eh_data_format (int code, int global)
/* PIC path: use pc-relative encodings, 4-byte where the code model
   guarantees it fits, and indirect for possibly-preempted symbols.  */
29400 int type = DW_EH_PE_sdata8;
29402 || ix86_cmodel == CM_SMALL_PIC
29403 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29404 type = DW_EH_PE_sdata4;
29405 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute pointers, shrunk to 4 bytes when the model allows.  */
29407 if (ix86_cmodel == CM_SMALL
29408 || (ix86_cmodel == CM_MEDIUM && code))
29409 return DW_EH_PE_udata4;
29410 return DW_EH_PE_absptr;
29413 /* Expand copysign from SIGN to the positive value ABS_VALUE
29414 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
29417 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29419 enum machine_mode mode = GET_MODE (sign);
29420 rtx sgn = gen_reg_rtx (mode);
29421 if (mask == NULL_RTX)
/* No caller-supplied mask: build the sign-bit mask for this mode.  */
29423 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29424 if (!VECTOR_MODE_P (mode))
29426 /* We need to generate a scalar mode mask in this case. */
29427 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29428 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29429 mask = gen_reg_rtx (mode);
29430 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* sgn = sign bit of SIGN (mask is complemented here: ~mask & sign);
   result = abs_value | sgn.  */
29434 mask = gen_rtx_NOT (mode, mask);
29435 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29436 gen_rtx_AND (mode, mask, sign)));
29437 emit_insn (gen_rtx_SET (VOIDmode, result,
29438 gen_rtx_IOR (mode, abs_value, sgn)));
29441 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29442 mask for masking out the sign-bit is stored in *SMASK, if that is
29445 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29447 enum machine_mode mode = GET_MODE (op0);
29450 xa = gen_reg_rtx (mode);
/* fabs = clear the sign bit: xa = op0 & ~signbit.  */
29451 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29452 if (!VECTOR_MODE_P (mode))
29454 /* We need to generate a scalar mode mask in this case. */
29455 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29456 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29457 mask = gen_reg_rtx (mode);
29458 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29460 emit_insn (gen_rtx_SET (VOIDmode, xa,
29461 gen_rtx_AND (mode, op0, mask)));
29469 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29470 swapping the operands if SWAP_OPERANDS is true. The expanded
29471 code is a forward jump to a newly created label in case the
29472 comparison is true. The generated label rtx is returned. */
29474 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29475 bool swap_operands)
/* CCFPUmode: unordered FP compare so NaNs take the expected branch.  */
29486 label = gen_label_rtx ();
29487 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29488 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29489 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29490 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29491 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29492 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29493 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29494 JUMP_LABEL (tmp) = label;
29499 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29500 using comparison code CODE. Operands are swapped for the comparison if
29501 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29503 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29504 bool swap_operands)
29506 enum machine_mode mode = GET_MODE (op0);
29507 rtx mask = gen_reg_rtx (mode);
/* cmpsd for DFmode, cmpss for SFmode; result is an all-ones/all-zeros
   bit mask in MASK.  */
29516 if (mode == DFmode)
29517 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29518 gen_rtx_fmt_ee (code, mode, op0, op1)));
29520 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29521 gen_rtx_fmt_ee (code, mode, op0, op1)));
29526 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29527 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29529 ix86_gen_TWO52 (enum machine_mode mode)
29531 REAL_VALUE_TYPE TWO52r;
/* 2**52 for double, 2**23 for float: the smallest magnitude at which
   every representable value is already an integer.  */
29534 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29535 TWO52 = const_double_from_real_value (TWO52r, mode);
29536 TWO52 = force_reg (mode, TWO52);
29541 /* Expand SSE sequence for computing lround from OP1 storing
29544 ix86_expand_lround (rtx op0, rtx op1)
29546 /* C code for the stuff we're doing below:
29547 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29550 enum machine_mode mode = GET_MODE (op1);
29551 const struct real_format *fmt;
29552 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29555 /* load nextafter (0.5, 0.0) */
/* nextafter (0.5, 0.0) = 0.5 - 2**(-p-1); using the predecessor of 0.5
   avoids rounding exact halfway cases the wrong way after the add.  */
29556 fmt = REAL_MODE_FORMAT (mode);
29557 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29558 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29560 /* adj = copysign (0.5, op1) */
29561 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29562 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29564 /* adj = op1 + adj */
29565 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29567 /* op0 = (imode)adj */
29568 expand_fix (op0, adj, 0);
29571 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
29574 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29576 /* C code for the stuff we're doing below (for do_floor):
29578 xi -= (double)xi > op1 ? 1 : 0;
29581 enum machine_mode fmode = GET_MODE (op1);
29582 enum machine_mode imode = GET_MODE (op0);
29583 rtx ireg, freg, label, tmp;
29585 /* reg = (long)op1 */
29586 ireg = gen_reg_rtx (imode);
29587 expand_fix (ireg, op1, 0);
29589 /* freg = (double)reg */
29590 freg = gen_reg_rtx (fmode);
29591 expand_float (freg, ireg, 0);
29593 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* The truncating fix rounds toward zero; compensate by 1 in the
   floor/ceil direction when the round-trip moved past the operand.
   Operands are swapped for the ceil case (!do_floor).  */
29594 label = ix86_expand_sse_compare_and_jump (UNLE,
29595 freg, op1, !do_floor);
29596 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29597 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29598 emit_move_insn (ireg, tmp);
29600 emit_label (label);
29601 LABEL_NUSES (label) = 1;
29603 emit_move_insn (op0, ireg);
29606 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29607 result in OPERAND0. */
29609 ix86_expand_rint (rtx operand0, rtx operand1)
29611 /* C code for the stuff we're doing below:
29612 xa = fabs (operand1);
29613 if (!isless (xa, 2**52))
29615 xa = xa + 2**52 - 2**52;
29616 return copysign (xa, operand1);
29618 enum machine_mode mode = GET_MODE (operand0);
29619 rtx res, xa, label, TWO52, mask;
29621 res = gen_reg_rtx (mode);
29622 emit_move_insn (res, operand1);
29624 /* xa = abs (operand1) */
29625 xa = ix86_expand_sse_fabs (res, &mask);
29627 /* if (!isless (xa, TWO52)) goto label; */
/* Values >= 2**52 (or NaN) are already integral — leave them as-is.  */
29628 TWO52 = ix86_gen_TWO52 (mode);
29629 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding and subtracting 2**52 rounds XA to an integer in the current
   rounding mode, which is exactly rint's semantics.  */
29631 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29632 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29634 ix86_sse_copysign_to_positive (res, xa, res, mask);
29636 emit_label (label);
29637 LABEL_NUSES (label) = 1;
29639 emit_move_insn (operand0, res);
29642 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29645 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29647 /* C code for the stuff we expand below.
29648 double xa = fabs (x), x2;
29649 if (!isless (xa, TWO52))
29651 xa = xa + TWO52 - TWO52;
29652 x2 = copysign (xa, x);
29661 enum machine_mode mode = GET_MODE (operand0);
29662 rtx xa, TWO52, tmp, label, one, res, mask;
29664 TWO52 = ix86_gen_TWO52 (mode);
29666 /* Temporary for holding the result, initialized to the input
29667 operand to ease control flow. */
29668 res = gen_reg_rtx (mode);
29669 emit_move_insn (res, operand1);
29671 /* xa = abs (operand1) */
29672 xa = ix86_expand_sse_fabs (res, &mask);
29674 /* if (!isless (xa, TWO52)) goto label; */
29675 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29677 /* xa = xa + TWO52 - TWO52; */
/* Round to nearest integer via the 2**52 add/sub trick, then restore
   the sign; the compensation step below fixes the rounding direction.  */
29678 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29679 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29681 /* xa = copysign (xa, operand1) */
29682 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29684 /* generate 1.0 or -1.0 */
29685 one = force_reg (mode,
29686 const_double_from_real_value (do_floor
29687 ? dconst1 : dconstm1, mode));
29689 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29690 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29691 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29692 gen_rtx_AND (mode, one, tmp)));
29693 /* We always need to subtract here to preserve signed zero. */
29694 tmp = expand_simple_binop (mode, MINUS,
29695 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29696 emit_move_insn (res, tmp);
29698 emit_label (label);
29699 LABEL_NUSES (label) = 1;
29701 emit_move_insn (operand0, res);
29704 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29707 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29709 /* C code for the stuff we expand below.
29710 double xa = fabs (x), x2;
29711 if (!isless (xa, TWO52))
29713 x2 = (double)(long)x;
29720 if (HONOR_SIGNED_ZEROS (mode))
29721 return copysign (x2, x);
29724 enum machine_mode mode = GET_MODE (operand0);
29725 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29727 TWO52 = ix86_gen_TWO52 (mode);
29729 /* Temporary for holding the result, initialized to the input
29730 operand to ease control flow. */
29731 res = gen_reg_rtx (mode);
29732 emit_move_insn (res, operand1);
29734 /* xa = abs (operand1) */
29735 xa = ix86_expand_sse_fabs (res, &mask);
29737 /* if (!isless (xa, TWO52)) goto label; */
29738 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29740 /* xa = (double)(long)x */
/* Truncate via integer conversion (DImode cvttsd2siq for double,
   SImode for float), then convert back.  */
29741 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29742 expand_fix (xi, res, 0);
29743 expand_float (xa, xi, 0);
29746 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29748 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* Truncation rounds toward zero; step one unit toward -inf (floor)
   or +inf (ceil) when the round-trip overshot the operand.  */
29749 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29750 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29751 gen_rtx_AND (mode, one, tmp)));
29752 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29753 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29754 emit_move_insn (res, tmp);
29756 if (HONOR_SIGNED_ZEROS (mode))
29757 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29759 emit_label (label);
29760 LABEL_NUSES (label) = 1;
29762 emit_move_insn (operand0, res);
29765 /* Expand SSE sequence for computing round from OPERAND1 storing
29766 into OPERAND0. Sequence that works without relying on DImode truncation
29767 via cvttsd2siq that is only available on 64bit targets. */
29769 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29771 /* C code for the stuff we expand below.
29772 double xa = fabs (x), xa2, x2;
29773 if (!isless (xa, TWO52))
29775 Using the absolute value and copying back sign makes
29776 -0.0 -> -0.0 correct.
29777 xa2 = xa + TWO52 - TWO52;
29782 else if (dxa > 0.5)
29784 x2 = copysign (xa2, x);
29787 enum machine_mode mode = GET_MODE (operand0);
29788 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29790 TWO52 = ix86_gen_TWO52 (mode);
29792 /* Temporary for holding the result, initialized to the input
29793 operand to ease control flow. */
29794 res = gen_reg_rtx (mode);
29795 emit_move_insn (res, operand1);
29797 /* xa = abs (operand1) */
29798 xa = ix86_expand_sse_fabs (res, &mask);
29800 /* if (!isless (xa, TWO52)) goto label; */
29801 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29803 /* xa2 = xa + TWO52 - TWO52; */
/* xa2 = round-to-nearest-even (xa); dxa measures how the current
   rounding mode moved xa, so halfway cases can be corrected below
   to round-half-away-from-zero as round() requires.  */
29804 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29805 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29807 /* dxa = xa2 - xa; */
29808 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29810 /* generate 0.5, 1.0 and -0.5 */
29811 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29812 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29813 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29817 tmp = gen_reg_rtx (mode);
29818 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29819 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29820 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29821 gen_rtx_AND (mode, one, tmp)));
29822 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29823 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29824 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29825 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29826 gen_rtx_AND (mode, one, tmp)));
29827 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29829 /* res = copysign (xa2, operand1) */
29830 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29832 emit_label (label);
29833 LABEL_NUSES (label) = 1;
29835 emit_move_insn (operand0, res);
29838 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29841 ix86_expand_trunc (rtx operand0, rtx operand1)
29843 /* C code for SSE variant we expand below.
29844 double xa = fabs (x), x2;
29845 if (!isless (xa, TWO52))
29847 x2 = (double)(long)x;
29848 if (HONOR_SIGNED_ZEROS (mode))
29849 return copysign (x2, x);
29852 enum machine_mode mode = GET_MODE (operand0);
29853 rtx xa, xi, TWO52, label, res, mask;
29855 TWO52 = ix86_gen_TWO52 (mode);
29857 /* Temporary for holding the result, initialized to the input
29858 operand to ease control flow. */
29859 res = gen_reg_rtx (mode);
29860 emit_move_insn (res, operand1);
29862 /* xa = abs (operand1) */
29863 xa = ix86_expand_sse_fabs (res, &mask);
29865 /* if (!isless (xa, TWO52)) goto label; */
29866 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29868 /* x = (double)(long)x */
/* cvttsd2si / cvttss2si truncate toward zero — exactly trunc().  */
29869 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29870 expand_fix (xi, res, 0);
29871 expand_float (res, xi, 0);
29873 if (HONOR_SIGNED_ZEROS (mode))
29874 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29876 emit_label (label);
29877 LABEL_NUSES (label) = 1;
29879 emit_move_insn (operand0, res);
29882 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29885 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29887 enum machine_mode mode = GET_MODE (operand0);
29888 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29890 /* C code for SSE variant we expand below.
29891 double xa = fabs (x), x2;
29892 if (!isless (xa, TWO52))
29894 xa2 = xa + TWO52 - TWO52;
29898 x2 = copysign (xa2, x);
/* 32-bit variant: avoids the DImode truncating conversion that is only
   available on 64-bit targets by using the 2**52 add/sub trick plus a
   compensation step.  */
29902 TWO52 = ix86_gen_TWO52 (mode);
29904 /* Temporary for holding the result, initialized to the input
29905 operand to ease control flow. */
29906 res = gen_reg_rtx (mode);
29907 emit_move_insn (res, operand1);
29909 /* xa = abs (operand1) */
29910 xa = ix86_expand_sse_fabs (res, &smask);
29912 /* if (!isless (xa, TWO52)) goto label; */
29913 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29915 /* res = xa + TWO52 - TWO52; */
29916 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29917 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29918 emit_move_insn (res, tmp);
29921 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29923 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
/* Rounding may have moved up past |x|; step back down so the result
   truncates toward zero.  */
29924 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29925 emit_insn (gen_rtx_SET (VOIDmode, mask,
29926 gen_rtx_AND (mode, mask, one)));
29927 tmp = expand_simple_binop (mode, MINUS,
29928 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29929 emit_move_insn (res, tmp);
29931 /* res = copysign (res, operand1) */
29932 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29934 emit_label (label);
29935 LABEL_NUSES (label) = 1;
29937 emit_move_insn (operand0, res);
29940 /* Expand SSE sequence for computing round from OPERAND1 storing
29943 ix86_expand_round (rtx operand0, rtx operand1)
29945 /* C code for the stuff we're doing below:
29946 double xa = fabs (x);
29947 if (!isless (xa, TWO52))
29949 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29950 return copysign (xa, x);
29952 enum machine_mode mode = GET_MODE (operand0);
29953 rtx res, TWO52, xa, label, xi, half, mask;
29954 const struct real_format *fmt;
29955 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29957 /* Temporary for holding the result, initialized to the input
29958 operand to ease control flow. */
29959 res = gen_reg_rtx (mode);
29960 emit_move_insn (res, operand1);
29962 TWO52 = ix86_gen_TWO52 (mode);
29963 xa = ix86_expand_sse_fabs (res, &mask);
29964 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29966 /* load nextafter (0.5, 0.0) */
/* Use the value just below 0.5 so that adding it never pushes an exact
   .5 boundary value to the wrong integer after truncation.  */
29967 fmt = REAL_MODE_FORMAT (mode);
29968 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29969 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29971 /* xa = xa + 0.5 */
29972 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29973 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29975 /* xa = (double)(int64_t)xa */
29976 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29977 expand_fix (xi, xa, 0);
29978 expand_float (xa, xi, 0);
29980 /* res = copysign (xa, operand1) */
29981 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29983 emit_label (label);
29984 LABEL_NUSES (label) = 1;
29986 emit_move_insn (operand0, res);
29990 /* Validate whether a SSE5 instruction is valid or not.
29991 OPERANDS is the array of operands.
29992 NUM is the number of operands.
29993 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29994 NUM_MEMORY is the maximum number of memory operands to accept.
29995 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
29998 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29999 bool uses_oc0, int num_memory, bool commutative)
30005 /* Count the number of memory arguments */
30008 for (i = 0; i < num; i++)
30010 enum machine_mode mode = GET_MODE (operands[i]);
30011 if (register_operand (operands[i], mode))
30014 else if (memory_operand (operands[i], mode))
/* Record which operand slots are memory in a bitmask.  */
30016 mem_mask |= (1 << i);
30022 rtx pattern = PATTERN (insn);
30024 /* allow 0 for pcmov */
30025 if (GET_CODE (pattern) != SET
30026 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
30028 || operands[i] != CONST0_RTX (mode))
30033 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
30034 a memory operation. */
30035 if (num_memory < 0)
30037 num_memory = -num_memory;
30038 if ((mem_mask & (1 << (num-1))) != 0)
30040 mem_mask &= ~(1 << (num-1));
30045 /* If there were no memory operations, allow the insn */
30049 /* Do not allow the destination register to be a memory operand. */
30050 else if (mem_mask & (1 << 0))
30053 /* If there are too many memory operations, disallow the instruction. While
30054 the hardware only allows 1 memory reference, before register allocation
30055 for some insns, we allow two memory operations sometimes in order to allow
30056 code like the following to be optimized:
30058 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
30060 or similar cases that are vectorized into using the fmaddss
30062 else if (mem_count > num_memory)
30065 /* Don't allow more than one memory operation if not optimizing. */
30066 else if (mem_count > 1 && !optimize)
30069 else if (num == 4 && mem_count == 1)
30071 /* formats (destination is the first argument), example fmaddss:
30072 xmm1, xmm1, xmm2, xmm3/mem
30073 xmm1, xmm1, xmm2/mem, xmm3
30074 xmm1, xmm2, xmm3/mem, xmm1
30075 xmm1, xmm2/mem, xmm3, xmm1 */
30077 return ((mem_mask == (1 << 1))
30078 || (mem_mask == (1 << 2))
30079 || (mem_mask == (1 << 3)));
30081 /* format, example pmacsdd:
30082 xmm1, xmm2, xmm3/mem, xmm1 */
30084 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
30086 return (mem_mask == (1 << 2));
30089 else if (num == 4 && num_memory == 2)
30091 /* If there are two memory operations, we can load one of the memory ops
30092 into the destination register. This is for optimizing the
30093 multiply/add ops, which the combiner has optimized both the multiply
30094 and the add insns to have a memory operation. We have to be careful
30095 that the destination doesn't overlap with the inputs. */
30096 rtx op0 = operands[0];
30098 if (reg_mentioned_p (op0, operands[1])
30099 || reg_mentioned_p (op0, operands[2])
30100 || reg_mentioned_p (op0, operands[3]))
30103 /* formats (destination is the first argument), example fmaddss:
30104 xmm1, xmm1, xmm2, xmm3/mem
30105 xmm1, xmm1, xmm2/mem, xmm3
30106 xmm1, xmm2, xmm3/mem, xmm1
30107 xmm1, xmm2/mem, xmm3, xmm1
30109 For the oc0 case, we will load either operands[1] or operands[3] into
30110 operands[0], so any combination of 2 memory operands is ok. */
30114 /* format, example pmacsdd:
30115 xmm1, xmm2, xmm3/mem, xmm1
30117 For the integer multiply/add instructions be more restrictive and
30118 require operands[2] and operands[3] to be the memory operands. */
/* NOTE(review): BUG — the second disjunct below lacks "mem_mask ==",
   so "((1 << 2) | (1 << 3))" is a nonzero constant and the whole
   expression is always true.  It should read:
   mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)).  */
30120 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
30122 return (mem_mask == ((1 << 2) | (1 << 3)));
30125 else if (num == 3 && num_memory == 1)
30127 /* formats, example protb:
30128 xmm1, xmm2, xmm3/mem
30129 xmm1, xmm2/mem, xmm3 */
30131 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
30133 /* format, example comeq:
30134 xmm1, xmm2, xmm3/mem */
30136 return (mem_mask == (1 << 2));
/* Unexpected NUM/NUM_MEMORY combination.  */
30140 gcc_unreachable ();
30146 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
30147 hardware will allow by using the destination register to load one of the
30148 memory operations. Presently this is used by the multiply/add routines to
30149 allow 2 memory references. */
30152 ix86_expand_sse5_multiple_memory (rtx operands[],
30154 enum machine_mode mode)
30156 rtx op0 = operands[0];
/* Sanity: the destination must be a free register, not memory and not
   mentioned by any input, or loading into it would clobber an input.  */
30158 || memory_operand (op0, mode)
30159 || reg_mentioned_p (op0, operands[1])
30160 || reg_mentioned_p (op0, operands[2])
30161 || reg_mentioned_p (op0, operands[3]))
30162 gcc_unreachable ();
30164 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
30165 the destination register. */
30166 if (memory_operand (operands[1], mode))
30168 emit_move_insn (op0, operands[1]);
30171 else if (memory_operand (operands[3], mode))
30173 emit_move_insn (op0, operands[3]);
/* Caller guaranteed two memory inputs; one of them must be slot 1 or 3.  */
30177 gcc_unreachable ();
30183 /* Table of valid machine attributes. */
30184 static const struct attribute_spec ix86_attribute_table[] =
30186 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30187 /* Stdcall attribute says callee is responsible for popping arguments
30188 if they are not variable. */
30189 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30190 /* Fastcall attribute says callee is responsible for popping arguments
30191 if they are not variable. */
30192 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30193 /* Cdecl attribute says the callee is a normal C declaration */
30194 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30195 /* Regparm attribute specifies how many integer arguments are to be
30196 passed in registers. */
30197 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30198 /* Sseregparm attribute says we are using x86_64 calling conventions
30199 for FP arguments. */
30200 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30201 /* force_align_arg_pointer says this function realigns the stack at entry. */
30202 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30203 false, true, true, ix86_handle_cconv_attribute },
30204 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30205 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30206 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30207 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* ms_struct/gcc_struct select the structure layout convention.  */
30209 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30210 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30211 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30212 SUBTARGET_ATTRIBUTE_TABLE,
30214 /* ms_abi and sysv_abi calling convention function attributes. */
30215 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30216 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* Sentinel entry terminating the table.  */
30218 { NULL, 0, 0, false, false, false, NULL }
30221 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30223 x86_builtin_vectorization_cost (bool runtime_test)
30225 /* If the branch of the runtime test is taken - i.e. - the vectorized
30226 version is skipped - this incurs a misprediction cost (because the
30227 vectorized version is expected to be the fall-through). So we subtract
30228 the latency of a mispredicted branch from the costs that are incured
30229 when the vectorized version is executed.
30231 TODO: The values in individual target tables have to be tuned or new
30232 fields may be needed. For eg. on K8, the default branch path is the
30233 not-taken path. If the taken path is predicted correctly, the minimum
30234 penalty of going down the taken-path is 1 cycle. If the taken-path is
30235 not predicted correctly, then the minimum penalty is 10 cycles. */
/* Negative cost credits the mispredicted-branch latency against the
   vectorized version's cost when a runtime guard is needed.  */
30239 return (-(ix86_cost->cond_taken_branch_cost));
30245 /* This function returns the calling abi specific va_list type node.
30246 It returns the FNDECL specific va_list type. */
30249 ix86_fn_abi_va_list (tree fndecl)
30252 return va_list_type_node;
30253 gcc_assert (fndecl != NULL_TREE);
/* MS and SysV ABIs use different va_list representations on x86-64.  */
30255 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
30256 return ms_va_list_type_node;
30258 return sysv_va_list_type_node;
30261 /* Returns the canonical va_list type specified by TYPE. If there
30262 is no valid TYPE provided, it return NULL_TREE. */
30265 ix86_canonical_va_list_type (tree type)
30269 /* Resolve references and pointers to va_list type. */
30270 if (INDIRECT_REF_P (type))
30271 type = TREE_TYPE (type);
30272 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
30273 type = TREE_TYPE (type);
/* Try the three candidate va_list types in turn: the generic one,
   then the SysV one, then the MS one.  The unwrap-and-compare logic
   is repeated verbatim for each candidate.  */
30277 wtype = va_list_type_node;
30278 gcc_assert (wtype != NULL_TREE);
30280 if (TREE_CODE (wtype) == ARRAY_TYPE)
30282 /* If va_list is an array type, the argument may have decayed
30283 to a pointer type, e.g. by being passed to another function.
30284 In that case, unwrap both types so that we can compare the
30285 underlying records. */
30286 if (TREE_CODE (htype) == ARRAY_TYPE
30287 || POINTER_TYPE_P (htype))
30289 wtype = TREE_TYPE (wtype);
30290 htype = TREE_TYPE (htype);
30293 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30294 return va_list_type_node;
30295 wtype = sysv_va_list_type_node;
30296 gcc_assert (wtype != NULL_TREE);
30298 if (TREE_CODE (wtype) == ARRAY_TYPE)
30300 /* If va_list is an array type, the argument may have decayed
30301 to a pointer type, e.g. by being passed to another function.
30302 In that case, unwrap both types so that we can compare the
30303 underlying records. */
30304 if (TREE_CODE (htype) == ARRAY_TYPE
30305 || POINTER_TYPE_P (htype))
30307 wtype = TREE_TYPE (wtype);
30308 htype = TREE_TYPE (htype);
30311 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30312 return sysv_va_list_type_node;
30313 wtype = ms_va_list_type_node;
30314 gcc_assert (wtype != NULL_TREE);
30316 if (TREE_CODE (wtype) == ARRAY_TYPE)
30318 /* If va_list is an array type, the argument may have decayed
30319 to a pointer type, e.g. by being passed to another function.
30320 In that case, unwrap both types so that we can compare the
30321 underlying records. */
30322 if (TREE_CODE (htype) == ARRAY_TYPE
30323 || POINTER_TYPE_P (htype))
30325 wtype = TREE_TYPE (wtype);
30326 htype = TREE_TYPE (htype);
30329 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30330 return ms_va_list_type_node;
/* None of the target va_list types matched; defer to the default.  */
30333 return std_canonical_va_list_type (type);
30336 /* Iterate through the target-specific builtin types for va_list.
30337 IDX denotes the iterator, *PTREE is set to the result type of
30338 the va_list builtin, and *PNAME to its internal type.
30339 Returns zero if there is no element for this index, otherwise
30340 IDX should be increased upon the next call.
30341 Note, do not iterate a base builtin's name like __builtin_va_list.
30342 Used from c_common_nodes_and_builtins. */
/* NOTE(review): the dispatch scaffolding (presumably a switch on IDX
   with case labels and return statements, plus a 64-bit guard) is
   elided in this listing -- only the two per-entry assignment pairs
   survive.  From their order, the MS entry appears to come before the
   SysV entry; confirm the exact IDX mapping in the full source.  */
30345 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
30351 *ptree = ms_va_list_type_node;
30352 *pname = "__builtin_ms_va_list";
30355 *ptree = sysv_va_list_type_node;
30356 *pname = "__builtin_sysv_va_list";
30364 /* Initialize the GCC target structure. */
/* NOTE(review): this block overrides the default target hooks via
   #undef/#define before TARGET_INITIALIZER expands them into TARGETM
   at the bottom.  The listing is fragmentary: several #else/#endif and
   #if guard lines are elided (gaps in the embedded line numbers, e.g.
   30376, 30402/30404-30405, 30452-30453, 30460-30461, 30472, 30549-30550),
   so some conditional sections below appear unterminated here -- they
   are closed on the missing lines.  Confirm against the full source.  */
30365 #undef TARGET_RETURN_IN_MEMORY
30366 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30368 #undef TARGET_LEGITIMIZE_ADDRESS
30369 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
/* Attribute handling hooks.  */
30371 #undef TARGET_ATTRIBUTE_TABLE
30372 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30373 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30374 # undef TARGET_MERGE_DECL_ATTRIBUTES
30375 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30378 #undef TARGET_COMP_TYPE_ATTRIBUTES
30379 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
/* Builtin-function hooks, including the vectorizer's builtins.  */
30381 #undef TARGET_INIT_BUILTINS
30382 #define TARGET_INIT_BUILTINS ix86_init_builtins
30383 #undef TARGET_EXPAND_BUILTIN
30384 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30386 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30387 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30388 ix86_builtin_vectorized_function
30390 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30391 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30393 #undef TARGET_BUILTIN_RECIPROCAL
30394 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
/* Assembly-output hooks.  */
30396 #undef TARGET_ASM_FUNCTION_EPILOGUE
30397 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
/* Subtargets may replace the section-info encoder; the #else/#endif of
   this conditional fall on elided lines 30402/30404-30405.  */
30399 #undef TARGET_ENCODE_SECTION_INFO
30400 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30401 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30403 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30406 #undef TARGET_ASM_OPEN_PAREN
30407 #define TARGET_ASM_OPEN_PAREN ""
30408 #undef TARGET_ASM_CLOSE_PAREN
30409 #define TARGET_ASM_CLOSE_PAREN ""
/* Integer data directives; unaligned emission reuses the aligned ops.  */
30411 #undef TARGET_ASM_ALIGNED_HI_OP
30412 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30413 #undef TARGET_ASM_ALIGNED_SI_OP
30414 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30416 #undef TARGET_ASM_ALIGNED_DI_OP
30417 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30420 #undef TARGET_ASM_UNALIGNED_HI_OP
30421 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30422 #undef TARGET_ASM_UNALIGNED_SI_OP
30423 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30424 #undef TARGET_ASM_UNALIGNED_DI_OP
30425 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction-scheduler hooks.  */
30427 #undef TARGET_SCHED_ADJUST_COST
30428 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30429 #undef TARGET_SCHED_ISSUE_RATE
30430 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30431 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30432 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30433 ia32_multipass_dfa_lookahead
30435 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30436 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30439 #undef TARGET_HAVE_TLS
30440 #define TARGET_HAVE_TLS true
30442 #undef TARGET_CANNOT_FORCE_CONST_MEM
30443 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30444 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30445 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30447 #undef TARGET_DELEGITIMIZE_ADDRESS
30448 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30450 #undef TARGET_MS_BITFIELD_LAYOUT_P
30451 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* The darwin_binds_local_p override is presumably inside an elided
   #if TARGET_MACHO guard (lines 30452-30453/30456 missing) -- confirm;
   the PE variant below then re-overrides for dllimport targets.  */
30454 #undef TARGET_BINDS_LOCAL_P
30455 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30457 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30458 #undef TARGET_BINDS_LOCAL_P
30459 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30462 #undef TARGET_ASM_OUTPUT_MI_THUNK
30463 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30464 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30465 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30467 #undef TARGET_ASM_FILE_START
30468 #define TARGET_ASM_FILE_START x86_file_start
/* Line 30472 (the leading term of this flag expression) is elided.  */
30470 #undef TARGET_DEFAULT_TARGET_FLAGS
30471 #define TARGET_DEFAULT_TARGET_FLAGS \
30473 | TARGET_SUBTARGET_DEFAULT \
30474 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30476 #undef TARGET_HANDLE_OPTION
30477 #define TARGET_HANDLE_OPTION ix86_handle_option
/* RTL cost and condition-code hooks.  */
30479 #undef TARGET_RTX_COSTS
30480 #define TARGET_RTX_COSTS ix86_rtx_costs
30481 #undef TARGET_ADDRESS_COST
30482 #define TARGET_ADDRESS_COST ix86_address_cost
30484 #undef TARGET_FIXED_CONDITION_CODE_REGS
30485 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30486 #undef TARGET_CC_MODES_COMPATIBLE
30487 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30489 #undef TARGET_MACHINE_DEPENDENT_REORG
30490 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30492 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30493 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
/* Varargs/va_list hooks -- wired to the functions defined above.  */
30495 #undef TARGET_BUILD_BUILTIN_VA_LIST
30496 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30498 #undef TARGET_FN_ABI_VA_LIST
30499 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30501 #undef TARGET_CANONICAL_VA_LIST_TYPE
30502 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30504 #undef TARGET_EXPAND_BUILTIN_VA_START
30505 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30507 #undef TARGET_MD_ASM_CLOBBERS
30508 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Calling-convention hooks.  */
30510 #undef TARGET_PROMOTE_PROTOTYPES
30511 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30512 #undef TARGET_STRUCT_VALUE_RTX
30513 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30514 #undef TARGET_SETUP_INCOMING_VARARGS
30515 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30516 #undef TARGET_MUST_PASS_IN_STACK
30517 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30518 #undef TARGET_PASS_BY_REFERENCE
30519 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30520 #undef TARGET_INTERNAL_ARG_POINTER
30521 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30522 #undef TARGET_UPDATE_STACK_BOUNDARY
30523 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30524 #undef TARGET_GET_DRAP_RTX
30525 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30526 #undef TARGET_STRICT_ARGUMENT_NAMING
30527 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30529 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30530 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
/* Mode-support queries.  */
30532 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30533 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30535 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30536 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30538 #undef TARGET_C_MODE_FOR_SUFFIX
30539 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30542 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30543 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* #endif closing this #ifdef falls on elided lines 30549-30550.  */
30546 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30547 #undef TARGET_INSERT_ATTRIBUTES
30548 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30551 #undef TARGET_MANGLE_TYPE
30552 #define TARGET_MANGLE_TYPE ix86_mangle_type
30554 #undef TARGET_STACK_PROTECT_FAIL
30555 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30557 #undef TARGET_FUNCTION_VALUE
30558 #define TARGET_FUNCTION_VALUE ix86_function_value
30560 #undef TARGET_SECONDARY_RELOAD
30561 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30563 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30564 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
30566 #undef TARGET_SET_CURRENT_FUNCTION
30567 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
/* Per-function target-option ("target" attribute) hooks.  */
30569 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30570 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30572 #undef TARGET_OPTION_SAVE
30573 #define TARGET_OPTION_SAVE ix86_function_specific_save
30575 #undef TARGET_OPTION_RESTORE
30576 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30578 #undef TARGET_OPTION_PRINT
30579 #define TARGET_OPTION_PRINT ix86_function_specific_print
30581 #undef TARGET_OPTION_CAN_INLINE_P
30582 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30584 #undef TARGET_EXPAND_TO_RTL_HOOK
30585 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30587 #undef TARGET_LEGITIMATE_ADDRESS_P
30588 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
/* Instantiate the hook vector with all of the overrides above.  */
30590 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collector roots generated by gengtype for this file.  */
30592 #include "gt-i386.h"