1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
/* Forward declarations for static helpers defined later in this file:
   the vectorizer cost hook and the MinGW/Cygwin dllimport symbol
   legitimization routine.  */
58 static int x86_builtin_vectorization_cost (bool);
59 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Fallback stack-probe limit when the target headers do not provide one;
   -1 (all bits set) effectively disables stack-limit checking.  */
61 #ifndef CHECK_STACK_LIMIT
62 #define CHECK_STACK_LIMIT (-1)
65 /* Return index of given mode in mult and division cost tables. */
66 #define MODE_INDEX(mode) \
67 ((mode) == QImode ? 0 \
68 : (mode) == HImode ? 1 \
69 : (mode) == SImode ? 2 \
70 : (mode) == DImode ? 3 \
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
75 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop algorithm descriptor for table slots that are never
   consulted (e.g. the 64-bit slot on 32-bit-only CPUs): unconditionally
   fall back to a library call.  */
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when optimizing for size (-Os).  Unlike the per-CPU
   tables below, every entry is expressed in code-size bytes via
   COSTS_N_BYTES rather than in latency units.  */
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* memcpy algorithms: rep movsb is the smallest encoding, so use it for
   both the 32-bit and 64-bit variants when tuning for size.  */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
/* memset algorithms: likewise rep stosb everywhere.  */
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
149 /* Processor costs (relative to an add) */
/* Cost table for the Intel 386.  Entries are latency-relative units
   (COSTS_N_INSNS, i.e. multiples of an add).  */
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* memcpy: rep movsb; 64-bit slot unused on a 32-bit-only CPU.  */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
/* memset: rep stosb; 64-bit slot unused.  */
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel 486 (latency units via COSTS_N_INSNS).  */
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* memcpy: word-sized rep movsl; 64-bit slot unused.  */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
/* memset: word-sized rep stosl; 64-bit slot unused.  */
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel Pentium (P5) (latency units via COSTS_N_INSNS).  */
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* memcpy: rep movsl up to 256 bytes, then libcall; 64-bit slot unused.  */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
/* memset: rep stosl for all sizes; 64-bit slot unused.  */
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel PentiumPro / P6 family (latency units).  */
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
416 the alignment). For small blocks inline loop is still a noticeable win, for bigger
417 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
418 more expensive startup time in CPU, but after 4K the difference is down in the noise.
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
/* Cost table for the AMD Geode (latency units via COSTS_N_INSNS).  */
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
/* memcpy: rep movsl up to 256 bytes, then libcall; 64-bit slot unused.  */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
/* memset: same strategy as memcpy.  */
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
/* Cost table for the AMD K6 (latency units via COSTS_N_INSNS).  */
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* memcpy: rep movsl up to 256 bytes, then libcall; 64-bit slot unused.  */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
/* memset: same strategy as memcpy.  */
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
/* Cost table for the AMD Athlon (K7) (latency units via COSTS_N_INSNS).  */
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with REP prefix (relative to loops)
637 compared to K8. Alignment becomes important after 8 bytes for memcpy and
638 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
/* Cost table for the AMD K8 / Opteron / Athlon 64 (latency units).
   Unlike the 32-bit-only CPUs above, the 64-bit stringop slots are
   populated rather than DUMMY_STRINGOP_ALGS.  */
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702 immediately, they are queued. We set number of simultaneous prefetches
703 to a large constant to reflect this (it probably is not a good idea not
704 to limit number of prefetches at all, as their execution also takes some
706 100, /* number of parallel prefetches */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 /* K8 has optimized REP instruction for medium sized blocks, but for very small
715 blocks it is better to use loop. For large blocks, libcall can do
716 nontemporary accesses and beat inline considerably. */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
/* Cost table for AMD Family 10h (Barcelona) (latency units).  */
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
/* NOTE(review): the lines below look like the interior of a latency table
   comment (per-instruction MOVD timings) whose opening was lost in
   extraction — verify against the upstream file before editing.  */
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
780 MOVD reg64, xmmreg Double FADD 3
782 MOVD reg32, xmmreg Double FADD 3
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued. We set number of simultaneous prefetches
789 to a large constant to reflect this (it probably is not a good idea not
790 to limit number of prefetches at all, as their execution also takes some
792 100, /* number of parallel prefetches */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
801 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
802 very small blocks it is better to use loop. For large blocks, libcall can
803 do nontemporary accesses and beat inline considerably. */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel Pentium 4 (NetBurst) (latency units).  Note the
   very high SSE<->integer and SSE-load costs, reflecting NetBurst's slow
   cross-unit moves.  */
823 struct processor_costs pentium4_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 COSTS_N_INSNS (4), /* variable shift costs */
827 COSTS_N_INSNS (4), /* constant shift costs */
828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (15), /* HI */
830 COSTS_N_INSNS (15), /* SI */
831 COSTS_N_INSNS (15), /* DI */
832 COSTS_N_INSNS (15)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (56), /* HI */
836 COSTS_N_INSNS (56), /* SI */
837 COSTS_N_INSNS (56), /* DI */
838 COSTS_N_INSNS (56)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 16, /* "large" insn */
843 2, /* cost for loading QImode using movzbl */
844 {4, 5, 4}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {2, 3, 2}, /* cost of storing integer registers */
848 2, /* cost of reg,reg fld/fst */
849 {2, 2, 6}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {4, 4, 6}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {2, 2}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {2, 2}, /* cost of storing MMX registers
857 in SImode and DImode */
858 12, /* cost of moving SSE register */
859 {12, 12, 12}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {2, 2, 8}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 10, /* MMX or SSE register to integer */
864 8, /* size of l1 cache. */
865 256, /* size of l2 cache. */
866 64, /* size of prefetch block */
867 6, /* number of parallel prefetches */
869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* memcpy: byte loop for tiny blocks, then rep movsl; 64-bit slot unused.  */
875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 DUMMY_STRINGOP_ALGS},
/* memset: loops for small blocks, rep stosl up to 20 KiB.  */
877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
879 DUMMY_STRINGOP_ALGS},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
/* Nocona (64-bit Pentium 4) tuning cost table.  Same field layout as
   the other processor_costs initializers in this file.
   NOTE(review): embedded numbering gaps (913, 939, 950, 964-966 absent,
   including MOVE_RATIO, the branch-cost field, part of the memset table
   and the closing "};") -- corrupted extraction; verify against
   upstream before compiling.  */
894 struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
/* Core 2 tuning cost table.  Same field layout as the other
   processor_costs initializers in this file.
   NOTE(review): embedded numbering gaps (986 = MOVE_RATIO and the
   closing "};" at 1037-1038 absent); stray leading line numbers on
   every line -- corrupted extraction, restore from upstream.  */
967 struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
/* Generic 64-bit tuning cost table (-mtune=generic on x86-64).
   NOTE(review): closing "};" (original lines 1114-1115) absent, and
   every line carries a stray leading line number -- corrupted
   extraction, restore from upstream before compiling.  */
1039 /* Generic64 should produce code tuned for Nocona and K8. */
1041 struct processor_costs generic64_cost = {
1042 COSTS_N_INSNS (1), /* cost of an add instruction */
1043 /* On all chips taken into consideration lea is 2 cycles and more. With
1044 this cost however our current implementation of synth_mult results in
1045 use of unnecessary temporary registers causing regression on several
1046 SPECfp benchmarks. */
1047 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1048 COSTS_N_INSNS (1), /* variable shift costs */
1049 COSTS_N_INSNS (1), /* constant shift costs */
1050 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1051 COSTS_N_INSNS (4), /* HI */
1052 COSTS_N_INSNS (3), /* SI */
1053 COSTS_N_INSNS (4), /* DI */
1054 COSTS_N_INSNS (2)}, /* other */
1055 0, /* cost of multiply per each bit set */
1056 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1057 COSTS_N_INSNS (26), /* HI */
1058 COSTS_N_INSNS (42), /* SI */
1059 COSTS_N_INSNS (74), /* DI */
1060 COSTS_N_INSNS (74)}, /* other */
1061 COSTS_N_INSNS (1), /* cost of movsx */
1062 COSTS_N_INSNS (1), /* cost of movzx */
1063 8, /* "large" insn */
1064 17, /* MOVE_RATIO */
1065 4, /* cost for loading QImode using movzbl */
1066 {4, 4, 4}, /* cost of loading integer registers
1067 in QImode, HImode and SImode.
1068 Relative to reg-reg move (2). */
1069 {4, 4, 4}, /* cost of storing integer registers */
1070 4, /* cost of reg,reg fld/fst */
1071 {12, 12, 12}, /* cost of loading fp registers
1072 in SFmode, DFmode and XFmode */
1073 {6, 6, 8}, /* cost of storing fp registers
1074 in SFmode, DFmode and XFmode */
1075 2, /* cost of moving MMX register */
1076 {8, 8}, /* cost of loading MMX registers
1077 in SImode and DImode */
1078 {8, 8}, /* cost of storing MMX registers
1079 in SImode and DImode */
1080 2, /* cost of moving SSE register */
1081 {8, 8, 8}, /* cost of loading SSE registers
1082 in SImode, DImode and TImode */
1083 {8, 8, 8}, /* cost of storing SSE registers
1084 in SImode, DImode and TImode */
1085 5, /* MMX or SSE register to integer */
1086 32, /* size of l1 cache. */
1087 512, /* size of l2 cache. */
1088 64, /* size of prefetch block */
1089 6, /* number of parallel prefetches */
1090 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1091 is increased to perhaps more appropriate value of 5. */
1092 3, /* Branch cost */
1093 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 {DUMMY_STRINGOP_ALGS,
1102 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1103 1, /* scalar_stmt_cost. */
1104 1, /* scalar load_cost. */
1105 1, /* scalar_store_cost. */
1106 1, /* vec_stmt_cost. */
1107 1, /* vec_to_scalar_cost. */
1108 1, /* scalar_to_vec_cost. */
1109 1, /* vec_align_load_cost. */
1110 2, /* vec_unalign_load_cost. */
1111 1, /* vec_store_cost. */
1112 3, /* cond_taken_branch_cost. */
1113 1, /* cond_not_taken_branch_cost. */
/* Generic 32-bit tuning cost table (-mtune=generic on ia32); only the
   32-bit stringop tables are populated, 64-bit slots use
   DUMMY_STRINGOP_ALGS.
   NOTE(review): closing "};" (original lines 1185-1186) absent and
   every line carries a stray leading line number -- corrupted
   extraction, restore from upstream before compiling.  */
1116 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1118 struct processor_costs generic32_cost = {
1119 COSTS_N_INSNS (1), /* cost of an add instruction */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 256, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 3, /* Branch cost */
1164 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1165 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1166 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1167 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1168 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1169 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1173 DUMMY_STRINGOP_ALGS},
1174 1, /* scalar_stmt_cost. */
1175 1, /* scalar load_cost. */
1176 1, /* scalar_store_cost. */
1177 1, /* vec_stmt_cost. */
1178 1, /* vec_to_scalar_cost. */
1179 1, /* scalar_to_vec_cost. */
1180 1, /* vec_align_load_cost. */
1181 2, /* vec_unalign_load_cost. */
1182 1, /* vec_store_cost. */
1183 3, /* cond_taken_branch_cost. */
1184 1, /* cond_not_taken_branch_cost. */
/* Active cost table; initialized to pentium_cost (defined earlier in
   this file, not visible in this chunk) and presumably re-pointed at
   the selected -mtune table during option processing -- confirm in
   override_options.  */
1187 const struct processor_costs *ix86_cost = &pentium_cost;
/* One bit per PROCESSOR_* enumerator; these masks select which CPUs
   each entry of the tuning/arch tables below applies to.
   NOTE(review): numbering gaps (1197, 1206, 1209, 1213) suggest a few
   macro lines (e.g. a PPro-family composite mask) were lost in
   extraction.  */
1189 /* Processor feature/optimization bitmasks. */
1190 #define m_386 (1<<PROCESSOR_I386)
1191 #define m_486 (1<<PROCESSOR_I486)
1192 #define m_PENT (1<<PROCESSOR_PENTIUM)
1193 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1194 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1195 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1196 #define m_CORE2 (1<<PROCESSOR_CORE2)
1198 #define m_GEODE (1<<PROCESSOR_GEODE)
1199 #define m_K6 (1<<PROCESSOR_K6)
1200 #define m_K6_GEODE (m_K6 | m_GEODE)
1201 #define m_K8 (1<<PROCESSOR_K8)
1202 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1203 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1204 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1205 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1207 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1208 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1210 /* Generic instruction choice should be common subset of supported CPUs
1211 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1212 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Per-tuning-knob CPU masks, indexed by the X86_TUNE_* enum; copied
   into ix86_tune_features for the selected CPU at option-processing
   time.
   NOTE(review): this table is badly damaged by extraction -- for many
   entries the comment survives but the mask value line is missing
   (numbering gaps after e.g. X86_TUNE_ZERO_EXTEND_WITH_AND,
   X86_TUNE_USE_MOV0, X86_TUNE_SHIFT1, X86_TUNE_USE_FFREEP and others),
   and the closing "};" is absent.  The entry count must match
   X86_TUNE_LAST exactly; do not compile this as-is.  */
1214 /* Feature tests against the various tunings. */
1215 unsigned char ix86_tune_features[X86_TUNE_LAST];
1217 /* Feature tests against the various tunings used to create ix86_tune_features
1218 based on the processor mask. */
1219 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1220 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1221 negatively, so enabling for Generic64 seems like good code size
1222 tradeoff. We can't enable it for 32bit generic because it does not
1223 work well with PPro base chips. */
1224 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1226 /* X86_TUNE_PUSH_MEMORY */
1227 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1228 | m_NOCONA | m_CORE2 | m_GENERIC,
1230 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1233 /* X86_TUNE_UNROLL_STRLEN */
1234 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1236 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1237 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1239 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1240 on simulation result. But after P4 was made, no performance benefit
1241 was observed with branch hints. It also increases the code size.
1242 As a result, icc never generates branch hints. */
1245 /* X86_TUNE_DOUBLE_WITH_ADD */
1248 /* X86_TUNE_USE_SAHF */
1249 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1250 | m_NOCONA | m_CORE2 | m_GENERIC,
1252 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1253 partial dependencies. */
1254 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1255 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1257 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1258 register stalls on Generic32 compilation setting as well. However
1259 in current implementation the partial register stalls are not eliminated
1260 very well - they can be introduced via subregs synthesized by combine
1261 and can happen in caller/callee saving sequences. Because this option
1262 pays back little on PPro based chips and is in conflict with partial reg
1263 dependencies used by Athlon/P4 based chips, it is better to leave it off
1264 for generic32 for now. */
1267 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1268 m_CORE2 | m_GENERIC,
1270 /* X86_TUNE_USE_HIMODE_FIOP */
1271 m_386 | m_486 | m_K6_GEODE,
1273 /* X86_TUNE_USE_SIMODE_FIOP */
1274 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1276 /* X86_TUNE_USE_MOV0 */
1279 /* X86_TUNE_USE_CLTD */
1280 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1282 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1285 /* X86_TUNE_SPLIT_LONG_MOVES */
1288 /* X86_TUNE_READ_MODIFY_WRITE */
1291 /* X86_TUNE_READ_MODIFY */
1294 /* X86_TUNE_PROMOTE_QIMODE */
1295 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1296 | m_GENERIC /* | m_PENT4 ? */,
1298 /* X86_TUNE_FAST_PREFIX */
1299 ~(m_PENT | m_486 | m_386),
1301 /* X86_TUNE_SINGLE_STRINGOP */
1302 m_386 | m_PENT4 | m_NOCONA,
1304 /* X86_TUNE_QIMODE_MATH */
1307 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1308 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1309 might be considered for Generic32 if our scheme for avoiding partial
1310 stalls was more effective. */
1313 /* X86_TUNE_PROMOTE_QI_REGS */
1316 /* X86_TUNE_PROMOTE_HI_REGS */
1319 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1320 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322 /* X86_TUNE_ADD_ESP_8 */
1323 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1324 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_SUB_ESP_4 */
1327 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1329 /* X86_TUNE_SUB_ESP_8 */
1330 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1331 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1333 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1334 for DFmode copies */
1335 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1336 | m_GENERIC | m_GEODE),
1338 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1339 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1341 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1342 conflict here in between PPro/Pentium4 based chips that thread 128bit
1343 SSE registers as single units versus K8 based chips that divide SSE
1344 registers to two 64bit halves. This knob promotes all store destinations
1345 to be 128bit to allow register renaming on 128bit SSE units, but usually
1346 results in one extra microop on 64bit SSE units. Experimental results
1347 shows that disabling this option on P4 brings over 20% SPECfp regression,
1348 while enabling it on K8 brings roughly 2.4% regression that can be partly
1349 masked by careful scheduling of moves. */
1350 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1352 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1355 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1356 are resolved on SSE register parts instead of whole registers, so we may
1357 maintain just lower part of scalar values in proper format leaving the
1358 upper part undefined. */
1361 /* X86_TUNE_SSE_TYPELESS_STORES */
1364 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1365 m_PPRO | m_PENT4 | m_NOCONA,
1367 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1368 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_PROLOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373 /* X86_TUNE_EPILOGUE_USING_MOVE */
1374 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1376 /* X86_TUNE_SHIFT1 */
1379 /* X86_TUNE_USE_FFREEP */
1382 /* X86_TUNE_INTER_UNIT_MOVES */
1383 ~(m_AMD_MULTIPLE | m_GENERIC),
1385 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1388 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1389 than 4 branch instructions in the 16 byte window. */
1390 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1392 /* X86_TUNE_SCHEDULE */
1393 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1395 /* X86_TUNE_USE_BT */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_USE_INCDEC */
1399 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1401 /* X86_TUNE_PAD_RETURNS */
1402 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1404 /* X86_TUNE_EXT_80387_CONSTANTS */
1405 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1407 /* X86_TUNE_SHORTEN_X87_SSE */
1410 /* X86_TUNE_AVOID_VECTOR_DECODE */
1413 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1414 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1417 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1418 vector path on AMD machines. */
1419 m_K8 | m_GENERIC64 | m_AMDFAM10,
1421 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1423 m_K8 | m_GENERIC64 | m_AMDFAM10,
1425 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1429 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1430 but one byte longer. */
1433 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1434 operand that cannot be represented using a modRM byte. The XOR
1435 replacement is long decoded, so this split helps here as well. */
1438 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1440 m_AMDFAM10 | m_GENERIC,
1442 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1443 from integer to FP. */
1446 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1447 with a subsequent conditional jump instruction into a single
1448 compare-and-branch uop. */
/* Per-architecture-capability CPU masks, indexed by X86_ARCH_*; copied
   into ix86_arch_features for the selected -march CPU.
   NOTE(review): only the X86_ARCH_CMOVE mask survives; the values for
   CMPXCHG/CMPXCHG8B/XADD/BSWAP and the closing "};" are missing
   (numbering gaps 1460, 1462-1463, 1465-1466, 1468-1469, 1471-1472) --
   corrupted extraction.  */
1452 /* Feature tests against the various architecture variations. */
1453 unsigned char ix86_arch_features[X86_ARCH_LAST];
1455 /* Feature tests against the various architecture variations, used to create
1456 ix86_arch_features based on the processor mask. */
1457 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1458 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1459 ~(m_386 | m_486 | m_PENT | m_K6),
1461 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1464 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1467 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1470 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Stand-alone CPU-mask knobs and small static state:
   - x86_accumulate_outgoing_args / x86_arch_always_fancy_math_387:
     CPU masks consulted elsewhere during option processing.
   - stringop_alg: user-forced string-operation strategy, default off.
   - FAST_PROLOGUE_INSN_COUNT: threshold for choosing the fast (longer)
     prologue; the tail of its explanatory comment is missing (gap at
     original line 1485).
   - qi/qi_high/hi register-name tables from i386.h macros.  */
1474 static const unsigned int x86_accumulate_outgoing_args
1475 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1477 static const unsigned int x86_arch_always_fancy_math_387
1478 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1479 | m_NOCONA | m_CORE2 | m_GENERIC;
1481 static enum stringop_alg stringop_alg = no_stringop;
1483 /* In case the average insn count for single function invocation is
1484 lower than this constant, emit fast (but longer) prologue and
1486 #define FAST_PROLOGUE_INSN_COUNT 20
1488 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1489 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1490 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1491 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Smallest register class per hard regno, consumed by REGNO_REG_CLASS.
   NOTE(review): the opening "{", several rows (FP-reg tail, arg
   pointer, parts of the SSE/MMX/REX rows) and the closing "};" are
   missing (numbering gaps 1497, 1502, 1505-1506, 1509, 1511-1512,
   1514-1515, 1520-1522); the array must cover FIRST_PSEUDO_REGISTER
   entries in hard-register order -- restore from upstream.  */
1493 /* Array of the smallest class containing reg number REGNO, indexed by
1494 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1496 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1498 /* ax, dx, cx, bx */
1499 AREG, DREG, CREG, BREG,
1500 /* si, di, bp, sp */
1501 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1503 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1504 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1507 /* flags, fpsr, fpcr, frame */
1508 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1510 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1513 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1516 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1517 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1518 /* SSE REX registers */
1519 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* GCC regno -> DBX/stabs debug register number, 32-bit mode; -1 means
   no debug-register encoding for that hard register.
   NOTE(review): the opening "{" (original line 1526) and closing "};"
   are missing -- corrupted extraction.  */
1523 /* The "default" register map used in 32bit mode. */
1525 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1527 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1528 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1529 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1530 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1531 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1532 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1533 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Hard-register numbers (GCC regnos) used for integer argument passing
   and value return under the x86-64 SysV ABI (RDI,RSI,RDX,RCX,R8,R9)
   and the Microsoft x64 ABI (RCX,RDX,R8,R9).
   NOTE(review): the "{" / "};" delimiters of all three arrays are
   missing (gaps at original lines 1537, 1540-1541, 1543, 1546-1547,
   1549, 1551) -- corrupted extraction.  */
1536 static int const x86_64_int_parameter_registers[6] =
1538 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1539 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1542 static int const x86_64_ms_abi_int_parameter_registers[4] =
1544 2 /*RCX*/, 1 /*RDX*/,
1545 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1548 static int const x86_64_int_return_registers[4] =
1550 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
/* GCC regno -> DWARF register number for 64-bit mode (note the fp/SSE
   numbering differs from the 32-bit map above).
   NOTE(review): the opening "{" (original line 1555) and closing "};"
   are missing -- corrupted extraction.  */
1553 /* The "default" register map used in 64bit mode. */
1554 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1556 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1557 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1558 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1559 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1560 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1561 8,9,10,11,12,13,14,15, /* extended integer registers */
1562 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1565 /* Define the register numbers to be used in Dwarf debugging information.
1566 The SVR4 reference port C compiler uses the following register numbers
1567 in its Dwarf output code:
1568 0 for %eax (gcc regno = 0)
1569 1 for %ecx (gcc regno = 2)
1570 2 for %edx (gcc regno = 1)
1571 3 for %ebx (gcc regno = 3)
1572 4 for %esp (gcc regno = 7)
1573 5 for %ebp (gcc regno = 6)
1574 6 for %esi (gcc regno = 4)
1575 7 for %edi (gcc regno = 5)
1576 The following three DWARF register numbers are never generated by
1577 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1578 believes these numbers have these meanings.
1579 8 for %eip (no gcc equivalent)
1580 9 for %eflags (gcc regno = 17)
1581 10 for %trapno (no gcc equivalent)
1582 It is not at all clear how we should number the FP stack registers
1583 for the x86 architecture. If the version of SDB on x86/svr4 were
1584 a bit less brain dead with respect to floating-point then we would
1585 have a precedent to follow with respect to DWARF register numbers
1586 for x86 FP registers, but the SDB on x86/svr4 is so completely
1587 broken with respect to FP registers that it is hardly worth thinking
1588 of it as something to strive for compatibility with.
1589 The version of x86/svr4 SDB I have at the moment does (partially)
1590 seem to believe that DWARF register number 11 is associated with
1591 the x86 register %st(0), but that's about all. Higher DWARF
1592 register numbers don't seem to be associated with anything in
1593 particular, and even for DWARF regno 11, SDB only seems to under-
1594 stand that it should say that a variable lives in %st(0) (when
1595 asked via an `=' command) if we said it was in DWARF regno 11,
1596 but SDB still prints garbage when asked for the value of the
1597 variable in question (via a `/' command).
1598 (Also note that the labels SDB prints for various FP stack regs
1599 when doing an `x' command are all wrong.)
1600 Note that these problems generally don't affect the native SVR4
1601 C compiler because it doesn't allow the use of -O with -g and
1602 because when it is *not* optimizing, it allocates a memory
1603 location for each floating-point variable, and the memory
1604 location is what gets described in the DWARF AT_location
1605 attribute for the variable in question.
1606 Regardless of the severe mental illness of the x86/svr4 SDB, we
1607 do something sensible here and we use the following DWARF
1608 register numbers. Note that these are all stack-top-relative
1610 11 for %st(0) (gcc regno = 8)
1611 12 for %st(1) (gcc regno = 9)
1612 13 for %st(2) (gcc regno = 10)
1613 14 for %st(3) (gcc regno = 11)
1614 15 for %st(4) (gcc regno = 12)
1615 16 for %st(5) (gcc regno = 13)
1616 17 for %st(6) (gcc regno = 14)
1617 18 for %st(7) (gcc regno = 15)
/* GCC regno -> SVR4 DWARF register number; the rationale (including
   the SDB-compatible FP-stack numbering 11..18) is in the long comment
   immediately above this array.
   NOTE(review): the opening "{" (original line 1620) and closing "};"
   are missing -- corrupted extraction.  */
1619 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1621 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1622 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1623 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1624 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1625 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1626 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1627 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Global channel between i386.md compare/test expanders and the
   branch/scc expanders: operands are stashed here by the compare
   pattern and consumed when the branch is emitted.  */
1630 /* Test and compare insns in i386.md store the information needed to
1631 generate branch and scc insns here. */
1633 rtx ix86_compare_op0 = NULL_RTX;
1634 rtx ix86_compare_op1 = NULL_RTX;
1635 rtx ix86_compare_emitted = NULL_RTX;
/* GTY-marked linked-list node caching per-function stack-local slots,
   followed by the stack-frame-layout descriptor filled in by
   ix86_compute_frame_layout.
   NOTE(review): both structures are heavily truncated by extraction --
   stack_local_entry is missing its braces and fields (e.g. the rtx
   slot, gaps at original lines 1640, 1642-1643, 1645-1646), and the
   ix86_frame ASCII diagram plus the "struct ix86_frame {" header and
   several size fields (1649-1653, 1656-1659, 1662-1670, 1672,
   1674-1675, 1681, 1685-1686) are absent.  Restore from upstream;
   do not infer the missing fields.  */
1637 /* Define the structure for the machine field in struct function. */
1639 struct stack_local_entry GTY(())
1641 unsigned short mode;
1644 struct stack_local_entry *next;
1647 /* Structure describing stack frame layout.
1648 Stack grows downward:
1654 saved frame pointer if frame_pointer_needed
1655 <- HARD_FRAME_POINTER
1660 [va_arg registers] (
1661 > to_allocate <- FRAME_POINTER
1671 HOST_WIDE_INT frame;
1673 int outgoing_arguments_size;
1676 HOST_WIDE_INT to_allocate;
1677 /* The offsets relative to ARG_POINTER. */
1678 HOST_WIDE_INT frame_pointer_offset;
1679 HOST_WIDE_INT hard_frame_pointer_offset;
1680 HOST_WIDE_INT stack_pointer_offset;
1682 /* When save_regs_using_mov is set, emit prologue using
1683 move instead of push instructions. */
1684 bool save_regs_using_mov;
/* File-scope option and codegen state set during option processing:
   code model, asm/TLS dialects, fpmath unit, schedule/tune/arch CPUs,
   regparm, stack-boundary values, branch cost, va_list type nodes,
   section threshold, and internal-label prefix.  The ix86_gen_*
   function pointers are presumably bound to 32- or 64-bit insn
   generators depending on TARGET_64BIT -- confirm where they are
   assigned (not visible in this chunk).  */
1687 /* Code model option. */
1688 enum cmodel ix86_cmodel;
1690 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1692 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1694 /* Which unit we are generating floating point math for. */
1695 enum fpmath_unit ix86_fpmath;
1697 /* Which cpu are we scheduling for. */
1698 enum attr_cpu ix86_schedule;
1700 /* Which cpu are we optimizing for. */
1701 enum processor_type ix86_tune;
1703 /* Which instruction set architecture to use. */
1704 enum processor_type ix86_arch;
1706 /* true if sse prefetch instruction is not NOOP. */
1707 int x86_prefetch_sse;
1709 /* ix86_regparm_string as a number */
1710 static int ix86_regparm;
1712 /* -mstackrealign option */
1713 extern int ix86_force_align_arg_pointer;
1714 static const char ix86_force_align_arg_pointer_string[]
1715 = "force_align_arg_pointer";
1717 static rtx (*ix86_gen_leave) (void);
1718 static rtx (*ix86_gen_pop1) (rtx);
1719 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1720 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1721 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1722 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1723 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1724 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1726 /* Preferred alignment for stack boundary in bits. */
1727 unsigned int ix86_preferred_stack_boundary;
1729 /* Alignment for incoming stack boundary in bits specified at
1731 static unsigned int ix86_user_incoming_stack_boundary;
1733 /* Default alignment for incoming stack boundary in bits. */
1734 static unsigned int ix86_default_incoming_stack_boundary;
1736 /* Alignment for incoming stack boundary in bits. */
1737 unsigned int ix86_incoming_stack_boundary;
1739 /* Values 1-5: see jump.c */
1740 int ix86_branch_cost;
1742 /* Calling abi specific va_list type nodes. */
1743 static GTY(()) tree sysv_va_list_type_node;
1744 static GTY(()) tree ms_va_list_type_node;
1746 /* Variables which are this size or smaller are put in the data/bss
1747 or ldata/lbss sections. */
1749 int ix86_section_threshold = 65536;
1751 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1752 char internal_label_prefix[16];
1753 int internal_label_prefix_len;
1755 /* Fence to use after loop using movnt. */
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_COMPLEX_X87_CLASS,

/* Debug names for the classes above; order matches the enum.  */
static const char * const x86_64_reg_class_name[] =
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"

/* Maximum number of eightbyte classes a single argument can span.  */
#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Set once ext_80387_constants_table has been filled in.  */
static bool ext_80387_constants_init = 0;
/* Forward declarations of local helper functions.  */
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
static void ix86_add_new_builtins (int);

/* Indices of the string options saved/restored for
   attribute((target(...))) handling — see the
   ix86_function_specific_* functions below.  */
enum ix86_function_specific_strings
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_FPMATH,
  IX86_FUNCTION_SPECIFIC_MAX

static char *ix86_target_string (int, int, const char *, const char *,
				 const char *, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Bit flags that specify the ISA we are compiling for.  */
int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;

/* A mask of ix86_isa_flags that includes bit X if X
   was set or cleared on the command line.  */
static int ix86_isa_flags_explicit;
/* Define a set of ISAs which are available when a given ISA is
   enabled.  MMX and SSE ISAs are handled separately.  Each _SET macro
   is the transitive closure of the ISAs implied by enabling the named
   one, so turning on e.g. SSE4.2 also turns on SSE4.1, SSSE3, ....  */

#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
#define OPTION_MASK_ISA_3DNOW_SET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)

#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
#define OPTION_MASK_ISA_SSE2_SET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
#define OPTION_MASK_ISA_SSE3_SET \
  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_SSSE3_SET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE4_1_SET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
#define OPTION_MASK_ISA_SSE4_2_SET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
#define OPTION_MASK_ISA_AVX_SET \
  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
#define OPTION_MASK_ISA_FMA_SET \
  (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)

/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
   as -msse4.2.  */
#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET

#define OPTION_MASK_ISA_SSE4A_SET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE5_SET \
  (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)

/* AES and PCLMUL need SSE2 because they use xmm registers */
#define OPTION_MASK_ISA_AES_SET \
  (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_PCLMUL_SET \
  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)

/* ABM implies POPCNT.  Spell the dependency through the _SET macro for
   consistency with the other implication macros above; the value is
   identical since OPTION_MASK_ISA_POPCNT_SET is plain
   OPTION_MASK_ISA_POPCNT (expansion happens at use, so the definition
   order below is fine).  */
#define OPTION_MASK_ISA_ABM_SET \
  (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT_SET)
#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
/* Define a set of ISAs which aren't available when a given ISA is
   disabled.  MMX and SSE ISAs are handled separately.  Each _UNSET
   macro is the ISA itself plus every ISA that depends on it, so
   disabling e.g. SSE2 also disables SSE3, SSSE3, ....  */

#define OPTION_MASK_ISA_MMX_UNSET \
  (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
#define OPTION_MASK_ISA_3DNOW_UNSET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A

#define OPTION_MASK_ISA_SSE_UNSET \
  (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
/* SSE3 has two dependents: SSSE3 (and transitively SSE4.x/AVX) and
   SSE4A (and transitively SSE5).  */
#define OPTION_MASK_ISA_SSE3_UNSET \
  (OPTION_MASK_ISA_SSE3 \
   | OPTION_MASK_ISA_SSSE3_UNSET \
   | OPTION_MASK_ISA_SSE4A_UNSET )
#define OPTION_MASK_ISA_SSSE3_UNSET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
#define OPTION_MASK_ISA_SSE4_2_UNSET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
#define OPTION_MASK_ISA_AVX_UNSET \
  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA

/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
   as -mno-sse4.1.  */
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET

#define OPTION_MASK_ISA_SSE4A_UNSET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
/* Vectorization library interface and handlers.  */
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);

/* Processor target table, indexed by processor number */
  const struct processor_costs *cost;		/* Processor costs */
  const int align_loop;				/* Default alignments.  */
  const int align_loop_max_skip;		/* Max bytes to skip when aligning loops.  */
  const int align_jump;				/* Alignment for jump targets.  */
  const int align_jump_max_skip;		/* Max bytes to skip when aligning jumps.  */
  const int align_func;				/* Alignment for function entry.  */
/* Columns follow struct ptt: cost table, align_loop,
   align_loop_max_skip, align_jump, align_jump_max_skip, align_func.  */
static const struct ptt processor_target_table[PROCESSOR_max] =
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  {&core2_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32}
1958 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Implement TARGET_HANDLE_OPTION.

   For each ISA option the pattern is uniform: enabling the option ORs
   the option's _SET closure (the ISA plus everything it implies) into
   ix86_isa_flags; disabling it clears the _UNSET closure (the ISA plus
   everything that depends on it).  In both directions the touched bits
   are also recorded in ix86_isa_flags_explicit, which override_options
   consults so -march= defaults do not override explicit user choices.  */

ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
      /* -mmmx / -mno-mmx.  */
      ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
      /* -m3dnow / -mno-3dnow.  */
      ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
      /* -msse / -mno-sse.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
      /* -msse2 / -mno-sse2.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
      /* -msse3 / -mno-sse3.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
      /* -mssse3 / -mno-ssse3.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
      /* -msse4.1 / -mno-sse4.1.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
      /* -msse4.2 / -mno-sse4.2.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
      /* -mavx / -mno-avx.  */
      ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
      /* -mfma / -mno-fma.  */
      ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
      /* -msse4 / -mno-sse4 (aliases for sse4.2 / no-sse4.1).  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
      /* -msse4a / -mno-sse4a.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
      /* -msse5 / -mno-sse5.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
      /* -mabm / -mno-abm.  */
      ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
      /* -mpopcnt / -mno-popcnt.  */
      ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
      /* -msahf / -mno-sahf.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
      /* -mcx16 / -mno-cx16.  */
      ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
      /* -maes / -mno-aes.  */
      ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
      /* -mpclmul / -mno-pclmul.  */
      ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2242 /* Return a string the documents the current -m options. The caller is
2243 responsible for freeing the string. */
2246 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2247 const char *fpmath, bool add_nl_p)
2249 struct ix86_target_opts
2251 const char *option; /* option string */
2252 int mask; /* isa mask options */
2255 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2256 preceding options while match those first. */
2257 static struct ix86_target_opts isa_opts[] =
2259 { "-m64", OPTION_MASK_ISA_64BIT },
2260 { "-msse5", OPTION_MASK_ISA_SSE5 },
2261 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2262 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2263 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2264 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2265 { "-msse3", OPTION_MASK_ISA_SSE3 },
2266 { "-msse2", OPTION_MASK_ISA_SSE2 },
2267 { "-msse", OPTION_MASK_ISA_SSE },
2268 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2269 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2270 { "-mmmx", OPTION_MASK_ISA_MMX },
2271 { "-mabm", OPTION_MASK_ISA_ABM },
2272 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2273 { "-maes", OPTION_MASK_ISA_AES },
2274 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2278 static struct ix86_target_opts flag_opts[] =
2280 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2281 { "-m80387", MASK_80387 },
2282 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2283 { "-malign-double", MASK_ALIGN_DOUBLE },
2284 { "-mcld", MASK_CLD },
2285 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2286 { "-mieee-fp", MASK_IEEE_FP },
2287 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2288 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2289 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2290 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2291 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2292 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2293 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2294 { "-mno-red-zone", MASK_NO_RED_ZONE },
2295 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2296 { "-mrecip", MASK_RECIP },
2297 { "-mrtd", MASK_RTD },
2298 { "-msseregparm", MASK_SSEREGPARM },
2299 { "-mstack-arg-probe", MASK_STACK_PROBE },
2300 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2303 const char *opts[ (sizeof (isa_opts) / sizeof (isa_opts[0])
2304 + sizeof (flag_opts) / sizeof (flag_opts[0])
2308 char target_other[40];
2317 memset (opts, '\0', sizeof (opts));
2319 /* Add -march= option. */
2322 opts[num][0] = "-march=";
2323 opts[num++][1] = arch;
2326 /* Add -mtune= option. */
2329 opts[num][0] = "-mtune=";
2330 opts[num++][1] = tune;
2333 /* Pick out the options in isa options. */
2334 for (i = 0; i < sizeof (isa_opts) / sizeof (isa_opts[0]); i++)
2336 if ((isa & isa_opts[i].mask) != 0)
2338 opts[num++][0] = isa_opts[i].option;
2339 isa &= ~ isa_opts[i].mask;
2343 if (isa && add_nl_p)
2345 opts[num++][0] = isa_other;
2346 sprintf (isa_other, "(other isa: 0x%x)", isa);
2349 /* Add flag options. */
2350 for (i = 0; i < sizeof (flag_opts) / sizeof (flag_opts[0]); i++)
2352 if ((flags & flag_opts[i].mask) != 0)
2354 opts[num++][0] = flag_opts[i].option;
2355 flags &= ~ flag_opts[i].mask;
2359 if (flags && add_nl_p)
2361 opts[num++][0] = target_other;
2362 sprintf (target_other, "(other flags: 0x%x)", isa);
2365 /* Add -fpmath= option. */
2368 opts[num][0] = "-mfpmath=";
2369 opts[num++][1] = fpmath;
2376 gcc_assert (num < sizeof (opts) / sizeof (opts[0]));
2378 /* Size the string. */
2380 sep_len = (add_nl_p) ? 3 : 1;
2381 for (i = 0; i < num; i++)
2384 for (j = 0; j < 2; j++)
2386 len += strlen (opts[i][j]);
2389 /* Build the string. */
2390 ret = ptr = (char *) xmalloc (len);
2393 for (i = 0; i < num; i++)
2397 for (j = 0; j < 2; j++)
2398 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2405 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2413 for (j = 0; j < 2; j++)
2416 memcpy (ptr, opts[i][j], len2[j]);
2418 line_len += len2[j];
2423 gcc_assert (ret + len >= ptr);
/* Function that is callable from the debugger to print the current
   options.  Prints to stderr; frees the string it builds.  */

ix86_debug_options (void)
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath_string, true);

      fprintf (stderr, "%s\n\n", opts);

      /* No target-specific options are in effect.  */
      fprintf (stderr, "<no options>\n\n");
2448 /* Sometimes certain combinations of command options do not make
2449 sense on a particular target machine. You can define a macro
2450 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2451 defined, is executed once just after all the command options have
2454 Don't use this macro to turn on various extra optimizations for
2455 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2458 override_options (bool main_args_p)
2461 unsigned int ix86_arch_mask, ix86_tune_mask;
2466 /* Comes from final.c -- no real reason to change it. */
2467 #define MAX_CODE_ALIGN 16
2475 PTA_PREFETCH_SSE = 1 << 4,
2477 PTA_3DNOW_A = 1 << 6,
2481 PTA_POPCNT = 1 << 10,
2483 PTA_SSE4A = 1 << 12,
2484 PTA_NO_SAHF = 1 << 13,
2485 PTA_SSE4_1 = 1 << 14,
2486 PTA_SSE4_2 = 1 << 15,
2489 PTA_PCLMUL = 1 << 18,
2496 const char *const name; /* processor name or nickname. */
2497 const enum processor_type processor;
2498 const enum attr_cpu schedule;
2499 const unsigned /*enum pta_flags*/ flags;
2501 const processor_alias_table[] =
2503 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2504 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2505 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2506 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2507 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2508 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2509 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2510 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2511 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2512 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2513 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2514 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2515 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2517 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2519 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2520 PTA_MMX | PTA_SSE | PTA_SSE2},
2521 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2522 PTA_MMX |PTA_SSE | PTA_SSE2},
2523 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2524 PTA_MMX | PTA_SSE | PTA_SSE2},
2525 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2526 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2527 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2528 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2529 | PTA_CX16 | PTA_NO_SAHF},
2530 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2531 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2532 | PTA_SSSE3 | PTA_CX16},
2533 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2534 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2535 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2536 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2537 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2538 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2539 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2540 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2541 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2542 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2543 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2544 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2545 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2546 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2547 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2548 {"x86-64", PROCESSOR_K8, CPU_K8,
2549 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2550 {"k8", PROCESSOR_K8, CPU_K8,
2551 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2552 | PTA_SSE2 | PTA_NO_SAHF},
2553 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2554 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2555 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2556 {"opteron", PROCESSOR_K8, CPU_K8,
2557 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2558 | PTA_SSE2 | PTA_NO_SAHF},
2559 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2560 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2561 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2562 {"athlon64", PROCESSOR_K8, CPU_K8,
2563 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2564 | PTA_SSE2 | PTA_NO_SAHF},
2565 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2566 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2567 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2568 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2569 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2570 | PTA_SSE2 | PTA_NO_SAHF},
2571 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2572 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2573 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2574 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2575 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2576 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2577 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2578 0 /* flags are only used for -march switch. */ },
2579 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2580 PTA_64BIT /* flags are only used for -march switch. */ },
2583 int const pta_size = ARRAY_SIZE (processor_alias_table);
2585 /* Set up prefix/suffix so the error messages refer to either the command
2586 line argument, or the attribute(target). */
2595 prefix = "option(\"";
2600 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2601 SUBTARGET_OVERRIDE_OPTIONS;
2604 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2605 SUBSUBTARGET_OVERRIDE_OPTIONS;
2608 /* -fPIC is the default for x86_64. */
2609 if (TARGET_MACHO && TARGET_64BIT)
2612 /* Set the default values for switches whose default depends on TARGET_64BIT
2613 in case they weren't overwritten by command line options. */
2616 /* Mach-O doesn't support omitting the frame pointer for now. */
2617 if (flag_omit_frame_pointer == 2)
2618 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2619 if (flag_asynchronous_unwind_tables == 2)
2620 flag_asynchronous_unwind_tables = 1;
2621 if (flag_pcc_struct_return == 2)
2622 flag_pcc_struct_return = 0;
2626 if (flag_omit_frame_pointer == 2)
2627 flag_omit_frame_pointer = 0;
2628 if (flag_asynchronous_unwind_tables == 2)
2629 flag_asynchronous_unwind_tables = 0;
2630 if (flag_pcc_struct_return == 2)
2631 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2634 /* Need to check -mtune=generic first. */
2635 if (ix86_tune_string)
2637 if (!strcmp (ix86_tune_string, "generic")
2638 || !strcmp (ix86_tune_string, "i686")
2639 /* As special support for cross compilers we read -mtune=native
2640 as -mtune=generic. With native compilers we won't see the
2641 -mtune=native, as it was changed by the driver. */
2642 || !strcmp (ix86_tune_string, "native"))
2645 ix86_tune_string = "generic64";
2647 ix86_tune_string = "generic32";
2649 /* If this call is for setting the option attribute, allow the
2650 generic32/generic64 that was previously set. */
2651 else if (!main_args_p
2652 && (!strcmp (ix86_tune_string, "generic32")
2653 || !strcmp (ix86_tune_string, "generic64")))
2655 else if (!strncmp (ix86_tune_string, "generic", 7))
2656 error ("bad value (%s) for %stune=%s %s",
2657 ix86_tune_string, prefix, suffix, sw);
2661 if (ix86_arch_string)
2662 ix86_tune_string = ix86_arch_string;
2663 if (!ix86_tune_string)
2665 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2666 ix86_tune_defaulted = 1;
2669 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2670 need to use a sensible tune option. */
2671 if (!strcmp (ix86_tune_string, "generic")
2672 || !strcmp (ix86_tune_string, "x86-64")
2673 || !strcmp (ix86_tune_string, "i686"))
2676 ix86_tune_string = "generic64";
2678 ix86_tune_string = "generic32";
2681 if (ix86_stringop_string)
2683 if (!strcmp (ix86_stringop_string, "rep_byte"))
2684 stringop_alg = rep_prefix_1_byte;
2685 else if (!strcmp (ix86_stringop_string, "libcall"))
2686 stringop_alg = libcall;
2687 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2688 stringop_alg = rep_prefix_4_byte;
2689 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2690 stringop_alg = rep_prefix_8_byte;
2691 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2692 stringop_alg = loop_1_byte;
2693 else if (!strcmp (ix86_stringop_string, "loop"))
2694 stringop_alg = loop;
2695 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2696 stringop_alg = unrolled_loop;
2698 error ("bad value (%s) for %sstringop-strategy=%s %s",
2699 ix86_stringop_string, prefix, suffix, sw);
2701 if (!strcmp (ix86_tune_string, "x86-64"))
2702 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2703 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2704 prefix, suffix, prefix, suffix, prefix, suffix);
2706 if (!ix86_arch_string)
2707 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2709 ix86_arch_specified = 1;
2711 if (!strcmp (ix86_arch_string, "generic"))
2712 error ("generic CPU can be used only for %stune=%s %s",
2713 prefix, suffix, sw);
2714 if (!strncmp (ix86_arch_string, "generic", 7))
2715 error ("bad value (%s) for %sarch=%s %s",
2716 ix86_arch_string, prefix, suffix, sw);
2718 if (ix86_cmodel_string != 0)
2720 if (!strcmp (ix86_cmodel_string, "small"))
2721 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2722 else if (!strcmp (ix86_cmodel_string, "medium"))
2723 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2724 else if (!strcmp (ix86_cmodel_string, "large"))
2725 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2727 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2728 else if (!strcmp (ix86_cmodel_string, "32"))
2729 ix86_cmodel = CM_32;
2730 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2731 ix86_cmodel = CM_KERNEL;
2733 error ("bad value (%s) for %scmodel=%s %s",
2734 ix86_cmodel_string, prefix, suffix, sw);
2738 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2739 use of rip-relative addressing. This eliminates fixups that
2740 would otherwise be needed if this object is to be placed in a
2741 DLL, and is essentially just as efficient as direct addressing. */
2742 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2743 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2744 else if (TARGET_64BIT)
2745 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2747 ix86_cmodel = CM_32;
2749 if (ix86_asm_string != 0)
2752 && !strcmp (ix86_asm_string, "intel"))
2753 ix86_asm_dialect = ASM_INTEL;
2754 else if (!strcmp (ix86_asm_string, "att"))
2755 ix86_asm_dialect = ASM_ATT;
2757 error ("bad value (%s) for %sasm=%s %s",
2758 ix86_asm_string, prefix, suffix, sw);
2760 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2761 error ("code model %qs not supported in the %s bit mode",
2762 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2763 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2764 sorry ("%i-bit mode not compiled in",
2765 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2767 for (i = 0; i < pta_size; i++)
2768 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2770 ix86_schedule = processor_alias_table[i].schedule;
2771 ix86_arch = processor_alias_table[i].processor;
2772 /* Default cpu tuning to the architecture. */
2773 ix86_tune = ix86_arch;
2775 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2776 error ("CPU you selected does not support x86-64 "
2779 if (processor_alias_table[i].flags & PTA_MMX
2780 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2781 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2782 if (processor_alias_table[i].flags & PTA_3DNOW
2783 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2784 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2785 if (processor_alias_table[i].flags & PTA_3DNOW_A
2786 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2787 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2788 if (processor_alias_table[i].flags & PTA_SSE
2789 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2790 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2791 if (processor_alias_table[i].flags & PTA_SSE2
2792 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2793 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2794 if (processor_alias_table[i].flags & PTA_SSE3
2795 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2796 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2797 if (processor_alias_table[i].flags & PTA_SSSE3
2798 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2799 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2800 if (processor_alias_table[i].flags & PTA_SSE4_1
2801 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2802 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2803 if (processor_alias_table[i].flags & PTA_SSE4_2
2804 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2805 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2806 if (processor_alias_table[i].flags & PTA_AVX
2807 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2808 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2809 if (processor_alias_table[i].flags & PTA_FMA
2810 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2811 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2812 if (processor_alias_table[i].flags & PTA_SSE4A
2813 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2814 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2815 if (processor_alias_table[i].flags & PTA_SSE5
2816 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2817 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2818 if (processor_alias_table[i].flags & PTA_ABM
2819 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2820 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2821 if (processor_alias_table[i].flags & PTA_CX16
2822 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2823 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2824 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2825 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2826 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2827 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2828 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2829 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2830 if (processor_alias_table[i].flags & PTA_AES
2831 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2832 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2833 if (processor_alias_table[i].flags & PTA_PCLMUL
2834 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2835 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2836 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2837 x86_prefetch_sse = true;
2843 error ("bad value (%s) for %sarch=%s %s",
2844 ix86_arch_string, prefix, suffix, sw);
2846 ix86_arch_mask = 1u << ix86_arch;
2847 for (i = 0; i < X86_ARCH_LAST; ++i)
2848 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2850 for (i = 0; i < pta_size; i++)
2851 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2853 ix86_schedule = processor_alias_table[i].schedule;
2854 ix86_tune = processor_alias_table[i].processor;
2855 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2857 if (ix86_tune_defaulted)
2859 ix86_tune_string = "x86-64";
2860 for (i = 0; i < pta_size; i++)
2861 if (! strcmp (ix86_tune_string,
2862 processor_alias_table[i].name))
2864 ix86_schedule = processor_alias_table[i].schedule;
2865 ix86_tune = processor_alias_table[i].processor;
2868 error ("CPU you selected does not support x86-64 "
2871 /* Intel CPUs have always interpreted SSE prefetch instructions as
2872 NOPs; so, we can enable SSE prefetch instructions even when
2873 -mtune (rather than -march) points us to a processor that has them.
2874 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2875 higher processors. */
2877 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2878 x86_prefetch_sse = true;
2882 error ("bad value (%s) for %stune=%s %s",
2883 ix86_tune_string, prefix, suffix, sw);
2885 ix86_tune_mask = 1u << ix86_tune;
2886 for (i = 0; i < X86_TUNE_LAST; ++i)
2887 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2890 ix86_cost = &ix86_size_cost;
2892 ix86_cost = processor_target_table[ix86_tune].cost;
2894 /* Arrange to set up i386_stack_locals for all functions. */
2895 init_machine_status = ix86_init_machine_status;
2897 /* Validate -mregparm= value. */
2898 if (ix86_regparm_string)
2901 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2902 i = atoi (ix86_regparm_string);
2903 if (i < 0 || i > REGPARM_MAX)
2904 error ("%sregparm=%d%s is not between 0 and %d",
2905 prefix, i, suffix, REGPARM_MAX);
2910 ix86_regparm = REGPARM_MAX;
2912 /* If the user has provided any of the -malign-* options,
2913 warn and use that value only if -falign-* is not set.
2914 Remove this code in GCC 3.2 or later. */
2915 if (ix86_align_loops_string)
2917 warning (0, "%salign-loops%s is obsolete, use %salign-loops%s",
2918 prefix, suffix, prefix, suffix);
2919 if (align_loops == 0)
2921 i = atoi (ix86_align_loops_string);
2922 if (i < 0 || i > MAX_CODE_ALIGN)
2923 error ("%salign-loops=%d%s is not between 0 and %d",
2924 prefix, i, suffix, MAX_CODE_ALIGN);
2926 align_loops = 1 << i;
2930 if (ix86_align_jumps_string)
2932 warning (0, "%salign-jumps%s is obsolete, use %salign-jumps%s",
2933 prefix, suffix, prefix, suffix);
2934 if (align_jumps == 0)
2936 i = atoi (ix86_align_jumps_string);
2937 if (i < 0 || i > MAX_CODE_ALIGN)
2938 error ("%salign-loops=%d%s is not between 0 and %d",
2939 prefix, i, suffix, MAX_CODE_ALIGN);
2941 align_jumps = 1 << i;
2945 if (ix86_align_funcs_string)
2947 warning (0, "%salign-functions%s is obsolete, use %salign-functions%s",
2948 prefix, suffix, prefix, suffix);
2949 if (align_functions == 0)
2951 i = atoi (ix86_align_funcs_string);
2952 if (i < 0 || i > MAX_CODE_ALIGN)
2953 error ("%salign-loops=%d%s is not between 0 and %d",
2954 prefix, i, suffix, MAX_CODE_ALIGN);
2956 align_functions = 1 << i;
2960 /* Default align_* from the processor table. */
2961 if (align_loops == 0)
2963 align_loops = processor_target_table[ix86_tune].align_loop;
2964 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2966 if (align_jumps == 0)
2968 align_jumps = processor_target_table[ix86_tune].align_jump;
2969 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2971 if (align_functions == 0)
2973 align_functions = processor_target_table[ix86_tune].align_func;
2976 /* Validate -mbranch-cost= value, or provide default. */
2977 ix86_branch_cost = ix86_cost->branch_cost;
2978 if (ix86_branch_cost_string)
2980 i = atoi (ix86_branch_cost_string);
2982 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2984 ix86_branch_cost = i;
2986 if (ix86_section_threshold_string)
2988 i = atoi (ix86_section_threshold_string);
2990 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
2992 ix86_section_threshold = i;
2995 if (ix86_tls_dialect_string)
2997 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2998 ix86_tls_dialect = TLS_DIALECT_GNU;
2999 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3000 ix86_tls_dialect = TLS_DIALECT_GNU2;
3001 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3002 ix86_tls_dialect = TLS_DIALECT_SUN;
3004 error ("bad value (%s) for %stls-dialect=%s %s",
3005 ix86_tls_dialect_string, prefix, suffix, sw);
3008 if (ix87_precision_string)
3010 i = atoi (ix87_precision_string);
3011 if (i != 32 && i != 64 && i != 80)
3012 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3017 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3019 /* Enable by default the SSE and MMX builtins. Do allow the user to
3020 explicitly disable any of these. In particular, disabling SSE and
3021 MMX for kernel code is extremely useful. */
3022 if (!ix86_arch_specified)
3024 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3025 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3028 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3032 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3034 if (!ix86_arch_specified)
3036 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3038 /* i386 ABI does not specify red zone. It still makes sense to use it
3039 when programmer takes care to stack from being destroyed. */
3040 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3041 target_flags |= MASK_NO_RED_ZONE;
3044 /* Keep nonleaf frame pointers. */
3045 if (flag_omit_frame_pointer)
3046 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3047 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3048 flag_omit_frame_pointer = 1;
3050 /* If we're doing fast math, we don't care about comparison order
3051 wrt NaNs. This lets us use a shorter comparison sequence. */
3052 if (flag_finite_math_only)
3053 target_flags &= ~MASK_IEEE_FP;
3055 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3056 since the insns won't need emulation. */
3057 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3058 target_flags &= ~MASK_NO_FANCY_MATH_387;
3060 /* Likewise, if the target doesn't have a 387, or we've specified
3061 software floating point, don't use 387 inline intrinsics. */
3063 target_flags |= MASK_NO_FANCY_MATH_387;
3065 /* Turn on MMX builtins for -msse. */
3068 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3069 x86_prefetch_sse = true;
3072 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3073 if (TARGET_SSE4_2 || TARGET_ABM)
3074 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3076 /* Validate -mpreferred-stack-boundary= value or default it to
3077 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3078 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3079 if (ix86_preferred_stack_boundary_string)
3081 i = atoi (ix86_preferred_stack_boundary_string);
3082 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3083 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3084 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3086 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3089 /* Set the default value for -mstackrealign. */
3090 if (ix86_force_align_arg_pointer == -1)
3091 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3093 /* Validate -mincoming-stack-boundary= value or default it to
3094 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3095 if (ix86_force_align_arg_pointer)
3096 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3098 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3099 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3100 if (ix86_incoming_stack_boundary_string)
3102 i = atoi (ix86_incoming_stack_boundary_string);
3103 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3104 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3105 i, TARGET_64BIT ? 4 : 2);
3108 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3109 ix86_incoming_stack_boundary
3110 = ix86_user_incoming_stack_boundary;
3114 /* Accept -msseregparm only if at least SSE support is enabled. */
3115 if (TARGET_SSEREGPARM
3117 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3119 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3120 if (ix86_fpmath_string != 0)
3122 if (! strcmp (ix86_fpmath_string, "387"))
3123 ix86_fpmath = FPMATH_387;
3124 else if (! strcmp (ix86_fpmath_string, "sse"))
3128 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3129 ix86_fpmath = FPMATH_387;
3132 ix86_fpmath = FPMATH_SSE;
3134 else if (! strcmp (ix86_fpmath_string, "387,sse")
3135 || ! strcmp (ix86_fpmath_string, "387+sse")
3136 || ! strcmp (ix86_fpmath_string, "sse,387")
3137 || ! strcmp (ix86_fpmath_string, "sse+387")
3138 || ! strcmp (ix86_fpmath_string, "both"))
3142 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3143 ix86_fpmath = FPMATH_387;
3145 else if (!TARGET_80387)
3147 warning (0, "387 instruction set disabled, using SSE arithmetics");
3148 ix86_fpmath = FPMATH_SSE;
3151 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3154 error ("bad value (%s) for %sfpmath=%s %s",
3155 ix86_fpmath_string, prefix, suffix, sw);
3158 /* If the i387 is disabled, then do not return values in it. */
3160 target_flags &= ~MASK_FLOAT_RETURNS;
3162 /* Use external vectorized library in vectorizing intrinsics. */
3163 if (ix86_veclibabi_string)
3165 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3166 ix86_veclib_handler = ix86_veclibabi_svml;
3167 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3168 ix86_veclib_handler = ix86_veclibabi_acml;
3170 error ("unknown vectorization library ABI type (%s) for "
3171 "%sveclibabi=%s %s", ix86_veclibabi_string,
3172 prefix, suffix, sw);
3175 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3176 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3178 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3180 /* ??? Unwind info is not correct around the CFG unless either a frame
3181 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3182 unwind info generation to be aware of the CFG and propagating states
3184 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3185 || flag_exceptions || flag_non_call_exceptions)
3186 && flag_omit_frame_pointer
3187 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3189 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3190 warning (0, "unwind tables currently require either a frame pointer "
3191 "or %saccumulate-outgoing-args%s for correctness",
3193 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3196 /* If stack probes are required, the space used for large function
3197 arguments on the stack must also be probed, so enable
3198 -maccumulate-outgoing-args so this happens in the prologue. */
3199 if (TARGET_STACK_PROBE
3200 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3202 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3203 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3204 "for correctness", prefix, suffix);
3205 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3208 /* For sane SSE instruction set generation we need fcomi instruction.
3209 It is safe to enable all CMOVE instructions. */
3213 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3216 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3217 p = strchr (internal_label_prefix, 'X');
3218 internal_label_prefix_len = p - internal_label_prefix;
3222 /* When scheduling description is not available, disable scheduler pass
3223 so it won't slow down the compilation and make x87 code slower. */
3224 if (!TARGET_SCHEDULE)
3225 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3227 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3228 set_param_value ("simultaneous-prefetches",
3229 ix86_cost->simultaneous_prefetches);
3230 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3231 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3232 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3233 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3234 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3235 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3237 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3238 can be optimized to ap = __builtin_next_arg (0). */
3240 targetm.expand_builtin_va_start = NULL;
3244 ix86_gen_leave = gen_leave_rex64;
3245 ix86_gen_pop1 = gen_popdi1;
3246 ix86_gen_add3 = gen_adddi3;
3247 ix86_gen_sub3 = gen_subdi3;
3248 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3249 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3250 ix86_gen_monitor = gen_sse3_monitor64;
3251 ix86_gen_andsp = gen_anddi3;
3255 ix86_gen_leave = gen_leave;
3256 ix86_gen_pop1 = gen_popsi1;
3257 ix86_gen_add3 = gen_addsi3;
3258 ix86_gen_sub3 = gen_subsi3;
3259 ix86_gen_sub3_carry = gen_subsi3_carry;
3260 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3261 ix86_gen_monitor = gen_sse3_monitor;
3262 ix86_gen_andsp = gen_andsi3;
3266 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3268 target_flags |= MASK_CLD & ~target_flags_explicit;
3271 /* Save the initial options in case the user does function specific options */
3273 target_option_default_node = target_option_current_node
3274 = build_target_option_node ();
3277 /* Save the current options */
/* TARGET_OPTION_SAVE hook: copy the current global target-option state
   (arch/tune/schedule selection, fpmath, branch cost, and the "explicit"
   masks recording which options the user set on the command line) into
   *PTR so it can later be reinstated by ix86_function_specific_restore.  */
3280 ix86_function_specific_save (struct cl_target_option *ptr)
/* The cl_target_option fields are narrow; assert each value fits in a
   byte before it is narrowed on assignment below.  */
3282 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3283 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3284 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3285 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3286 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3288 ptr->arch = ix86_arch;
3289 ptr->schedule = ix86_schedule;
3290 ptr->tune = ix86_tune;
3291 ptr->fpmath = ix86_fpmath;
3292 ptr->branch_cost = ix86_branch_cost;
3293 ptr->tune_defaulted = ix86_tune_defaulted;
3294 ptr->arch_specified = ix86_arch_specified;
3295 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3296 ptr->target_flags_explicit = target_flags_explicit;
3299 /* Restore the current options */
/* TARGET_OPTION_RESTORE hook: reinstate the target-option state saved in
   *PTR by ix86_function_specific_save.  If the arch or tune selection
   changes, the derived feature bitmaps are recomputed below.  */
3302 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember the previous arch/tune so we only rebuild the feature tables
   when they actually changed.  */
3304 enum processor_type old_tune = ix86_tune;
3305 enum processor_type old_arch = ix86_arch;
3306 unsigned int ix86_arch_mask, ix86_tune_mask;
3309 ix86_arch = ptr->arch;
3310 ix86_schedule = ptr->schedule;
3311 ix86_tune = ptr->tune;
3312 ix86_fpmath = ptr->fpmath;
3313 ix86_branch_cost = ptr->branch_cost;
3314 ix86_tune_defaulted = ptr->tune_defaulted;
3315 ix86_arch_specified = ptr->arch_specified;
3316 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3317 target_flags_explicit = ptr->target_flags_explicit;
3319 /* Recreate the arch feature tests if the arch changed */
3320 if (old_arch != ix86_arch)
/* Each arch feature is a per-processor bitmask; test the bit for the
   newly selected arch.  */
3322 ix86_arch_mask = 1u << ix86_arch;
3323 for (i = 0; i < X86_ARCH_LAST; ++i)
3324 ix86_arch_features[i]
3325 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3328 /* Recreate the tune optimization tests */
3329 if (old_tune != ix86_tune)
3331 ix86_tune_mask = 1u << ix86_tune;
3332 for (i = 0; i < X86_TUNE_LAST; ++i)
3333 ix86_tune_features[i]
3334 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3338 /* Print the current options */
/* TARGET_OPTION_PRINT hook: dump the option state in *PTR to FILE,
   indented by INDENT columns, for -fdump style debugging output.  */
3341 ix86_function_specific_print (FILE *file, int indent,
3342 struct cl_target_option *ptr)
/* Build a human-readable "-m..." option string from the saved ISA and
   target flags; it is printed and freed at the end.  */
3345 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3346 NULL, NULL, NULL, false);
/* Print the arch as both its numeric index and, when in range of the
   cpu_names table, its name.  */
3348 fprintf (file, "%*sarch = %d (%s)\n",
3351 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3352 ? cpu_names[ptr->arch]
3355 fprintf (file, "%*stune = %d (%s)\n",
3358 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3359 ? cpu_names[ptr->tune]
/* fpmath is a bit set: 387 and/or SSE may both be enabled.  */
3362 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3363 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3364 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3365 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3369 fprintf (file, "%*s%s\n", indent, "", target_string);
3370 free (target_string);
3375 /* Inner function to process the attribute((target(...))), take an argument and
3376 set the current options from the argument. If we have a list, recursively go
/* Parses the argument(s) of attribute((target("..."))) and applies each
   recognized option to the current target state.  String-valued options
   (arch=, tune=, fpmath=) are not applied directly; their values are
   stored into P_STRINGS for the caller to handle.  Returns false on an
   unrecognized or duplicated option (error already emitted).  */
3380 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry constructors: each records the option name, its length
   (sans NUL), how it is processed (isa / string / yes-no flag), the
   OPT_* enumerator, and for flag options the target_flags mask.  */
3385 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3386 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3387 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3388 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3403 enum ix86_opt_type type;
/* ISA options, routed through ix86_handle_option below.  */
3408 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3409 IX86_ATTR_ISA ("abm", OPT_mabm),
3410 IX86_ATTR_ISA ("aes", OPT_maes),
3411 IX86_ATTR_ISA ("avx", OPT_mavx),
3412 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3413 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3414 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3415 IX86_ATTR_ISA ("sse", OPT_msse),
3416 IX86_ATTR_ISA ("sse2", OPT_msse2),
3417 IX86_ATTR_ISA ("sse3", OPT_msse3),
3418 IX86_ATTR_ISA ("sse4", OPT_msse4),
3419 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3420 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3421 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3422 IX86_ATTR_ISA ("sse5", OPT_msse5),
3423 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3425 /* string options */
3426 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3427 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3428 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Boolean flag options; IX86_ATTR_NO entries are flags whose mask
   encodes the negative sense (e.g. MASK_NO_FANCY_MATH_387).  */
3431 IX86_ATTR_YES ("cld",
3435 IX86_ATTR_NO ("fancy-math-387",
3436 OPT_mfancy_math_387,
3437 MASK_NO_FANCY_MATH_387),
3439 IX86_ATTR_NO ("fused-madd",
3441 MASK_NO_FUSED_MADD),
3443 IX86_ATTR_YES ("ieee-fp",
3447 IX86_ATTR_YES ("inline-all-stringops",
3448 OPT_minline_all_stringops,
3449 MASK_INLINE_ALL_STRINGOPS),
3451 IX86_ATTR_YES ("inline-stringops-dynamically",
3452 OPT_minline_stringops_dynamically,
3453 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3455 IX86_ATTR_NO ("align-stringops",
3456 OPT_mno_align_stringops,
3457 MASK_NO_ALIGN_STRINGOPS),
3459 IX86_ATTR_YES ("recip",
3465 /* If this is a list, recurse to get the options. */
3466 if (TREE_CODE (args) == TREE_LIST)
3470 for (; args; args = TREE_CHAIN (args))
3471 if (TREE_VALUE (args)
3472 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
/* A non-list argument must be a string constant.  */
3478 else if (TREE_CODE (args) != STRING_CST)
3481 /* Handle multiple arguments separated by commas. */
/* ASTRDUP makes a stack-allocated writable copy of the attribute string
   so we can walk it destructively.  */
3482 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3484 while (next_optstr && *next_optstr != '\0')
3486 char *p = next_optstr;
3488 char *comma = strchr (next_optstr, ',');
3489 const char *opt_string;
3490 size_t len, opt_len;
3495 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the current option at the comma, if any.  */
3501 len = comma - next_optstr;
3502 next_optstr = comma + 1;
3510 /* Recognize no-xxx. */
3511 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3520 /* Find the option. */
/* Linear scan of the attrs table; a cheap first-character check guards
   the memcmp.  For string options (arch=...) the name is a prefix, so
   the full token must be strictly longer than the table entry.  */
3523 for (i = 0; i < sizeof (attrs) / sizeof (attrs[0]); i++)
3525 type = attrs[i].type;
3526 opt_len = attrs[i].len;
3527 if (ch == attrs[i].string[0]
3528 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3529 && memcmp (p, attrs[i].string, opt_len) == 0)
3532 mask = attrs[i].mask;
3533 opt_string = attrs[i].string;
3538 /* Process the option. */
3541 error ("attribute(target(\"%s\")) is unknown", orig_p)
3545 else if (type == ix86_opt_isa)
/* ISA options share the regular command-line option handler.  */
3546 ix86_handle_option (opt, p, opt_set_p);
3548 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* For "no"-sense options the mask represents the inverted flag, so
   flip the requested polarity before applying it.  */
3550 if (type == ix86_opt_no)
3551 opt_set_p = !opt_set_p;
3554 target_flags |= mask;
3556 target_flags &= ~mask;
3559 else if (type == ix86_opt_str)
/* String options may appear only once per attribute.  */
3563 error ("option(\"%s\") was already specified", opt_string);
/* Stash the value (text after "name=") for the caller; caller frees.  */
3567 p_strings[opt] = xstrdup (p + opt_len);
3577 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parse ARGS (the attribute((target(...))) payload), rerun the option
   override machinery with the attribute's arch/tune/fpmath strings
   temporarily installed, and capture the resulting state in a
   TARGET_OPTION_NODE.  The original global strings are restored before
   returning, so this has no lasting effect on global state.  */
3580 ix86_valid_target_attribute_tree (tree args)
/* Save the globals we temporarily override so they can be restored
   at the end.  */
3582 const char *orig_arch_string = ix86_arch_string;
3583 const char *orig_tune_string = ix86_tune_string;
3584 const char *orig_fpmath_string = ix86_fpmath_string;
3585 int orig_tune_defaulted = ix86_tune_defaulted;
3586 int orig_arch_specified = ix86_arch_specified;
/* Slots for arch=/tune=/fpmath= values collected by the inner parser;
   freed below.  */
3587 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3590 struct cl_target_option *def
3591 = TREE_TARGET_OPTION (target_option_default_node);
3593 /* Process each of the options on the chain. */
3594 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3597 /* If the changed options are different from the default, rerun override_options,
3598 and then save the options away. The string options are are attribute options,
3599 and will be undone when we copy the save structure. */
3600 if (ix86_isa_flags != def->ix86_isa_flags
3601 || target_flags != def->target_flags
3602 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3603 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3604 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3606 /* If we are using the default tune= or arch=, undo the string assigned,
3607 and use the default. */
3608 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3609 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3610 else if (!orig_arch_specified)
3611 ix86_arch_string = NULL;
3613 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3614 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3615 else if (orig_tune_defaulted)
3616 ix86_tune_string = NULL;
3618 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3619 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3620 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3621 else if (!TARGET_64BIT && TARGET_SSE)
3622 ix86_fpmath_string = "sse,387";
3624 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3625 override_options (false);
3627 /* Add any builtin functions with the new isa if any. */
3628 ix86_add_new_builtins (ix86_isa_flags);
3630 /* Save the current options unless we are validating options for
3632 t = build_target_option_node ();
/* Restore the globals we overrode above.  */
3634 ix86_arch_string = orig_arch_string;
3635 ix86_tune_string = orig_tune_string;
3636 ix86_fpmath_string = orig_fpmath_string;
3638 /* Free up memory allocated to hold the strings */
3639 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3640 if (option_strings[i])
3641 free (option_strings[i]);
3647 /* Hook to validate attribute((target("string"))). */
/* TARGET_OPTION_VALID_ATTRIBUTE_P implementation for FNDECL: validates
   ARGS, attaches the resulting target-option (and, if changed,
   optimization) nodes to the decl, and restores the caller's option
   state before returning.  */
3650 ix86_valid_target_attribute_p (tree fndecl,
3651 tree ARG_UNUSED (name),
3653 int ARG_UNUSED (flags))
3655 struct cl_target_option cur_target;
3657 tree old_optimize = build_optimization_node ();
3658 tree new_target, new_optimize;
3659 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3661 /* If the function changed the optimization levels as well as setting target
3662 options, start with the optimizations specified. */
3663 if (func_optimize && func_optimize != old_optimize)
3664 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3666 /* The target attributes may also change some optimization flags, so update
3667 the optimization options if necessary. */
/* Save the current target state so it can be restored after validation;
   parsing the attribute mutates the global option state.  */
3668 cl_target_option_save (&cur_target);
3669 new_target = ix86_valid_target_attribute_tree (args);
3670 new_optimize = build_optimization_node ();
3677 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
/* Only record an optimization node if the attribute actually changed
   the optimization flags.  */
3679 if (old_optimize != new_optimize)
3680 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Put the caller's target and optimization state back.  */
3683 cl_target_option_restore (&cur_target);
3685 if (old_optimize != new_optimize)
3686 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3692 /* Hook to determine if one function can safely inline another. */
/* TARGET_CAN_INLINE_P: CALLEE may be inlined into CALLER only when the
   callee's target options are compatible with (a subset of) the
   caller's, so no code is generated for an ISA the caller does not
   guarantee.  */
3695 ix86_can_inline_p (tree caller, tree callee)
3698 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3699 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3701 /* If callee has no option attributes, then it is ok to inline. */
3705 /* If caller has no option attributes, but callee does then it is not ok to
3707 else if (!caller_tree)
3712 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3713 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3715 /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function
3716 can inline a SSE2 function but a SSE2 function can't inline a SSE5
/* Subset test: every ISA bit required by the callee must also be set
   in the caller.  */
3718 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3719 != callee_opts->ix86_isa_flags)
3722 /* See if we have the same non-isa options. */
3723 else if (caller_opts->target_flags != callee_opts->target_flags)
3726 /* See if arch, tune, etc. are the same. */
3727 else if (caller_opts->arch != callee_opts->arch)
3730 else if (caller_opts->tune != callee_opts->tune)
3733 else if (caller_opts->fpmath != callee_opts->fpmath)
3736 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3747 /* Remember the last target of ix86_set_current_function. */
/* Cached across calls (GC-rooted) so repeated invocations for the same
   function are cheap no-ops.  */
3748 static GTY(()) tree ix86_previous_fndecl;
3750 /* Establish appropriate back-end context for processing the function
3751 FNDECL. The argument might be NULL to indicate processing at top
3752 level, outside of any function scope. */
3754 ix86_set_current_function (tree fndecl)
3756 /* Only change the context if the function changes. This hook is called
3757 several times in the course of compiling a function, and we don't want to
3758 slow things down too much or call target_reinit when it isn't safe. */
3759 if (fndecl && fndecl != ix86_previous_fndecl)
/* Compare the per-function target-option nodes of the old and new
   function; option nodes are shared, so pointer equality suffices.  */
3761 tree old_tree = (ix86_previous_fndecl
3762 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3765 tree new_tree = (fndecl
3766 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3769 ix86_previous_fndecl = fndecl;
3770 if (old_tree == new_tree)
/* The new function carries specific target options: install them.  */
3775 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* Otherwise fall back to the current global option state.  */
3781 struct cl_target_option *def
3782 = TREE_TARGET_OPTION (target_option_current_node);
3784 cl_target_option_restore (def);
3791 /* Return true if this goes in large data/bss. */
/* Only the x86-64 medium code models split data into near and far
   (.ldata/.lbss) sections; everything else is "small" data.  */
3794 ix86_in_large_data_p (tree exp)
3796 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3799 /* Functions are never large data. */
3800 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute naming a large-data section forces
   large-data treatment regardless of size.  */
3803 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3805 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3806 if (strcmp (section, ".ldata") == 0
3807 || strcmp (section, ".lbss") == 0)
/* Otherwise classify by size against -mlarge-data-threshold.  */
3813 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3815 /* If this is an incomplete type with size 0, then we can't put it
3816 in data because it might be too big when completed. */
3817 if (!size || size > ix86_section_threshold)
3824 /* Switch to the appropriate section for output of DECL.
3825 DECL is either a `VAR_DECL' node or a constant of some sort.
3826 RELOC indicates whether forming the initial value of DECL requires
3827 link-time relocations. */
3829 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* TARGET_ASM_SELECT_SECTION for x86-64 ELF: in the medium code models,
   large data objects are redirected to ".l"-prefixed section variants;
   everything else is delegated to the default ELF handler.  */
3833 x86_64_elf_select_section (tree decl, int reloc,
3834 unsigned HOST_WIDE_INT align)
3836 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3837 && ix86_in_large_data_p (decl))
3839 const char *sname = NULL;
3840 unsigned int flags = SECTION_WRITE;
/* Map the generic section category onto the corresponding large-model
   section name, adding section flags as needed.  */
3841 switch (categorize_decl_for_section (decl, reloc))
3846 case SECCAT_DATA_REL:
3847 sname = ".ldata.rel";
3849 case SECCAT_DATA_REL_LOCAL:
3850 sname = ".ldata.rel.local";
3852 case SECCAT_DATA_REL_RO:
3853 sname = ".ldata.rel.ro";
3855 case SECCAT_DATA_REL_RO_LOCAL:
3856 sname = ".ldata.rel.ro.local";
3860 flags |= SECTION_BSS;
3863 case SECCAT_RODATA_MERGE_STR:
3864 case SECCAT_RODATA_MERGE_STR_INIT:
3865 case SECCAT_RODATA_MERGE_CONST:
3869 case SECCAT_SRODATA:
3876 /* We don't split these for medium model. Place them into
3877 default sections and hope for best. */
3879 case SECCAT_EMUTLS_VAR:
3880 case SECCAT_EMUTLS_TMPL:
3885 /* We might get called with string constants, but get_named_section
3886 doesn't like them as they are not DECLs. Also, we need to set
3887 flags in that case. */
3889 return get_section (sname, flags, NULL);
3890 return get_named_section (decl, sname, reloc);
/* Not large data (or not a medium model): use the generic ELF rules.  */
3893 return default_elf_select_section (decl, reloc, align);
3896 /* Build up a unique section name, expressed as a
3897 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3898 RELOC indicates whether the initial value of EXP requires
3899 link-time relocations. */
/* TARGET_ASM_UNIQUE_SECTION for x86-64 ELF: like the default, but
   large-model data gets ".l"-prefixed section names so it lands in the
   far data/bss regions.  */
3901 static void ATTRIBUTE_UNUSED
3902 x86_64_elf_unique_section (tree decl, int reloc)
3904 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3905 && ix86_in_large_data_p (decl))
3907 const char *prefix = NULL;
3908 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
3909 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
/* Choose the section-name prefix by category; one_only variants use
   abbreviated prefixes (.ld/.lb/.lr).  */
3911 switch (categorize_decl_for_section (decl, reloc))
3914 case SECCAT_DATA_REL:
3915 case SECCAT_DATA_REL_LOCAL:
3916 case SECCAT_DATA_REL_RO:
3917 case SECCAT_DATA_REL_RO_LOCAL:
3918 prefix = one_only ? ".ld" : ".ldata";
3921 prefix = one_only ? ".lb" : ".lbss";
3924 case SECCAT_RODATA_MERGE_STR:
3925 case SECCAT_RODATA_MERGE_STR_INIT:
3926 case SECCAT_RODATA_MERGE_CONST:
3927 prefix = one_only ? ".lr" : ".lrodata";
3929 case SECCAT_SRODATA:
3936 /* We don't split these for medium model. Place them into
3937 default sections and hope for best. */
3939 case SECCAT_EMUTLS_VAR:
3940 prefix = targetm.emutls.var_section;
3942 case SECCAT_EMUTLS_TMPL:
3943 prefix = targetm.emutls.tmpl_section;
3948 const char *name, *linkonce;
/* Build "<linkonce><prefix>.<decl-name>" from the stripped assembler
   name of the decl.  */
3951 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
3952 name = targetm.strip_name_encoding (name);
3954 /* If we're using one_only, then there needs to be a .gnu.linkonce
3955 prefix to the section name. */
3956 linkonce = one_only ? ".gnu.linkonce" : "";
3958 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
3960 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
/* Not large data: fall back to the generic unique-section logic.  */
3964 default_unique_section (decl, reloc);
#ifdef COMMON_ASM_OP
3968 /* This says how to output assembler code to declare an
3969 uninitialized external linkage data object.
3971 For medium model x86-64 we need to use .largecomm opcode for
/* Emit a common-symbol directive for NAME of SIZE bytes with the given
   alignment.  Objects above the large-data threshold in the medium code
   models use the .largecomm directive; everything else uses the normal
   COMMON_ASM_OP (.comm).  */
3974 x86_elf_aligned_common (FILE *file,
3975 const char *name, unsigned HOST_WIDE_INT size,
3978 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3979 && size > (unsigned int)ix86_section_threshold)
3980 fprintf (file, ".largecomm\t");
3982 fprintf (file, "%s", COMMON_ASM_OP);
3983 assemble_name (file, name);
/* Alignment is emitted in bytes, hence the BITS_PER_UNIT division.  */
3984 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3985 size, align / BITS_PER_UNIT);
3989 /* Utility function for targets to use in implementing
3990 ASM_OUTPUT_ALIGNED_BSS. */
/* Emit an aligned BSS object: pick .lbss for large-model data over the
   threshold, otherwise the normal bss section, then align, label and
   reserve SIZE bytes.  */
3993 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3994 const char *name, unsigned HOST_WIDE_INT size,
3997 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3998 && size > (unsigned int)ix86_section_threshold)
3999 switch_to_section (get_named_section (decl, ".lbss", 0));
4001 switch_to_section (bss_section);
4002 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4003 #ifdef ASM_DECLARE_OBJECT_NAME
4004 last_assemble_variable_decl = decl;
4005 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4007 /* Standard thing is just output label for the object. */
4008 ASM_OUTPUT_LABEL (file, name);
4009 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the label refers to real storage.  */
4010 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* OPTIMIZATION_OPTIONS implementation: set i386-specific defaults for a
   given optimization LEVEL, before command-line options are read.  */
4014 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4016 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4017 make the problem with not enough registers even worse. */
4018 #ifdef INSN_SCHEDULING
4020 flag_schedule_insns = 0;
4024 /* The Darwin libraries never set errno, so we might as well
4025 avoid calling them when that's the only reason we would. */
4026 flag_errno_math = 0;
4028 /* The default values of these switches depend on the TARGET_64BIT
4029 that is not known at this moment. Mark these values with 2 and
4030 let user the to override these. In case there is no command line option
4031 specifying them, we will set the defaults in override_options. */
/* The sentinel value 2 means "not set by the user"; override_options
   later replaces it with the real per-target default.  */
4033 flag_omit_frame_pointer = 2;
4034 flag_pcc_struct_return = 2;
4035 flag_asynchronous_unwind_tables = 2;
4036 flag_vect_cost_model = 1;
4037 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4038 SUBTARGET_OPTIMIZATION_OPTIONS;
4042 /* Decide whether we can make a sibling call to a function. DECL is the
4043 declaration of the function being targeted by the call and EXP is the
4044 CALL_EXPR representing the call. */
4047 ix86_function_ok_for_sibcall (tree decl, tree exp)
4052 /* If we are generating position-independent code, we cannot sibcall
4053 optimize any indirect call, or a direct call to a global function,
4054 as the PLT requires %ebx be live. */
4055 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Recover the callee's function type from the CALL_EXPR's function
   operand (strip one level of pointer if present).  */
4062 func = TREE_TYPE (CALL_EXPR_FN (exp));
4063 if (POINTER_TYPE_P (func))
4064 func = TREE_TYPE (func);
4067 /* Check that the return value locations are the same. Like
4068 if we are returning floats on the 80387 register stack, we cannot
4069 make a sibcall from a function that doesn't return a float to a
4070 function that does or, conversely, from a function that does return
4071 a float to a function that doesn't; the necessary stack adjustment
4072 would not be executed. This is also the place we notice
4073 differences in the return value ABI. Note that it is ok for one
4074 of the functions to have void return type as long as the return
4075 value of the other is passed in a register. */
4076 a = ix86_function_value (TREE_TYPE (exp), func, false);
4077 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4079 if (STACK_REG_P (a) || STACK_REG_P (b))
4081 if (!rtx_equal_p (a, b))
4084 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4086 else if (!rtx_equal_p (a, b))
4089 /* If this call is indirect, we'll need to be able to use a call-clobbered
4090 register for the address of the target function. Make sure that all
4091 such registers are not used for passing parameters. */
4092 if (!decl && !TARGET_64BIT)
4096 /* We're looking at the CALL_EXPR, we need the type of the function. */
4097 type = CALL_EXPR_FN (exp); /* pointer expression */
4098 type = TREE_TYPE (type); /* pointer type */
4099 type = TREE_TYPE (type); /* function type */
/* With regparm(3) all of eax/edx/ecx may carry arguments, leaving no
   call-clobbered register free for the indirect call address.  */
4101 if (ix86_function_regparm (type, NULL) >= 3)
4103 /* ??? Need to count the actual number of registers to be used,
4104 not the possible number of registers. Fix later. */
4109 /* Dllimport'd functions are also called indirectly. */
4110 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4112 && decl && DECL_DLLIMPORT_P (decl)
4113 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4116 /* If we need to align the outgoing stack, then sibcalling would
4117 unalign the stack, which may break the called function. */
4118 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4121 /* Otherwise okay. That also includes certain types of indirect calls. */
4125 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4126 calling convention attributes;
4127 arguments as in struct attribute_spec.handler. */
4130 ix86_handle_cconv_attribute (tree *node, tree name,
4132 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types (or declarations
   that carry a function type); otherwise warn and drop the attribute.  */
4135 if (TREE_CODE (*node) != FUNCTION_TYPE
4136 && TREE_CODE (*node) != METHOD_TYPE
4137 && TREE_CODE (*node) != FIELD_DECL
4138 && TREE_CODE (*node) != TYPE_DECL)
4140 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4141 IDENTIFIER_POINTER (name));
4142 *no_add_attrs = true;
4146 /* Can combine regparm with all attributes but fastcall. */
4147 if (is_attribute_p ("regparm", name))
4151 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4153 error ("fastcall and regparm attributes are not compatible");
/* Validate the regparm argument: must be an integer constant and
   must not exceed REGPARM_MAX.  */
4156 cst = TREE_VALUE (args);
4157 if (TREE_CODE (cst) != INTEGER_CST)
4159 warning (OPT_Wattributes,
4160 "%qs attribute requires an integer constant argument",
4161 IDENTIFIER_POINTER (name));
4162 *no_add_attrs = true;
4164 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4166 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4167 IDENTIFIER_POINTER (name), REGPARM_MAX);
4168 *no_add_attrs = true;
4176 /* Do not warn when emulating the MS ABI. */
4177 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4178 warning (OPT_Wattributes, "%qs attribute ignored",
4179 IDENTIFIER_POINTER (name));
4180 *no_add_attrs = true;
4184 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4185 if (is_attribute_p ("fastcall", name))
4187 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4189 error ("fastcall and cdecl attributes are not compatible");
4191 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4193 error ("fastcall and stdcall attributes are not compatible");
4195 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4197 error ("fastcall and regparm attributes are not compatible");
4201 /* Can combine stdcall with fastcall (redundant), regparm and
4203 else if (is_attribute_p ("stdcall", name))
4205 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4207 error ("stdcall and cdecl attributes are not compatible");
4209 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4211 error ("stdcall and fastcall attributes are not compatible");
4215 /* Can combine cdecl with regparm and sseregparm. */
4216 else if (is_attribute_p ("cdecl", name))
4218 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4220 error ("stdcall and cdecl attributes are not compatible");
4222 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4224 error ("fastcall and cdecl attributes are not compatible");
4228 /* Can combine sseregparm with all attributes. */
4233 /* Return 0 if the attributes for two types are incompatible, 1 if they
4234 are compatible, and 2 if they are nearly compatible (which causes a
4235 warning to be generated). */
4238 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4240 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default convention is stdcall, so the attribute that
   deviates from the default is "cdecl" (and vice versa).  */
4241 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4243 if (TREE_CODE (type1) != FUNCTION_TYPE
4244 && TREE_CODE (type1) != METHOD_TYPE)
4247 /* Check for mismatched fastcall/regparm types. */
/* The "!a != !b" pattern compares presence/absence of the attribute
   on the two types without caring about the attribute tree pointers.  */
4248 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4249 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4250 || (ix86_function_regparm (type1, NULL)
4251 != ix86_function_regparm (type2, NULL)))
4254 /* Check for mismatched sseregparm types. */
4255 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4256 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4259 /* Check for mismatched return types (cdecl vs stdcall). */
4260 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4261 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4267 /* Return the regparm value for a function with the indicated TYPE and DECL.
4268 DECL may be NULL when calling function indirectly
4269 or considering a libcall. */
4272 ix86_function_regparm (const_tree type, const_tree decl)
4275 int regparm = ix86_regparm;
/* Static so the "nested functions" diagnostic below is issued once.  */
4277 static bool error_issued;
4281 if (ix86_function_type_abi (type) == DEFAULT_ABI)
4283 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
/* An explicit regparm(N) attribute overrides the -mregparm default.  */
4286 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type))
4290 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4292 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4294 /* We can't use regparm(3) for nested functions because
4295 these pass static chain pointer in %ecx register. */
4296 if (!error_issued && regparm == 3
4297 && decl_function_context (decl)
4298 && !DECL_NO_STATIC_CHAIN (decl))
4300 error ("nested functions are limited to 2 register parameters");
4301 error_issued = true;
4309 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4312 /* Use register calling convention for local functions when possible. */
4313 if (decl && TREE_CODE (decl) == FUNCTION_DECL
4316 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4317 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4320 int local_regparm, globals = 0, regno;
4323 /* Make sure no regparm register is taken by a
4324 fixed register variable. */
4325 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4326 if (fixed_regs[local_regparm])
4329 /* We can't use regparm(3) for nested functions as these use
4330 static chain pointer in third argument. */
4331 if (local_regparm == 3
4332 && decl_function_context (decl)
4333 && !DECL_NO_STATIC_CHAIN (decl))
4336 /* If the function realigns its stackpointer, the prologue will
4337 clobber %ecx. If we've already generated code for the callee,
4338 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4339 scanning the attributes for the self-realigning property. */
4340 f = DECL_STRUCT_FUNCTION (decl);
4341 /* Since current internal arg pointer won't conflict with
4342 parameter passing regs, so no need to change stack
4343 realignment and adjust regparm number.
4345 Each fixed register usage increases register pressure,
4346 so fewer registers should be used for argument passing.
4347 This functionality can be overridden by an explicit
/* Count fixed (globally reserved) registers among ax..di and reduce
   the local regparm count accordingly.  */
4349 for (regno = 0; regno <= DI_REG; regno++)
4350 if (fixed_regs[regno])
4354 = globals < local_regparm ? local_regparm - globals : 0;
4356 if (local_regparm > regparm)
4357 regparm = local_regparm;
4364 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4365 DFmode (2) arguments in SSE registers for a function with the
4366 indicated TYPE and DECL. DECL may be NULL when calling function
4367 indirectly or considering a libcall. Otherwise return 0. */
4370 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* This path is 32-bit only; 64-bit argument passing is handled by the
   x86-64 classification code.  */
4372 gcc_assert (!TARGET_64BIT);
4374 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4375 by the sseregparm attribute. */
4376 if (TARGET_SSEREGPARM
4377 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* Diagnose sseregparm without SSE: use %qD when we have a decl to
   name, %qT when only the type is available.  */
4384 error ("Calling %qD with attribute sseregparm without "
4385 "SSE/SSE2 enabled", decl);
4387 error ("Calling %qT with attribute sseregparm without "
4388 "SSE/SSE2 enabled", type);
4396 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4397 (and DFmode for SSE2) arguments in SSE registers. */
4398 if (decl && TARGET_SSE_MATH && !profile_flag)
4400 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4401 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4403 return TARGET_SSE2 ? 2 : 1;
4409 /* Return true if EAX is live at the start of the function. Used by
4410 ix86_expand_prologue to determine if we need special help before
4411 calling allocate_stack_worker. */
4414 ix86_eax_live_at_start_p (void)
4416 /* Cheat. Don't bother working forward from ix86_function_regparm
4417 to the function type to whether an actual argument is located in
4418 eax. Instead just look at cfg info, which is still close enough
4419 to correct at this point. This gives false positives for broken
4420 functions that might use uninitialized data that happens to be
4421 allocated in eax, but who cares? */
/* Hard register 0 is %eax; test its liveness at the entry block exit.  */
4422 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4425 /* Value is the number of bytes of arguments automatically
4426 popped when returning from a subroutine call.
4427 FUNDECL is the declaration node of the function (as a tree),
4428 FUNTYPE is the data type of the function (as a tree),
4429 or for a library call it is an identifier node for the subroutine name.
4430 SIZE is the number of bytes of arguments passed on the stack.
4432 On the 80386, the RTD insn may be used to pop them if the number
4433 of args is fixed, but if the number is variable then the caller
4434 must pop them all. RTD can't be used for library calls now
4435 because the library is compiled with the Unix compiler.
4436 Use of RTD is a selectable option, since it is incompatible with
4437 standard Unix calling sequences. If the option is not selected,
4438 the caller must always pop the args.
4440 The attribute stdcall is equivalent to RTD on a per module basis. */
4443 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4447 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies only to real functions, not library-call identifiers.  */
4451 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4453 /* Cdecl functions override -mrtd, and never pop the stack. */
4454 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4456 /* Stdcall and fastcall functions will pop the stack if not
4458 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4459 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* Callee-pop only works for fixed argument lists, not stdarg ones.  */
4462 if (rtd && ! stdarg_p (funtype))
4466 /* Lose any fake structure return argument if it is passed on the stack. */
4467 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4468 && !KEEP_AGGREGATE_RETURN_POINTER)
4470 int nregs = ix86_function_regparm (funtype, fundecl);
4472 return GET_MODE_SIZE (Pmode);
4478 /* Argument support functions. */
4480 /* Return true when register may be used to pass function parameters. */
4482 ix86_function_arg_regno_p (int regno)
4485 const int *parm_regs;
/* 32-bit cases: integer regparm registers, plus MMX/SSE registers when
   the corresponding ISA extension is enabled.  */
4490 return (regno < REGPARM_MAX
4491 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4493 return (regno < REGPARM_MAX
4494 || (TARGET_MMX && MMX_REGNO_P (regno)
4495 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4496 || (TARGET_SSE && SSE_REGNO_P (regno)
4497 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4502 if (SSE_REGNO_P (regno) && TARGET_SSE)
4507 if (TARGET_SSE && SSE_REGNO_P (regno)
4508 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4512 /* TODO: The function should depend on current function ABI but
4513 builtins.c would need updating then. Therefore we use the
4516 /* RAX is used as hidden argument to va_arg functions. */
4517 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
/* 64-bit: look REGNO up in the ABI-appropriate integer parameter
   register table.  */
4520 if (DEFAULT_ABI == MS_ABI)
4521 parm_regs = x86_64_ms_abi_int_parameter_registers;
4523 parm_regs = x86_64_int_parameter_registers;
4524 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4525 : X86_64_REGPARM_MAX); i++)
4526 if (regno == parm_regs[i])
4531 /* Return if we do not know how to pass TYPE solely in registers. */
4534 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4536 if (must_pass_in_stack_var_size_or_pad (mode, type))
4539 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4540 The layout_type routine is crafty and tries to trick us into passing
4541 currently unsupported vector types on the stack by using TImode. */
4542 return (!TARGET_64BIT && mode == TImode
4543 && type && TREE_CODE (type) != VECTOR_TYPE);
4546 /* It returns the size, in bytes, of the area reserved for arguments passed
4547 in registers for the function represented by fndecl dependent to the used
4550 ix86_reg_parm_stack_space (const_tree fndecl)
4552 int call_abi = SYSV_ABI;
/* FNDECL may be either a FUNCTION_DECL or a bare function type; pick
   the matching ABI query.  */
4553 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4554 call_abi = ix86_function_abi (fndecl);
4556 call_abi = ix86_function_type_abi (fndecl);
/* Only the MS ABI reserves a register-parameter (home) area.  */
4557 if (call_abi == MS_ABI)
4562 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4565 ix86_function_type_abi (const_tree fntype)
4567 if (TARGET_64BIT && fntype != NULL)
/* The ms_abi / sysv_abi attributes flip the function away from the
   configured default ABI.  */
4570 if (DEFAULT_ABI == SYSV_ABI)
4571 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4573 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
4581 ix86_function_abi (const_tree fndecl)
/* Delegate to the type-based query using the decl's function type.  */
4585 return ix86_function_type_abi (TREE_TYPE (fndecl));
4588 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4591 ix86_cfun_abi (void)
/* Outside a function, or in 32-bit mode, there is no per-function ABI.  */
4593 if (! cfun || ! TARGET_64BIT)
4595 return cfun->machine->call_abi;
4599 extern void init_regs (void);
4601 /* Implementation of call abi switching target hook. Specific to FNDECL
4602 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4603 for more details. */
4605 ix86_call_abi_override (const_tree fndecl)
/* NULL decl means an indirect call or libcall: use the default ABI.  */
4607 if (fndecl == NULL_TREE)
4608 cfun->machine->call_abi = DEFAULT_ABI;
4610 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4613 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4614 re-initialization of init_regs each time we switch function context since
4615 this is needed only during RTL expansion. */
4617 ix86_maybe_switch_abi (void)
/* Only re-run init_regs when the call-used set actually disagrees with
   the current function's ABI (RSI is call-used under SysV but not MS).  */
4620 call_used_regs[4 /*RSI*/] == (cfun->machine->call_abi == MS_ABI))
4624 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4625 for a call to a function whose data type is FNTYPE.
4626 For a library call, FNTYPE is 0. */
4629 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4630 tree fntype, /* tree ptr for function decl */
4631 rtx libname, /* SYMBOL_REF of library name or 0 */
4634 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4635 memset (cum, 0, sizeof (*cum));
/* Record the callee's ABI: prefer the decl when available, otherwise
   fall back to the function type.  */
4638 cum->call_abi = ix86_function_abi (fndecl);
4640 cum->call_abi = ix86_function_type_abi (fntype);
4641 /* Set up the number of registers to use for passing arguments. */
4642 cum->nregs = ix86_regparm;
4645 if (cum->call_abi != DEFAULT_ABI)
4646 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4651 cum->sse_nregs = SSE_REGPARM_MAX;
4654 if (cum->call_abi != DEFAULT_ABI)
4655 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4656 : X64_SSE_REGPARM_MAX;
4660 cum->mmx_nregs = MMX_REGPARM_MAX;
/* Warn (once) about passing vector arguments with the ISA disabled.  */
4661 cum->warn_avx = true;
4662 cum->warn_sse = true;
4663 cum->warn_mmx = true;
4665 /* Because type might mismatch in between caller and callee, we need to
4666 use actual type of function for local calls.
4667 FIXME: cgraph_analyze can be told to actually record if function uses
4668 va_start so for local functions maybe_vaarg can be made aggressive
4670 FIXME: once the type system is fixed, we won't need this code anymore. */
4672 fntype = TREE_TYPE (fndecl);
4673 cum->maybe_vaarg = (fntype
4674 ? (!prototype_p (fntype) || stdarg_p (fntype))
4679 /* If there are variable arguments, then we won't pass anything
4680 in registers in 32-bit mode. */
4681 if (stdarg_p (fntype))
4692 /* Use ecx and edx registers if function has fastcall attribute,
4693 else look for regparm information. */
4696 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4702 cum->nregs = ix86_function_regparm (fntype, fndecl);
4705 /* Set up the number of SSE registers used for passing SFmode
4706 and DFmode arguments. Warn for mismatching ABI. */
4707 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4711 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4712 But in the case of vector types, it is some vector mode.
4714 When we have only some of our vector isa extensions enabled, then there
4715 are some modes for which vector_mode_supported_p is false. For these
4716 modes, the generic vector support in gcc will choose some non-vector mode
4717 in order to implement the type. By computing the natural mode, we'll
4718 select the proper ABI location for the operand and not depend on whatever
4719 the middle-end decides to do with these vector types. */
4721 static enum machine_mode
4722 type_natural_mode (const_tree type)
4724 enum machine_mode mode = TYPE_MODE (type);
4726 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4728 HOST_WIDE_INT size = int_size_in_bytes (type);
/* Only 8- and 16-byte vectors have ABI-relevant vector modes here.  */
4729 if ((size == 8 || size == 16)
4730 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4731 && TYPE_VECTOR_SUBPARTS (type) > 1)
4733 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start the search at the smallest vector mode of the right class.  */
4735 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4736 mode = MIN_MODE_VECTOR_FLOAT;
4738 mode = MIN_MODE_VECTOR_INT;
4740 /* Get the mode which has this inner mode and number of units. */
4741 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4742 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4743 && GET_MODE_INNER (mode) == innermode)
4753 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4754 this may not agree with the mode that the type system has chosen for the
4755 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4756 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4759 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4764 if (orig_mode != BLKmode)
4765 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the register in a one-element PARALLEL at offset 0.  */
4768 tmp = gen_rtx_REG (mode, regno);
4769 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4770 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4776 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4777 of this code is to classify each 8bytes of incoming argument by the register
4778 class and assign registers accordingly. */
4780 /* Return the union class of CLASS1 and CLASS2.
4781 See the x86-64 PS ABI for details. */
4783 static enum x86_64_reg_class
4784 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4786 /* Rule #1: If both classes are equal, this is the resulting class. */
4787 if (class1 == class2)
4790 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4792 if (class1 == X86_64_NO_CLASS)
4794 if (class2 == X86_64_NO_CLASS)
4797 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4798 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4799 return X86_64_MEMORY_CLASS;
4801 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF merges to INTEGERSI (both halves are 32-bit).  */
4802 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4803 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4804 return X86_64_INTEGERSI_CLASS;
4805 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4806 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4807 return X86_64_INTEGER_CLASS;
4809 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4811 if (class1 == X86_64_X87_CLASS
4812 || class1 == X86_64_X87UP_CLASS
4813 || class1 == X86_64_COMPLEX_X87_CLASS
4814 || class2 == X86_64_X87_CLASS
4815 || class2 == X86_64_X87UP_CLASS
4816 || class2 == X86_64_COMPLEX_X87_CLASS)
4817 return X86_64_MEMORY_CLASS;
4819 /* Rule #6: Otherwise class SSE is used. */
4820 return X86_64_SSE_CLASS;
4823 /* Classify the argument of type TYPE and mode MODE.
4824 CLASSES will be filled by the register class used to pass each word
4825 of the operand. The number of words is returned. In case the parameter
4826 should be passed in memory, 0 is returned. As a special case for zero
4827 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4829 BIT_OFFSET is used internally for handling records and specifies offset
4830 of the offset in bits modulo 256 to avoid overflow cases.
4832 See the x86-64 PS ABI for details.
4836 classify_argument (enum machine_mode mode, const_tree type,
4837 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4839 HOST_WIDE_INT bytes =
4840 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of 8-byte "eightbytes" the argument occupies, accounting for
   a possibly unaligned starting bit offset.  */
4841 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4843 /* Variable sized entities are always passed/returned in memory. */
4847 if (mode != VOIDmode
4848 && targetm.calls.must_pass_in_stack (mode, type))
4851 if (type && AGGREGATE_TYPE_P (type))
4855 enum x86_64_reg_class subclasses[MAX_CLASSES];
4857 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
4861 for (i = 0; i < words; i++)
4862 classes[i] = X86_64_NO_CLASS;
4864 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4865 signal the memory class, so handle it as special case. */
4868 classes[0] = X86_64_NO_CLASS;
4872 /* Classify each field of record and merge classes. */
4873 switch (TREE_CODE (type))
4876 /* And now merge the fields of structure. */
4877 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4879 if (TREE_CODE (field) == FIELD_DECL)
4883 if (TREE_TYPE (field) == error_mark_node)
4886 /* Bitfields are always classified as integer. Handle them
4887 early, since later code would consider them to be
4888 misaligned integers. */
4889 if (DECL_BIT_FIELD (field))
/* "/ 8 / 8" converts a bit position into an eightbyte index.  */
4891 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4892 i < ((int_bit_position (field) + (bit_offset % 64))
4893 + tree_low_cst (DECL_SIZE (field), 0)
4896 merge_classes (X86_64_INTEGER_CLASS,
/* Recursively classify the field, then merge its per-eightbyte
   classes into the enclosing record's classification.  */
4901 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4902 TREE_TYPE (field), subclasses,
4903 (int_bit_position (field)
4904 + bit_offset) % 256);
4907 for (i = 0; i < num; i++)
4910 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4912 merge_classes (subclasses[i], classes[i + pos]);
4920 /* Arrays are handled as small records. */
4923 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
4924 TREE_TYPE (type), subclasses, bit_offset);
4928 /* The partial classes are now full classes. */
4929 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
4930 subclasses[0] = X86_64_SSE_CLASS;
4931 if (subclasses[0] == X86_64_INTEGERSI_CLASS
4932 && !((bit_offset % 64) == 0 && bytes == 4))
4933 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all words of the array.  */
4935 for (i = 0; i < words; i++)
4936 classes[i] = subclasses[i % num];
4941 case QUAL_UNION_TYPE:
4942 /* Unions are similar to RECORD_TYPE but offset is always 0.
4944 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4946 if (TREE_CODE (field) == FIELD_DECL)
4950 if (TREE_TYPE (field) == error_mark_node)
4953 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4954 TREE_TYPE (field), subclasses,
4958 for (i = 0; i < num; i++)
4959 classes[i] = merge_classes (subclasses[i], classes[i]);
4968 /* Final merger cleanup. */
4969 for (i = 0; i < words; i++)
4971 /* If one class is MEMORY, everything should be passed in
4973 if (classes[i] == X86_64_MEMORY_CLASS)
4976 /* The X86_64_SSEUP_CLASS should be always preceded by
4977 X86_64_SSE_CLASS. */
4978 if (classes[i] == X86_64_SSEUP_CLASS
4979 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4980 classes[i] = X86_64_SSE_CLASS;
4982 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4983 if (classes[i] == X86_64_X87UP_CLASS
4984 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4985 classes[i] = X86_64_SSE_CLASS;
4990 /* Compute alignment needed. We align all types to natural boundaries with
4991 exception of XFmode that is aligned to 64bits. */
4992 if (mode != VOIDmode && mode != BLKmode)
4994 int mode_alignment = GET_MODE_BITSIZE (mode);
4997 mode_alignment = 128;
4998 else if (mode == XCmode)
4999 mode_alignment = 256;
5000 if (COMPLEX_MODE_P (mode))
5001 mode_alignment /= 2;
5002 /* Misaligned fields are always returned in memory. */
5003 if (bit_offset % mode_alignment)
5007 /* for V1xx modes, just use the base mode */
5008 if (VECTOR_MODE_P (mode) && mode != V1DImode
5009 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5010 mode = GET_MODE_INNER (mode);
5012 /* Classification of atomic types. */
5017 classes[0] = X86_64_SSE_CLASS;
5020 classes[0] = X86_64_SSE_CLASS;
5021 classes[1] = X86_64_SSEUP_CLASS;
/* Integer modes: classify by total size including the bit offset,
   choosing INTEGERSI for values fitting in the low 32 bits.  */
5031 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5035 classes[0] = X86_64_INTEGERSI_CLASS;
5038 else if (size <= 64)
5040 classes[0] = X86_64_INTEGER_CLASS;
5043 else if (size <= 64+32)
5045 classes[0] = X86_64_INTEGER_CLASS;
5046 classes[1] = X86_64_INTEGERSI_CLASS;
5049 else if (size <= 64+64)
5051 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5059 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
/* SFmode: SSESF only if aligned to an eightbyte boundary.  */
5066 if (!(bit_offset % 64))
5067 classes[0] = X86_64_SSESF_CLASS;
5069 classes[0] = X86_64_SSE_CLASS;
5072 classes[0] = X86_64_SSEDF_CLASS;
5075 classes[0] = X86_64_X87_CLASS;
5076 classes[1] = X86_64_X87UP_CLASS;
5079 classes[0] = X86_64_SSE_CLASS;
5080 classes[1] = X86_64_SSEUP_CLASS;
5083 classes[0] = X86_64_SSE_CLASS;
5086 classes[0] = X86_64_SSEDF_CLASS;
5087 classes[1] = X86_64_SSEDF_CLASS;
5090 classes[0] = X86_64_COMPLEX_X87_CLASS;
5093 /* This mode is larger than 16 bytes. */
5101 classes[0] = X86_64_AVX_CLASS;
5109 classes[0] = X86_64_SSE_CLASS;
5110 classes[1] = X86_64_SSEUP_CLASS;
5117 classes[0] = X86_64_SSE_CLASS;
5123 gcc_assert (VECTOR_MODE_P (mode));
5128 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5130 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5131 classes[0] = X86_64_INTEGERSI_CLASS;
5133 classes[0] = X86_64_INTEGER_CLASS;
5134 classes[1] = X86_64_INTEGER_CLASS;
/* One eightbyte for up to 8 bytes, two otherwise.  */
5135 return 1 + (bytes > 8);
5139 /* Examine the argument and return set number of register required in each
5140 class. Return 0 iff parameter should be passed in memory. */
5142 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5143 int *int_nregs, int *sse_nregs)
5145 enum x86_64_reg_class regclass[MAX_CLASSES];
5146 int n = classify_argument (mode, type, regclass, 0);
/* Tally how many integer vs SSE registers the classification needs.  */
5152 for (n--; n >= 0; n--)
5153 switch (regclass[n])
5155 case X86_64_INTEGER_CLASS:
5156 case X86_64_INTEGERSI_CLASS:
5159 case X86_64_AVX_CLASS:
5160 case X86_64_SSE_CLASS:
5161 case X86_64_SSESF_CLASS:
5162 case X86_64_SSEDF_CLASS:
5165 case X86_64_NO_CLASS:
5166 case X86_64_SSEUP_CLASS:
/* x87 classes are only usable for return values, never arguments.  */
5168 case X86_64_X87_CLASS:
5169 case X86_64_X87UP_CLASS:
5173 case X86_64_COMPLEX_X87_CLASS:
5174 return in_return ? 2 : 0;
5175 case X86_64_MEMORY_CLASS:
5181 /* Construct container for the argument used by GCC interface. See
5182 FUNCTION_ARG for the detailed description. */
5185 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5186 const_tree type, int in_return, int nintregs, int nsseregs,
5187 const int *intreg, int sse_regno)
5189 /* The following variables hold the static issued_error state. */
/* Each diagnostic below is emitted at most once per compilation.  */
5190 static bool issued_sse_arg_error;
5191 static bool issued_sse_ret_error;
5192 static bool issued_x87_ret_error;
5194 enum machine_mode tmpmode;
5196 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5197 enum x86_64_reg_class regclass[MAX_CLASSES];
5201 int needed_sseregs, needed_intregs;
5202 rtx exp[MAX_CLASSES];
5205 n = classify_argument (mode, type, regclass, 0);
5208 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the required kinds: pass in memory.  */
5211 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5214 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5215 some less clueful developer tries to use floating-point anyway. */
5216 if (needed_sseregs && !TARGET_SSE)
5220 if (!issued_sse_ret_error)
5222 error ("SSE register return with SSE disabled");
5223 issued_sse_ret_error = true;
5226 else if (!issued_sse_arg_error)
5228 error ("SSE register argument with SSE disabled");
5229 issued_sse_arg_error = true;
5234 /* Likewise, error if the ABI requires us to return values in the
5235 x87 registers and the user specified -mno-80387. */
5236 if (!TARGET_80387 && in_return)
5237 for (i = 0; i < n; i++)
5238 if (regclass[i] == X86_64_X87_CLASS
5239 || regclass[i] == X86_64_X87UP_CLASS
5240 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5242 if (!issued_x87_ret_error)
5244 error ("x87 register return with x87 disabled");
5245 issued_x87_ret_error = true;
5250 /* First construct simple cases. Avoid SCmode, since we want to use
5251 single register to pass this type. */
5252 if (n == 1 && mode != SCmode)
5253 switch (regclass[0])
5255 case X86_64_INTEGER_CLASS:
5256 case X86_64_INTEGERSI_CLASS:
5257 return gen_rtx_REG (mode, intreg[0]);
5258 case X86_64_AVX_CLASS:
5259 case X86_64_SSE_CLASS:
5260 case X86_64_SSESF_CLASS:
5261 case X86_64_SSEDF_CLASS:
5262 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5263 case X86_64_X87_CLASS:
5264 case X86_64_COMPLEX_X87_CLASS:
5265 return gen_rtx_REG (mode, FIRST_STACK_REG);
5266 case X86_64_NO_CLASS:
5267 /* Zero sized array, struct or class. */
/* Two-eightbyte fast paths: whole value fits a single wide register.  */
5272 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5273 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5274 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5277 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5278 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5279 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5280 && regclass[1] == X86_64_INTEGER_CLASS
5281 && (mode == CDImode || mode == TImode || mode == TFmode)
5282 && intreg[0] + 1 == intreg[1])
5283 return gen_rtx_REG (mode, intreg[0]);
5285 /* Otherwise figure out the entries of the PARALLEL. */
5286 for (i = 0; i < n; i++)
5288 switch (regclass[i])
5290 case X86_64_NO_CLASS:
5292 case X86_64_INTEGER_CLASS:
5293 case X86_64_INTEGERSI_CLASS:
5294 /* Merge TImodes on aligned occasions here too. */
/* Trailing partial eightbyte: use the smallest integer mode that
   covers the remaining bytes.  */
5295 if (i * 8 + 8 > bytes)
5296 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5297 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5301 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5302 if (tmpmode == BLKmode)
5304 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5305 gen_rtx_REG (tmpmode, *intreg),
5309 case X86_64_SSESF_CLASS:
5310 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5311 gen_rtx_REG (SFmode,
5312 SSE_REGNO (sse_regno)),
5316 case X86_64_SSEDF_CLASS:
5317 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5318 gen_rtx_REG (DFmode,
5319 SSE_REGNO (sse_regno)),
5323 case X86_64_SSE_CLASS:
/* A following SSEUP means this eightbyte pairs with the next into
   one 16-byte SSE register.  */
5324 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
5328 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5329 gen_rtx_REG (tmpmode,
5330 SSE_REGNO (sse_regno)),
5332 if (tmpmode == TImode)
5341 /* Empty aligned struct, union or class. */
5345 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5346 for (i = 0; i < nexps; i++)
5347 XVECEXP (ret, 0, i) = exp [i];
5351 /* Update the data in CUM to advance over an argument of mode MODE
5352 and data type TYPE. (TYPE is null for libcalls where that information
5353 may not be available.) */
/* Advance the IA-32 (32-bit ABI) argument cursor in CUM past one
   argument of MODE/TYPE occupying BYTES bytes (WORDS words).
   NOTE(review): this numbered listing elides lines (return type,
   braces, the mode switch labels and else-branches are not visible);
   the comments below describe only what the visible lines show.  */
5356 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5357 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-class argument: consume WORDS general registers.  */
5373 cum->words += words;
5374 cum->nregs -= words;
5375 cum->regno += words;
5377 if (cum->nregs <= 0)
5385 if (cum->float_in_sse < 2)
5388 if (cum->float_in_sse < 1)
/* SSE-class argument: consume one SSE register unless the value is an
   aggregate (aggregates are not passed in SSE regs here).  */
5406 if (!type || !AGGREGATE_TYPE_P (type))
5408 cum->sse_words += words;
5409 cum->sse_nregs -= 1;
5410 cum->sse_regno += 1;
5411 if (cum->sse_nregs <= 0)
/* MMX-class argument: likewise consume one MMX register.  */
5424 if (!type || !AGGREGATE_TYPE_P (type))
5426 cum->mmx_words += words;
5427 cum->mmx_nregs -= 1;
5428 cum->mmx_regno += 1;
5429 if (cum->mmx_nregs <= 0)
/* Advance CUM past one x86-64 SysV argument.  The argument is
   classified via examine_argument; if it fits in the remaining
   integer/SSE registers those are consumed, otherwise only the
   stack word counter advances.
   NOTE(review): braces and some statements are elided in this
   listing (gaps in the embedded line numbers).  */
5440 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5441 tree type, HOST_WIDE_INT words, int named)
5443 int int_nregs, sse_nregs;
5445 /* Unnamed 256bit vector mode parameters are passed on stack.  */
5446 if (!named && VALID_AVX256_REG_MODE (mode))
/* Unclassifiable arguments are passed in memory.  */
5449 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5450 cum->words += words;
5451 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5453 cum->nregs -= int_nregs;
5454 cum->sse_nregs -= sse_nregs;
5455 cum->regno += int_nregs;
5456 cum->sse_regno += sse_nregs;
/* Not enough registers left: the whole argument goes on the stack.  */
5459 cum->words += words;
/* Advance CUM past one Microsoft x64 ABI argument.  In that ABI every
   register-passed value is 1, 2, 4 or 8 bytes; anything else must
   already have been converted to pass-by-reference (see
   ix86_pass_by_reference), hence the assert.  */
5463 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5464 HOST_WIDE_INT words)
5466 /* Otherwise, this should be passed indirect.  */
5467 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5469 cum->words += words;
/* Top-level argument-advance hook: compute the argument's size in
   bytes and words, then dispatch to the MS-x64, SysV-x64 or 32-bit
   worker depending on target and call ABI.  */
5478 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5479 tree type, int named)
5481 HOST_WIDE_INT bytes, words;
/* BLKmode carries no size; take it from the type instead.  */
5483 if (mode == BLKmode)
5484 bytes = int_size_in_bytes (type);
5486 bytes = GET_MODE_SIZE (mode);
5487 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Re-derive the natural mode (e.g. for vector types).
   NOTE(review): the guard line before this call is elided in the
   listing.  */
5490 mode = type_natural_mode (type);
5492 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5493 function_arg_advance_ms_64 (cum, bytes, words);
5494 else if (TARGET_64BIT)
5495 function_arg_advance_64 (cum, mode, type, words, named);
5497 function_arg_advance_32 (cum, mode, type, bytes, words);
5500 /* Define where to put the arguments to a function.
5501 Value is zero to push the argument on the stack,
5502 or a hard register in which to store the argument.
5504 MODE is the argument's machine mode.
5505 TYPE is the data type of the argument (as a tree).
5506 This is null for libcalls where that information may
5508 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5509 the preceding args and about the function being called.
5510 NAMED is nonzero if this argument is a named parameter
5511 (otherwise it is an extra parameter matching an ellipsis). */
/* Return the register (or PARALLEL) in which a 32-bit ABI argument of
   MODE/ORIG_MODE/TYPE is passed, or presumably NULL/zero to pass it on
   the stack (the fall-through returns are elided in this listing).
   Warns once per kind when an SSE/AVX/MMX vector argument is used
   while the corresponding ISA is disabled.  */
5514 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5515 enum machine_mode orig_mode, tree type,
5516 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* One-shot warning flags, shared across all calls.  */
5518 static bool warnedavx, warnedsse, warnedmmx;
5520 /* Avoid the AL settings for the Unix64 ABI.  */
5521 if (mode == VOIDmode)
/* Integer-like argument that still fits in the remaining regparm
   registers.  */
5537 if (words <= cum->nregs)
5539 int regno = cum->regno;
5541 /* Fastcall allocates the first two DWORD (SImode) or
5542 smaller arguments to ECX and EDX if it isn't an
   aggregate.  (Condition lines are partially elided here.)  */
5548 || (type && AGGREGATE_TYPE_P (type)))
5551 /* ECX not EAX is the first allocated register.  */
5552 if (regno == AX_REG)
5555 return gen_rtx_REG (mode, regno);
5560 if (cum->float_in_sse < 2)
5563 if (cum->float_in_sse < 1)
5567 /* In 32bit, we pass TImode in xmm registers.  */
5574 if (!type || !AGGREGATE_TYPE_P (type))
5576 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5579 warning (0, "SSE vector argument without SSE enabled "
5583 return gen_reg_or_parallel (mode, orig_mode,
5584 cum->sse_regno + FIRST_SSE_REG)
5589 /* In 32bit, we pass OImode in ymm registers.  */
5596 if (!type || !AGGREGATE_TYPE_P (type))
5598 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5601 warning (0, "AVX vector argument without AVX enabled "
5605 return gen_reg_or_parallel (mode, orig_mode,
5606 cum->sse_regno + FIRST_SSE_REG)
/* MMX-sized vectors go in MMX registers.  */
5615 if (!type || !AGGREGATE_TYPE_P (type))
5617 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5620 warning (0, "MMX vector argument without MMX enabled "
5624 return gen_reg_or_parallel (mode, orig_mode,
5625 cum->mmx_regno + FIRST_MMX_REG)
/* Return the rtx for an x86-64 SysV-ABI argument in MODE/ORIG_MODE of
   TYPE.  A VOIDmode "argument" is the hidden AL value carrying the
   number of SSE registers used by a varargs call.  Real arguments are
   lowered through construct_container.  */
5634 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5635 enum machine_mode orig_mode, tree type, int named)
5637 static bool warnedavx;
5639 /* Handle a hidden AL argument containing number of registers
5640 for varargs x86-64 functions.  */
5641 if (mode == VOIDmode)
5642 return GEN_INT (cum->maybe_vaarg
5643 ? (cum->sse_nregs < 0
5644 ? (cum->call_abi == DEFAULT_ABI
5646 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5647 : X64_SSE_REGPARM_MAX))
5662 /* In 64bit, we pass TImode in integer registers and OImode on
   the stack/in SSE regs (continuation elided in this listing).  */
5664 if (!type || !AGGREGATE_TYPE_P (type))
5666 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5669 warning (0, "AVX vector argument without AVX enabled "
5674 /* Unnamed 256bit vector mode parameters are passed on stack.  */
5680 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5682 &x86_64_int_parameter_registers [cum->regno],
/* Return the rtx for a Microsoft x64 ABI argument.  Each of the first
   four arguments has a fixed register slot; SFmode/DFmode use the SSE
   register of the same slot, and unnamed floats are passed in both the
   SSE and integer register (as a PARALLEL) so either convention works.
   NOTE(review): several guard/brace lines are elided in this listing.  */
5687 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5688 enum machine_mode orig_mode, int named,
5689 HOST_WIDE_INT bytes)
5693 /* Avoid the AL settings for the Unix64 ABI.  */
5694 if (mode == VOIDmode)
5697 /* If we've run out of registers, it goes on the stack.  */
5698 if (cum->nregs == 0)
5701 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5703 /* Only floating point modes are passed in anything but integer regs.  */
5704 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5707 regno = cum->regno + FIRST_SSE_REG;
5712 /* Unnamed floating parameters are passed in both the
5713 SSE and integer registers.  */
5714 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5715 t2 = gen_rtx_REG (mode, regno);
5716 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5717 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5718 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5721 /* Handle aggregated types passed in register.  */
5722 if (orig_mode == BLKmode)
5724 if (bytes > 0 && bytes <= 8)
5725 mode = (bytes > 4 ? DImode : SImode);
5726 if (mode == BLKmode)
5730 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG hook: compute the argument size and dispatch
   to the MS-x64, SysV-x64 or 32-bit worker.  Returns the register rtx
   (or PARALLEL) in which to pass the argument, or zero/NULL for the
   stack (per the target-hook contract documented above).  */
5734 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5735 tree type, int named)
5737 enum machine_mode mode = omode;
5738 HOST_WIDE_INT bytes, words;
5740 if (mode == BLKmode)
5741 bytes = int_size_in_bytes (type);
5743 bytes = GET_MODE_SIZE (mode);
5744 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5746 /* To simplify the code below, represent vector types with a vector mode
5747 even if MMX/SSE are not active.  */
5748 if (type && TREE_CODE (type) == VECTOR_TYPE)
5749 mode = type_natural_mode (type);
5751 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5752 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5753 else if (TARGET_64BIT)
5754 return function_arg_64 (cum, mode, omode, type, named);
5756 return function_arg_32 (cum, mode, omode, type, bytes, words);
5759 /* A C expression that indicates when an argument must be passed by
5760 reference. If nonzero for an argument, a copy of that argument is
5761 made in memory and a pointer to the argument is passed instead of
5762 the argument itself. The pointer is passed in whatever way is
5763 appropriate for passing a pointer to that type. */
/* TARGET_PASS_BY_REFERENCE hook: true when an argument of MODE/TYPE
   must be passed by hidden reference.  MS x64 passes arrays, aggregates
   of size other than 1/2/4/8 bytes, and __m128 by reference; SysV x64
   only variable-sized types (int_size_in_bytes == -1).
   NOTE(review): the switch around line 5790 is partially elided.  */
5766 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5767 enum machine_mode mode ATTRIBUTE_UNUSED,
5768 const_tree type, bool named ATTRIBUTE_UNUSED)
5770 /* See Windows x64 Software Convention.  */
5771 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5773 int msize = (int) GET_MODE_SIZE (mode);
5776 /* Arrays are passed by reference.  */
5777 if (TREE_CODE (type) == ARRAY_TYPE)
5780 if (AGGREGATE_TYPE_P (type))
5782 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5783 are passed by reference.  */
5784 msize = int_size_in_bytes (type);
5788 /* __m128 is passed by reference.  */
5790 case 1: case 2: case 4: case 8:
5796 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
5802 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Return true when TYPE is (or recursively contains) a value that
   requires 128-bit alignment for 32-bit argument passing, walking
   record/union fields and array element types.
   NOTE(review): the return statements and some case labels are elided
   in this listing.  */
5805 contains_aligned_value_p (tree type)
5807 enum machine_mode mode = TYPE_MODE (type);
5808 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
5812 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
5814 if (TYPE_ALIGN (type) < 128)
5817 if (AGGREGATE_TYPE_P (type))
5819 /* Walk the aggregates recursively.  */
5820 switch (TREE_CODE (type))
5824 case QUAL_UNION_TYPE:
5828 /* Walk all the structure fields.  */
5829 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5831 if (TREE_CODE (field) == FIELD_DECL
5832 && contains_aligned_value_p (TREE_TYPE (field)))
5839 /* Just for use if some languages pass arrays by value.  */
5840 if (contains_aligned_value_p (TREE_TYPE (type)))
5851 /* Gives the alignment boundary, in bits, of an argument with the
5852 specified mode and type. */
/* Gives the alignment boundary, in bits, of an argument with the
   specified MODE and TYPE.  Result is clamped to
   [PARM_BOUNDARY, BIGGEST_ALIGNMENT].  */
5855 ix86_function_arg_boundary (enum machine_mode mode, tree type)
5860 /* Since canonical type is used for call, we convert it to
5861 canonical type if needed.  */
5862 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
5863 type = TYPE_CANONICAL (type);
5864 align = TYPE_ALIGN (type);
/* No type: fall back to the mode's natural alignment.  */
5867 align = GET_MODE_ALIGNMENT (mode);
5868 if (align < PARM_BOUNDARY)
5869 align = PARM_BOUNDARY;
5870 /* In 32bit, only _Decimal128 and __float128 are aligned to their
5871 natural boundaries.  */
5872 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
5874 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
5875 make an exception for SSE modes since these require 128bit
   alignment.
5878 The handling here differs from field_alignment. ICC aligns MMX
5879 arguments to 4 byte boundaries, while structure fields are aligned
5880 to 8 byte boundaries.  */
5883 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
5884 align = PARM_BOUNDARY;
5888 if (!contains_aligned_value_p (type))
5889 align = PARM_BOUNDARY;
5892 if (align > BIGGEST_ALIGNMENT)
5893 align = BIGGEST_ALIGNMENT;
5897 /* Return true if N is a possible register number of function value. */
/* Return true if REGNO is a possible register number of function
   value.  NOTE(review): the switch header and other case labels are
   elided in this listing; only the FP-return case is visible.  */
5900 ix86_function_value_regno_p (int regno)
5907 case FIRST_FLOAT_REG:
5908 /* TODO: The function should depend on current function ABI but
5909 builtins.c would need updating then. Therefore we use the
   default ABI here (continuation elided).  */
5911 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
5913 return TARGET_FLOAT_RETURNS_IN_80387;
/* NOTE(review): case label elided before this line.  */
5919 if (TARGET_MACHO || TARGET_64BIT)
5927 /* Define how to find the value returned by a function.
5928 VALTYPE is the data type of the value (as a tree).
5929 If the precise function being called is known, FUNC is its FUNCTION_DECL;
5930 otherwise, FUNC is 0. */
/* Pick the return register for a 32-bit ABI function value of
   MODE/ORIG_MODE: %mm0 for 8-byte vectors, %xmm0 for TImode/16-byte
   vectors, %st(0) for x87 floats, %eax otherwise — with %xmm0
   overriding the FP case for sseregparm/SSE-math functions.  */
5933 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
5934 const_tree fntype, const_tree fn)
5938 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
5939 we normally prevent this case when mmx is not available. However
5940 some ABIs may require the result to be returned like DImode.  */
5941 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5942 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
5944 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
5945 we prevent this case when sse is not available. However some ABIs
5946 may require the result to be returned like integer TImode.  */
5947 else if (mode == TImode
5948 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5949 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
5951 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
5952 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
5953 regno = FIRST_FLOAT_REG;
5955 /* Most things go in %eax.  */
5958 /* Override FP return register with %xmm0 for local functions when
5959 SSE math is enabled or for functions with sseregparm attribute.  */
5960 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
5962 int sse_level = ix86_function_sseregparm (fntype, fn, false);
5963 if ((sse_level >= 1 && mode == SFmode)
5964 || (sse_level == 2 && mode == DFmode))
5965 regno = FIRST_SSE_REG;
5968 return gen_rtx_REG (orig_mode, regno);
/* Compute the return-value rtx for an x86-64 SysV function returning
   VALTYPE in MODE/ORIG_MODE.  Libcalls (no type) get a register chosen
   directly; typed values go through construct_container.
   NOTE(review): the libcall mode dispatch around lines 5979-5996 is
   largely elided in this listing.  */
5972 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
5977 /* Handle libcalls, which don't provide a type node.  */
5978 if (valtype == NULL)
5990 return gen_rtx_REG (mode, FIRST_SSE_REG);
5993 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
5997 return gen_rtx_REG (mode, AX_REG);
6001 ret = construct_container (mode, orig_mode, valtype, 1,
6002 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6003 x86_64_int_return_registers, 0);
6005 /* For zero sized structures, construct_container returns NULL, but we
6006 need to keep rest of compiler happy by returning meaningful value.  */
6008 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Compute the return register for the Microsoft x64 ABI: %rax by
   default, %xmm0 for 16-byte non-complex scalar/vector modes and for
   SFmode/DFmode.  NOTE(review): switch case labels (mode sizes) are
   elided in this listing.  */
6014 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6016 unsigned int regno = AX_REG;
6020 switch (GET_MODE_SIZE (mode))
6023 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6024 && !COMPLEX_MODE_P (mode))
6025 regno = FIRST_SSE_REG;
6029 if (mode == SFmode || mode == DFmode)
6030 regno = FIRST_SSE_REG;
6036 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   resolve FNTYPE_OR_DECL (a FUNCTION_DECL or a function type, possibly
   NULL) and dispatch to the per-ABI return-value computation.  */
6040 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6041 enum machine_mode orig_mode, enum machine_mode mode)
6043 const_tree fn, fntype;
6046 if (fntype_or_decl && DECL_P (fntype_or_decl))
6047 fn = fntype_or_decl;
6048 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6050 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6051 return function_value_ms_64 (orig_mode, mode);
6052 else if (TARGET_64BIT)
6053 return function_value_64 (orig_mode, mode, valtype);
6055 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: return the rtx for a value of VALTYPE
   returned by FNTYPE_OR_DECL, using the type's natural mode.  */
6059 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6060 bool outgoing ATTRIBUTE_UNUSED)
6062 enum machine_mode mode, orig_mode;
6064 orig_mode = TYPE_MODE (valtype);
6065 mode = type_natural_mode (valtype);
6066 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* LIBCALL_VALUE hook: libcalls have no type node, so pass NULL and use
   MODE for both the original and natural mode.  */
6070 ix86_libcall_value (enum machine_mode mode)
6072 return ix86_function_value_1 (NULL, NULL, mode, mode);
6075 /* Return true iff type is returned in memory. */
/* Return nonzero if a 32-bit ABI value of TYPE/MODE is returned in
   memory rather than in registers.
   NOTE(review): several branches (BLKmode result, the trailing cases
   past line 6103) are elided in this listing.  */
6077 static int ATTRIBUTE_UNUSED
6078 return_in_memory_32 (const_tree type, enum machine_mode mode)
6082 if (mode == BLKmode)
6085 size = int_size_in_bytes (type);
6087 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6090 if (VECTOR_MODE_P (mode) || mode == TImode)
6092 /* User-created vectors small enough to fit in EAX.  */
6096 /* MMX/3dNow values are returned in MM0,
6097 except when it doesn't exist.  */
6099 return (TARGET_MMX ? 0 : 1);
6101 /* SSE values are returned in XMM0, except when it doesn't exist.  */
6103 return (TARGET_SSE ? 0 : 1);
/* x86-64 SysV ABI: a value is returned in memory exactly when
   examine_argument cannot classify it into registers.  */
6114 static int ATTRIBUTE_UNUSED
6115 return_in_memory_64 (const_tree type, enum machine_mode mode)
6117 int needed_intregs, needed_sseregs;
6118 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Microsoft x64 ABI: 16-byte non-complex scalar/vector values (__m128)
   come back in xmm0; otherwise only values of size 1, 2, 4 or 8 bytes
   are returned in registers, everything else in memory.  */
6121 static int ATTRIBUTE_UNUSED
6122 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6124 HOST_WIDE_INT size = int_size_in_bytes (type);
6126 /* __m128 is returned in xmm0.  */
6127 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6128 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6131 /* Otherwise, the size must be exactly in [1248].  */
6132 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: dispatch to the per-ABI worker, unless
   the subtarget overrides the whole decision.  */
6136 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6138 #ifdef SUBTARGET_RETURN_IN_MEMORY
6139 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6141 const enum machine_mode mode = type_natural_mode (type);
6143 if (TARGET_64BIT_MS_ABI)
6144 return return_in_memory_ms_64 (type, mode);
6145 else if (TARGET_64BIT)
6146 return return_in_memory_64 (type, mode);
6148 return return_in_memory_32 (type, mode);
6152 /* Return false iff TYPE is returned in memory. This version is used
6153 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6154 but differs notably in that when MMX is available, 8-byte vectors
6155 are returned in memory, rather than in MMX registers. */
/* Solaris 10 variant of the return-in-memory predicate (see comment
   above): notably, 8-byte vectors go to memory when MMX *is* available.
   NOTE(review): the individual return statements are elided in this
   listing.  */
6158 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6161 enum machine_mode mode = type_natural_mode (type);
6164 return return_in_memory_64 (type, mode);
6166 if (mode == BLKmode)
6169 size = int_size_in_bytes (type);
6171 if (VECTOR_MODE_P (mode))
6173 /* Return in memory only if MMX registers *are* available. This
6174 seems backwards, but it is consistent with the existing
   Solaris behavior (continuation elided).  */
6181 else if (mode == TImode)
6183 else if (mode == XFmode)
6189 /* When returning SSE vector types, we have a choice of either
6190 (1) being abi incompatible with a -march switch, or
6191 (2) generating an error.
6192 Given no good solution, I think the safest thing is one warning.
6193 The user won't be able to use -Werror, but....
6195 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6196 called in response to actually generating a caller or callee that
6197 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6198 via aggregate_value_p for general type probing from tree-ssa. */
/* TARGET_STRUCT_VALUE_RTX hook, abused (per the comment above) as the
   place to emit a once-only warning when a function returns an SSE or
   MMX vector while the corresponding ISA is disabled.  */
6201 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6203 static bool warnedsse, warnedmmx;
6205 if (!TARGET_64BIT && type)
6207 /* Look at the return type of the function, not the function type.  */
6208 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6210 if (!TARGET_SSE && !warnedsse)
6213 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6216 warning (0, "SSE vector return without SSE enabled "
6221 if (!TARGET_MMX && !warnedmmx)
6223 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6226 warning (0, "MMX vector return without MMX enabled "
6236 /* Create the va_list data type. */
6238 /* Returns the calling convention specific va_list date type.
6239 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* Build the va_list type for calling convention ABI.  32-bit and MS
   x64 use a plain char pointer; SysV x64 builds the four-field record
   { gp_offset, fp_offset, overflow_arg_area, reg_save_area } wrapped
   in a one-element array, as the psABI specifies.  */
6242 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6244 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6246 /* For i386 we use plain pointer to argument area.  */
6247 if (!TARGET_64BIT || abi == MS_ABI)
6248 return build_pointer_type (char_type_node);
6250 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6251 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6253 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6254 unsigned_type_node);
6255 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6256 unsigned_type_node);
6257 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6259 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so the va_list-usage analysis can find
   them.  */
6262 va_list_gpr_counter_field = f_gpr;
6263 va_list_fpr_counter_field = f_fpr;
6265 DECL_FIELD_CONTEXT (f_gpr) = record;
6266 DECL_FIELD_CONTEXT (f_fpr) = record;
6267 DECL_FIELD_CONTEXT (f_ovf) = record;
6268 DECL_FIELD_CONTEXT (f_sav) = record;
6270 TREE_CHAIN (record) = type_decl;
6271 TYPE_NAME (record) = type_decl;
6272 TYPE_FIELDS (record) = f_gpr;
6273 TREE_CHAIN (f_gpr) = f_fpr;
6274 TREE_CHAIN (f_fpr) = f_ovf;
6275 TREE_CHAIN (f_ovf) = f_sav;
6277 layout_type (record);
6279 /* The correct type is an array type of one element.  */
6280 return build_array_type (record, build_index_type (size_zero_node));
6283 /* Setup the builtin va_list data type and for 64-bit the additional
6284 calling convention specific va_list data types. */
/* Build the default va_list type, and also the ABI-specific
   sysv_va_list_type_node / ms_va_list_type_node variants so __builtin
   va_list handling can cope with cross-ABI calls.
   NOTE(review): the TARGET_64BIT guards and else-branch structure are
   partially elided in this listing.  */
6287 ix86_build_builtin_va_list (void)
6289 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6291 /* Initialize abi specific va_list builtin types.  */
6295 if (DEFAULT_ABI == MS_ABI)
6297 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6298 if (TREE_CODE (t) != RECORD_TYPE)
6299 t = build_variant_type_copy (t);
6300 sysv_va_list_type_node = t;
/* Default ABI already is SysV: reuse RET (elided assignment above).  */
6305 if (TREE_CODE (t) != RECORD_TYPE)
6306 t = build_variant_type_copy (t);
6307 sysv_va_list_type_node = t;
6309 if (DEFAULT_ABI != MS_ABI)
6311 t = ix86_build_builtin_va_list_abi (MS_ABI);
6312 if (TREE_CODE (t) != RECORD_TYPE)
6313 t = build_variant_type_copy (t);
6314 ms_va_list_type_node = t;
/* Default ABI already is MS: reuse RET (elided assignment above).  */
6319 if (TREE_CODE (t) != RECORD_TYPE)
6320 t = build_variant_type_copy (t);
6321 ms_va_list_type_node = t;
6328 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* Emit prologue code for a SysV x86-64 varargs function that dumps the
   unnamed general-purpose and SSE argument registers into the register
   save area, so va_arg can later fetch them.  GPRs are stored with
   plain moves; SSE registers via the sse_prologue_save computed-jump
   template keyed off AL (the hidden SSE-count argument).
   NOTE(review): many brace/declaration lines are elided in this
   listing.  */
6331 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6340 int regparm = ix86_regparm;
6342 if (cum->call_abi != DEFAULT_ABI)
6343 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6345 /* GPR size of varargs save area.  */
6346 if (cfun->va_list_gpr_size)
6347 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6349 ix86_varargs_gpr_size = 0;
6351 /* FPR size of varargs save area. We don't need it if we don't pass
6352 anything in SSE registers.  */
6353 if (cum->sse_nregs && cfun->va_list_fpr_size)
6354 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6356 ix86_varargs_fpr_size = 0;
6358 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6361 save_area = frame_pointer_rtx;
6362 set = get_varargs_alias_set ();
/* Save the unnamed integer argument registers.  */
6364 for (i = cum->regno;
6366 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6369 mem = gen_rtx_MEM (Pmode,
6370 plus_constant (save_area, i * UNITS_PER_WORD));
6371 MEM_NOTRAP_P (mem) = 1;
6372 set_mem_alias_set (mem, set);
6373 emit_move_insn (mem, gen_rtx_REG (Pmode,
6374 x86_64_int_parameter_registers[i]));
6377 if (ix86_varargs_fpr_size)
6379 /* Now emit code to save SSE registers. The AX parameter contains number
6380 of SSE parameter registers used to call this function. We use
6381 sse_prologue_save insn template that produces computed jump across
6382 SSE saves. We need some preparation work to get this working.  */
6384 label = gen_label_rtx ();
6385 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6387 /* Compute address to jump to :
6388 label - eax*4 + nnamed_sse_arguments*4 Or
6389 label - eax*5 + nnamed_sse_arguments*5 for AVX.  */
6390 tmp_reg = gen_reg_rtx (Pmode);
6391 nsse_reg = gen_reg_rtx (Pmode);
6392 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6393 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6394 gen_rtx_MULT (Pmode, nsse_reg,
6397 /* vmovaps is one byte longer than movaps.  */
6399 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6400 gen_rtx_PLUS (Pmode, tmp_reg,
6406 gen_rtx_CONST (DImode,
6407 gen_rtx_PLUS (DImode,
6409 GEN_INT (cum->sse_regno
6410 * (TARGET_AVX ? 5 : 4)))));
6412 emit_move_insn (nsse_reg, label_ref);
6413 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6415 /* Compute address of memory block we save into. We always use pointer
6416 pointing 127 bytes after first byte to store - this is needed to keep
6417 instruction size limited by 4 bytes (5 bytes for AVX) with one
6418 byte displacement.  */
6419 tmp_reg = gen_reg_rtx (Pmode);
6420 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6421 plus_constant (save_area,
6422 ix86_varargs_gpr_size + 127)));
6423 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6424 MEM_NOTRAP_P (mem) = 1;
6425 set_mem_alias_set (mem, set);
6426 set_mem_align (mem, BITS_PER_WORD);
6428 /* And finally do the dirty job!  */
6429 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6430 GEN_INT (cum->sse_regno), label));
/* MS x64 varargs prologue: spill each remaining named-register slot to
   its home location in the caller-allocated shadow area above the
   incoming arguments, so va_arg can walk them linearly.  */
6435 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6437 alias_set_type set = get_varargs_alias_set ();
6440 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6444 mem = gen_rtx_MEM (Pmode,
6445 plus_constant (virtual_incoming_args_rtx,
6446 i * UNITS_PER_WORD));
6447 MEM_NOTRAP_P (mem) = 1;
6448 set_mem_alias_set (mem, set);
6450 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6451 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: copy CUM, advance it past the
   last named argument for stdarg functions, then emit the per-ABI
   register-save code.  */
6456 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6457 tree type, int *pretend_size ATTRIBUTE_UNUSED,
/* no_rtl parameter follows (line elided in this listing).  */
6460 CUMULATIVE_ARGS next_cum;
6463 /* This argument doesn't appear to be used anymore. Which is good,
6464 because the old code here didn't suppress rtl generation.  */
6465 gcc_assert (!no_rtl);
6470 fntype = TREE_TYPE (current_function_decl);
6472 /* For varargs, we do not want to skip the dummy va_dcl argument.
6473 For stdargs, we do want to skip the last named argument.  */
6475 if (stdarg_p (fntype))
6476 function_arg_advance (&next_cum, mode, type, 1);
6478 if (cum->call_abi == MS_ABI)
6479 setup_incoming_varargs_ms_64 (&next_cum);
6481 setup_incoming_varargs_64 (&next_cum);
6484 /* Checks if TYPE is of kind va_list char *. */
/* Checks if TYPE is a "char *"-style va_list: always true for 32-bit
   targets, and on 64-bit targets true for the MS va_list (and for the
   generic va_list when the default ABI is MS).  */
6487 is_va_list_char_pointer (tree type)
6491 /* For 32-bit it is always true.  */
6494 canonic = ix86_canonical_va_list_type (type);
6495 return (canonic == ms_va_list_type_node
6496 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6499 /* Implement va_start. */
/* Implement va_start: initialize the SysV x86-64 va_list record
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area) from the
   current function's incoming-argument bookkeeping.  Char-pointer
   va_lists (32-bit and MS ABI) fall back to the generic expander.  */
6502 ix86_va_start (tree valist, rtx nextarg)
6504 HOST_WIDE_INT words, n_gpr, n_fpr;
6505 tree f_gpr, f_fpr, f_ovf, f_sav;
6506 tree gpr, fpr, ovf, sav, t;
6509 /* Only 64bit target needs something special.  */
6510 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6512 std_expand_builtin_va_start (valist, nextarg);
/* Walk the four fields of the __va_list_tag record, in declaration
   order (see ix86_build_builtin_va_list_abi).  */
6516 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6517 f_fpr = TREE_CHAIN (f_gpr);
6518 f_ovf = TREE_CHAIN (f_fpr);
6519 f_sav = TREE_CHAIN (f_ovf);
6521 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6522 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6523 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6524 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6525 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6527 /* Count number of gp and fp argument registers used.  */
6528 words = crtl->args.info.words;
6529 n_gpr = crtl->args.info.regno;
6530 n_fpr = crtl->args.info.sse_regno;
6532 if (cfun->va_list_gpr_size)
/* gp_offset = bytes of GPR save area already consumed by named args.  */
6534 type = TREE_TYPE (gpr);
6535 t = build2 (MODIFY_EXPR, type,
6536 gpr, build_int_cst (type, n_gpr * 8));
6537 TREE_SIDE_EFFECTS (t) = 1;
6538 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6541 if (TARGET_SSE && cfun->va_list_fpr_size)
/* fp_offset starts after the 8-byte slots of all GPR save entries.  */
6543 type = TREE_TYPE (fpr);
6544 t = build2 (MODIFY_EXPR, type, fpr,
6545 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6546 TREE_SIDE_EFFECTS (t) = 1;
6547 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6550 /* Find the overflow area.  */
6551 type = TREE_TYPE (ovf);
6552 t = make_tree (type, crtl->args.internal_arg_pointer);
6554 t = build2 (POINTER_PLUS_EXPR, type, t,
6555 size_int (words * UNITS_PER_WORD));
6556 t = build2 (MODIFY_EXPR, type, ovf, t);
6557 TREE_SIDE_EFFECTS (t) = 1;
6558 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6560 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6562 /* Find the register save area.
6563 Prologue of the function save it right above stack frame.  */
6564 type = TREE_TYPE (sav);
6565 t = make_tree (type, frame_pointer_rtx);
6566 if (!ix86_varargs_gpr_size)
6567 t = build2 (POINTER_PLUS_EXPR, type, t,
6568 size_int (-8 * X86_64_REGPARM_MAX));
6569 t = build2 (MODIFY_EXPR, type, sav, t);
6570 TREE_SIDE_EFFECTS (t) = 1;
6571 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6575 /* Implement va_arg. */
6578 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6581 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6582 tree f_gpr, f_fpr, f_ovf, f_sav;
6583 tree gpr, fpr, ovf, sav, t;
6585 tree lab_false, lab_over = NULL_TREE;
6590 enum machine_mode nat_mode;
6593 /* Only 64bit target needs something special. */
6594 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6595 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6597 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6598 f_fpr = TREE_CHAIN (f_gpr);
6599 f_ovf = TREE_CHAIN (f_fpr);
6600 f_sav = TREE_CHAIN (f_ovf);
6602 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6603 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6604 valist = build_va_arg_indirect_ref (valist);
6605 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6606 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6607 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6609 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6611 type = build_pointer_type (type);
6612 size = int_size_in_bytes (type);
6613 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6615 nat_mode = type_natural_mode (type);
6624 /* Unnamed 256bit vector mode parameters are passed on stack. */
6625 if (ix86_cfun_abi () == SYSV_ABI)
6632 container = construct_container (nat_mode, TYPE_MODE (type),
6633 type, 0, X86_64_REGPARM_MAX,
6634 X86_64_SSE_REGPARM_MAX, intreg,
6639 /* Pull the value out of the saved registers. */
6641 addr = create_tmp_var (ptr_type_node, "addr");
6642 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6646 int needed_intregs, needed_sseregs;
6648 tree int_addr, sse_addr;
6650 lab_false = create_artificial_label ();
6651 lab_over = create_artificial_label ();
6653 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6655 need_temp = (!REG_P (container)
6656 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6657 || TYPE_ALIGN (type) > 128));
6659 /* In case we are passing structure, verify that it is consecutive block
6660 on the register save area. If not we need to do moves. */
6661 if (!need_temp && !REG_P (container))
6663 /* Verify that all registers are strictly consecutive */
6664 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6668 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6670 rtx slot = XVECEXP (container, 0, i);
6671 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6672 || INTVAL (XEXP (slot, 1)) != i * 16)
6680 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6682 rtx slot = XVECEXP (container, 0, i);
6683 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6684 || INTVAL (XEXP (slot, 1)) != i * 8)
6696 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6697 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6698 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6699 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6702 /* First ensure that we fit completely in registers. */
6705 t = build_int_cst (TREE_TYPE (gpr),
6706 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6707 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6708 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6709 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6710 gimplify_and_add (t, pre_p);
6714 t = build_int_cst (TREE_TYPE (fpr),
6715 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6716 + X86_64_REGPARM_MAX * 8);
6717 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6718 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6719 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6720 gimplify_and_add (t, pre_p);
6723 /* Compute index to start of area used for integer regs. */
6726 /* int_addr = gpr + sav; */
6727 t = fold_convert (sizetype, gpr);
6728 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6729 gimplify_assign (int_addr, t, pre_p);
6733 /* sse_addr = fpr + sav; */
6734 t = fold_convert (sizetype, fpr);
6735 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6736 gimplify_assign (sse_addr, t, pre_p);
6741 tree temp = create_tmp_var (type, "va_arg_tmp");
6744 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6745 gimplify_assign (addr, t, pre_p);
6747 for (i = 0; i < XVECLEN (container, 0); i++)
6749 rtx slot = XVECEXP (container, 0, i);
6750 rtx reg = XEXP (slot, 0);
6751 enum machine_mode mode = GET_MODE (reg);
6752 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6753 tree addr_type = build_pointer_type (piece_type);
6754 tree daddr_type = build_pointer_type_for_mode (piece_type,
6758 tree dest_addr, dest;
6760 if (SSE_REGNO_P (REGNO (reg)))
6762 src_addr = sse_addr;
6763 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6767 src_addr = int_addr;
6768 src_offset = REGNO (reg) * 8;
6770 src_addr = fold_convert (addr_type, src_addr);
6771 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
6772 size_int (src_offset));
6773 src = build_va_arg_indirect_ref (src_addr);
6775 dest_addr = fold_convert (daddr_type, addr);
6776 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
6777 size_int (INTVAL (XEXP (slot, 1))));
6778 dest = build_va_arg_indirect_ref (dest_addr);
6780 gimplify_assign (dest, src, pre_p);
6786 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
6787 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
6788 gimplify_assign (gpr, t, pre_p);
6793 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
6794 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
6795 gimplify_assign (fpr, t, pre_p);
6798 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
6800 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
6803 /* ... otherwise out of the overflow area. */
6805 /* When we align parameter on stack for caller, if the parameter
6806 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
6807 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
6808 here with caller. */
6809 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
6810 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
6811 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
6813 /* Care for on-stack alignment if needed. */
6814 if (arg_boundary <= 64
6815 || integer_zerop (TYPE_SIZE (type)))
6819 HOST_WIDE_INT align = arg_boundary / 8;
6820 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
6821 size_int (align - 1));
6822 t = fold_convert (sizetype, t);
6823 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6825 t = fold_convert (TREE_TYPE (ovf), t);
6827 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
6828 gimplify_assign (addr, t, pre_p);
6830 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
6831 size_int (rsize * UNITS_PER_WORD));
6832 gimplify_assign (unshare_expr (ovf), t, pre_p);
6835 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
6837 ptrtype = build_pointer_type (type);
6838 addr = fold_convert (ptrtype, addr);
6841 addr = build_va_arg_indirect_ref (addr);
6842 return build_va_arg_indirect_ref (addr);
6845 /* Return nonzero if OPNUM's MEM should be matched
6846 in movabs* patterns. */
/* NOTE(review): the function header line, opening brace and the local
   declarations of SET and MEM are elided in this excerpt.  */
6849 ix86_check_movabs (rtx insn, int opnum)
6853 set = PATTERN (insn);
/* A PARALLEL (e.g. a SET plus clobbers) keeps the interesting SET as
   element 0 of its vector.  */
6854 if (GET_CODE (set) == PARALLEL)
6855 set = XVECEXP (set, 0, 0);
6856 gcc_assert (GET_CODE (set) == SET);
6857 mem = XEXP (set, opnum);
/* Strip any SUBREG wrappers to reach the underlying MEM.  */
6858 while (GET_CODE (mem) == SUBREG)
6859 mem = SUBREG_REG (mem);
6860 gcc_assert (MEM_P (mem));
/* Volatile memory only matches when volatile_ok permits it.  */
6861 return (volatile_ok || !MEM_VOLATILE_P (mem));
6864 /* Initialize the table of extra 80387 mathematical constants. */
6867 init_ext_80387_constants (void)
/* Decimal expansions of the constants the x87 can load with a single
   instruction (see the per-entry opcode comments below).  */
6869 static const char * cst[5] =
6871 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
6872 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
6873 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
6874 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
6875 "3.1415926535897932385128089594061862044", /* 4: fldpi */
6879 for (i = 0; i < 5; i++)
6881 real_from_string (&ext_80387_constants_table[i], cst[i]);
6882 /* Ensure each constant is rounded to XFmode precision. */
6883 real_convert (&ext_80387_constants_table[i],
6884 XFmode, &ext_80387_constants_table[i]);
/* Mark the table as filled so callers only initialize it once.  */
6887 ext_80387_constants_init = 1;
6890 /* Return true if the constant is something that can be loaded with
6891 a special instruction. */
/* NOTE(review): the return statements for the cases below are elided in
   this excerpt; the exact codes returned cannot be confirmed from here.  */
6894 standard_80387_constant_p (rtx x)
6896 enum machine_mode mode = GET_MODE (x);
/* Only x87 floating-point CONST_DOUBLEs are candidates.  */
6900 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
6903 if (x == CONST0_RTX (mode))
6905 if (x == CONST1_RTX (mode))
6908 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6910 /* For XFmode constants, try to find a special 80387 instruction when
6911 optimizing for size or on those CPUs that benefit from them. */
6913 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
/* Lazily build the table of extended constants on first use.  */
6917 if (! ext_80387_constants_init)
6918 init_ext_80387_constants ();
6920 for (i = 0; i < 5; i++)
6921 if (real_identical (&r, &ext_80387_constants_table[i]))
6925 /* Load of the constant -0.0 or -1.0 will be split as
6926 fldz;fchs or fld1;fchs sequence. */
6927 if (real_isnegzero (&r))
6929 if (real_identical (&r, &dconstm1))
6935 /* Return the opcode of the special instruction to be used to load
/* NOTE(review): the switch cases mapping standard_80387_constant_p
   results to opcode strings are elided in this excerpt.  */
6939 standard_80387_constant_opcode (rtx x)
6941 switch (standard_80387_constant_p (x))
6965 /* Return the CONST_DOUBLE representing the 80387 constant that is
6966 loaded by the specified special instruction. The argument IDX
6967 matches the return value from standard_80387_constant_p. */
6970 standard_80387_constant_rtx (int idx)
/* Make sure the extended-constant table is built before indexing it.  */
6974 if (! ext_80387_constants_init)
6975 init_ext_80387_constants ();
/* NOTE(review): the mapping from IDX to the table index I (and the mode
   argument of this call) is elided in this excerpt.  */
6991 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
6995 /* Return 1 if mode is a valid mode for sse. */
/* NOTE(review): the switch/case body listing the accepted SSE modes is
   elided in this excerpt.  */
6997 standard_sse_mode_p (enum machine_mode mode)
7014 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
7015 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7016 modes and AVX is enabled. */
7019 standard_sse_constant_p (rtx x)
7021 enum machine_mode mode = GET_MODE (x);
7023 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7025 if (vector_all_ones_operand (x, mode))
/* Negative codes flag an all-ones constant whose required ISA extension
   (SSE2 or AVX) is not enabled.  */
7027 if (standard_sse_mode_p (mode))
7028 return TARGET_SSE2 ? 2 : -2;
7029 else if (VALID_AVX256_REG_MODE (mode))
7030 return TARGET_AVX ? 3 : -3;
7036 /* Return the opcode of the special instruction to be used to load
7040 standard_sse_constant_opcode (rtx insn, rtx x)
/* Dispatch on the constant class (all-zeros vs. all-ones) as classified
   by standard_sse_constant_p, then on the insn's mode attribute.  */
7042 switch (standard_sse_constant_p (x))
7045 switch (get_attr_mode (insn))
/* All-zeros: xor the destination with itself; AVX uses the
   three-operand VEX form.  */
7048 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7050 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7052 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7054 return "vxorps\t%x0, %x0, %x0";
7056 return "vxorpd\t%x0, %x0, %x0";
7058 return "vpxor\t%x0, %x0, %x0";
/* All-ones: pcmpeqd of a register with itself sets every bit.  */
7064 switch (get_attr_mode (insn))
7069 return "vpcmpeqd\t%0, %0, %0";
7075 return "pcmpeqd\t%0, %0";
7080 /* Returns 1 if OP contains a symbol reference */
7083 symbolic_reference_mentioned_p (rtx op)
7088 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
/* Walk every sub-expression of OP, recursing through both expression
   ('e') and expression-vector ('E') operands.  */
7091 fmt = GET_RTX_FORMAT (GET_CODE (op));
7092 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7098 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7099 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7103 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7110 /* Return 1 if it is appropriate to emit `ret' instructions in the
7111 body of a function. Do this only if the epilogue is simple, needing a
7112 couple of insns. Prior to reloading, we can't tell how many registers
7113 must be saved, so return 0 then. Return 0 if there is no frame
7114 marker to de-allocate. */
7117 ix86_can_use_return_insn_p (void)
7119 struct ix86_frame frame;
7121 if (! reload_completed || frame_pointer_needed)
7124 /* Don't allow more than 32768 bytes of pops, since that's all we
7125 can do with one instruction. */
7126 if (crtl->args.pops_args
7127 && crtl->args.size >= 32768)
/* A plain `ret' is only possible when nothing remains to deallocate
   and no registers need restoring.  */
7130 ix86_compute_frame_layout (&frame);
7131 return frame.to_allocate == 0 && frame.nregs == 0;
7134 /* Value should be nonzero if functions must have frame pointers.
7135 Zero means the frame pointer need not be set up (and parms may
7136 be accessed via the stack pointer) in functions that seem suitable. */
7139 ix86_frame_pointer_required (void)
7141 /* If we accessed previous frames, then the generated code expects
7142 to be able to access the saved ebp value in our frame. */
7143 if (cfun->machine->accesses_prev_frame)
7146 /* Several x86 os'es need a frame pointer for other reasons,
7147 usually pertaining to setjmp. */
7148 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7151 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7152 the frame pointer by default. Turn it back on now if we've not
7153 got a leaf function. */
7154 if (TARGET_OMIT_LEAF_FRAME_POINTER
7155 && (!current_function_is_leaf
7156 || ix86_current_function_calls_tls_descriptor))
/* NOTE(review): the individual `return' statements and the final
   default return are elided in this excerpt.  */
7165 /* Record that the current function accesses previous call frames. */
/* This flag is read by ix86_frame_pointer_required to force a frame
   pointer when prior frames are accessed.  */
7168 ix86_setup_frame_addresses (void)
7170 cfun->machine->accesses_prev_frame = 1;
/* Use a hidden, link-once definition for the PC thunk when the
   assembler/target supports it; otherwise fall back to a local label.  */
7173 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7174 # define USE_HIDDEN_LINKONCE 1
7176 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of register numbers whose PC-thunk label has been emitted;
   bit REGNO is set by output_set_got and read by ix86_file_end.  */
7179 static int pic_labels_used;
7181 /* Fills in the label name that should be used for a pc thunk for
7182 the given register. */
7185 get_pc_thunk_name (char name[32], unsigned int regno)
/* PC thunks are a 32-bit PIC mechanism only.  */
7187 gcc_assert (!TARGET_64BIT)
/* Link-once thunks get a well-known global name so duplicates across
   objects can be merged; otherwise use a file-local label.  */
7189 if (USE_HIDDEN_LINKONCE)
7190 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7192 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7196 /* This function generates code for -fpic that loads %ebx with
7197 the return address of the caller and then returns. */
7200 ix86_file_end (void)
/* Emit one get-pc thunk per register that output_set_got recorded in
   pic_labels_used.  */
7205 for (regno = 0; regno < 8; ++regno)
7209 if (! ((pic_labels_used >> regno) & 1))
7212 get_pc_thunk_name (name, regno);
/* Darwin variant: weak, private-extern definition in the coalesced
   text section.  (The surrounding #if TARGET_MACHO guard is elided.)  */
7217 switch_to_section (darwin_sections[text_coal_section]);
7218 fputs ("\t.weak_definition\t", asm_out_file);
7219 assemble_name (asm_out_file, name);
7220 fputs ("\n\t.private_extern\t", asm_out_file);
7221 assemble_name (asm_out_file, name);
7222 fputs ("\n", asm_out_file);
7223 ASM_OUTPUT_LABEL (asm_out_file, name);
/* ELF variant: a public, hidden, one-only function in its own section
   so the linker can merge identical thunks.  */
7227 if (USE_HIDDEN_LINKONCE)
7231 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7233 TREE_PUBLIC (decl) = 1;
7234 TREE_STATIC (decl) = 1;
7235 DECL_ONE_ONLY (decl) = 1;
7237 (*targetm.asm_out.unique_section) (decl, 0);
7238 switch_to_section (get_named_section (decl, NULL, 0));
7240 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7241 fputs ("\t.hidden\t", asm_out_file);
7242 assemble_name (asm_out_file, name);
7243 fputc ('\n', asm_out_file);
7244 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7248 switch_to_section (text_section);
7249 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (at the top of the stack) into
   the target register, then return.  */
7252 xops[0] = gen_rtx_REG (Pmode, regno);
7253 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7254 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7255 output_asm_insn ("ret", xops);
7258 if (NEED_INDICATE_EXEC_STACK)
7259 file_end_indicate_exec_stack ();
7262 /* Emit code for the SET_GOT patterns. */
7265 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
/* VxWorks RTP: the GOT base is published through two well-known
   symbols rather than computed from the PC.  */
7271 if (TARGET_VXWORKS_RTP && flag_pic)
7273 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7274 xops[2] = gen_rtx_MEM (Pmode,
7275 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7276 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7278 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7279 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7280 an unadorned address. */
7281 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7282 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7283 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7287 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction (or without PIC), use the classic
   call-next-insn / pop sequence to materialize the PC.  */
7289 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7291 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7294 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7296 output_asm_insn ("call\t%a2", xops);
7299 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7300 is what will be referenced by the Mach-O PIC subsystem. */
7302 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7305 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7306 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7309 output_asm_insn ("pop%z0\t%0", xops);
/* Otherwise call a per-register get-pc thunk (friendlier to the return
   stack predictor than call/pop) and record that the thunk is needed so
   ix86_file_end emits it.  */
7314 get_pc_thunk_name (name, REGNO (dest));
7315 pic_labels_used |= 1 << REGNO (dest);
7317 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7318 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7319 output_asm_insn ("call\t%X2", xops);
7320 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7321 is what will be referenced by the Mach-O PIC subsystem. */
7324 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7326 targetm.asm_out.internal_label (asm_out_file, "L",
7327 CODE_LABEL_NUMBER (label));
/* Finally add the GOT symbol's offset to the materialized PC.  */
7334 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7335 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7337 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7342 /* Generate a "push" pattern for input ARG. */
/* NOTE(review): the function signature and opening lines are elided in
   this excerpt; only the returned SET (store to a pre-decremented
   stack-pointer address) is visible.  */
7347 return gen_rtx_SET (VOIDmode,
7349 gen_rtx_PRE_DEC (Pmode,
7350 stack_pointer_rtx)),
7354 /* Return >= 0 if there is an unused call-clobbered register available
7355 for the entire function. */
7358 ix86_select_alt_pic_regnum (void)
/* Only a leaf, unprofiled function that doesn't call the TLS
   descriptor can safely repurpose a call-clobbered register.  */
7360 if (current_function_is_leaf && !crtl->profile
7361 && !ix86_current_function_calls_tls_descriptor)
7364 /* Can't use the same register for both PIC and DRAP. */
7366 drap = REGNO (crtl->drap_reg);
/* Scan eax/ecx/edx (regnos 2..0) for one never used in this function.  */
7369 for (i = 2; i >= 0; --i)
7370 if (i != drap && !df_regs_ever_live_p (i))
7374 return INVALID_REGNUM;
7377 /* Return 1 if we need to save REGNO. */
7379 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved if it is really used, unless an unused
   call-clobbered register can carry the PIC pointer instead.  */
7381 if (pic_offset_table_rtx
7382 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7383 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7385 || crtl->calls_eh_return
7386 || crtl->uses_const_pool))
7388 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* EH return: the EH data registers must also be preserved.  */
7393 if (crtl->calls_eh_return && maybe_eh_return)
7398 unsigned test = EH_RETURN_DATA_REGNO (i);
7399 if (test == INVALID_REGNUM)
/* The DRAP register is handled by the prologue itself.  (The
   surrounding condition is partially elided here.)  */
7407 && regno == REGNO (crtl->drap_reg))
/* Default rule: save call-saved, non-fixed registers that are live,
   except the frame pointer when it is being set up anyway.  */
7410 return (df_regs_ever_live_p (regno)
7411 && !call_used_regs[regno]
7412 && !fixed_regs[regno]
7413 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7416 /* Return number of registers to be saved on the stack. */
7419 ix86_nsaved_regs (void)
/* Count every hard register ix86_save_reg says must be preserved
   (including maybe-EH-return registers).  */
7424 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7425 if (ix86_save_reg (regno, true))
7430 /* Given FROM and TO register numbers, say whether this elimination is
7431 allowed. If stack alignment is needed, we can only replace argument
7432 pointer with hard frame pointer, or replace frame pointer with stack
7433 pointer. Otherwise, frame pointer elimination is automatically
7434 handled and all other eliminations are valid. */
7437 ix86_can_eliminate (int from, int to)
/* Under fp-based stack realignment only the two listed eliminations
   remain meaningful.  */
7439 if (stack_realign_fp)
7440 return ((from == ARG_POINTER_REGNUM
7441 && to == HARD_FRAME_POINTER_REGNUM)
7442 || (from == FRAME_POINTER_REGNUM
7443 && to == STACK_POINTER_REGNUM))
/* Otherwise elimination to the stack pointer is blocked only when a
   frame pointer is required.  */
7445 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7448 /* Return the offset between two registers, one to be eliminated, and the other
7449 its replacement, at the start of a routine. */
7452 ix86_initial_elimination_offset (int from, int to)
7454 struct ix86_frame frame;
7455 ix86_compute_frame_layout (&frame);
/* All offsets come straight from the computed frame layout; each case
   below is a difference of two layout offsets.  */
7457 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7458 return frame.hard_frame_pointer_offset;
7459 else if (from == FRAME_POINTER_REGNUM
7460 && to == HARD_FRAME_POINTER_REGNUM)
7461 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7464 gcc_assert (to == STACK_POINTER_REGNUM);
7466 if (from == ARG_POINTER_REGNUM)
7467 return frame.stack_pointer_offset;
7469 gcc_assert (from == FRAME_POINTER_REGNUM);
7470 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7474 /* Fill structure ix86_frame about frame of currently computed function. */
7477 ix86_compute_frame_layout (struct ix86_frame *frame)
7479 HOST_WIDE_INT total_size;
7480 unsigned int stack_alignment_needed;
7481 HOST_WIDE_INT offset;
7482 unsigned int preferred_alignment;
7483 HOST_WIDE_INT size = get_frame_size ();
7485 frame->nregs = ix86_nsaved_regs ();
/* Convert the bit alignments recorded on crtl into byte units.  */
7488 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7489 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7491 gcc_assert (!size || stack_alignment_needed);
7492 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7493 gcc_assert (preferred_alignment <= stack_alignment_needed);
7495 /* During reload iteration the amount of registers saved can change.
7496 Recompute the value as needed. Do not recompute when amount of registers
7497 didn't change as reload does multiple calls to the function and does not
7498 expect the decision to change within single iteration. */
7499 if (!optimize_function_for_size_p (cfun)
7500 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7502 int count = frame->nregs;
7504 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7505 /* The fast prologue uses move instead of push to save registers. This
7506 is significantly longer, but also executes faster as modern hardware
7507 can execute the moves in parallel, but can't do that for push/pop.
7509 Be careful about choosing what prologue to emit: When function takes
7510 many instructions to execute we may use slow version as well as in
7511 case function is known to be outside hot spot (this is known with
7512 feedback only). Weight the size of function by number of registers
7513 to save as it is cheap to use one or two push instructions but very
7514 slow to use many of them. */
7516 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7517 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7518 || (flag_branch_probabilities
7519 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7520 cfun->machine->use_fast_prologue_epilogue = false;
7522 cfun->machine->use_fast_prologue_epilogue
7523 = !expensive_function_p (count);
7525 if (TARGET_PROLOGUE_USING_MOVE
7526 && cfun->machine->use_fast_prologue_epilogue)
7527 frame->save_regs_using_mov = true;
7529 frame->save_regs_using_mov = false;
7532 /* Skip return address and saved base pointer. */
7533 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7535 frame->hard_frame_pointer_offset = offset;
7537 /* Set offset to aligned because the realigned frame starts from
7539 if (stack_realign_fp)
7540 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7542 /* Register save area */
7543 offset += frame->nregs * UNITS_PER_WORD;
/* Space reserved for spilled varargs registers (GPR + FPR save area).  */
7546 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7547 offset += frame->va_arg_size;
7549 /* Align start of frame for local function. */
7550 frame->padding1 = ((offset + stack_alignment_needed - 1)
7551 & -stack_alignment_needed) - offset;
7553 offset += frame->padding1;
7555 /* Frame pointer points here. */
7556 frame->frame_pointer_offset = offset;
7560 /* Add outgoing arguments area. Can be skipped if we eliminated
7561 all the function calls as dead code.
7562 Skipping is however impossible when function calls alloca. Alloca
7563 expander assumes that last crtl->outgoing_args_size
7564 of stack frame are unused. */
7565 if (ACCUMULATE_OUTGOING_ARGS
7566 && (!current_function_is_leaf || cfun->calls_alloca
7567 || ix86_current_function_calls_tls_descriptor))
7569 offset += crtl->outgoing_args_size;
7570 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7573 frame->outgoing_arguments_size = 0;
7575 /* Align stack boundary. Only needed if we're calling another function
7577 if (!current_function_is_leaf || cfun->calls_alloca
7578 || ix86_current_function_calls_tls_descriptor)
7579 frame->padding2 = ((offset + preferred_alignment - 1)
7580 & -preferred_alignment) - offset;
7582 frame->padding2 = 0;
7584 offset += frame->padding2;
7586 /* We've reached end of stack frame. */
7587 frame->stack_pointer_offset = offset;
7589 /* Size prologue needs to allocate. */
7590 frame->to_allocate =
7591 (size + frame->padding1 + frame->padding2
7592 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Tiny frames: pushes beat moves; huge 64-bit frames: the offsets
   won't fit move displacements, so fall back to pushes as well.  */
7594 if ((!frame->to_allocate && frame->nregs <= 1)
7595 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7596 frame->save_regs_using_mov = false;
/* Red zone: a leaf function that never moves the stack pointer may use
   the area below %rsp instead of allocating it.  */
7598 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7599 && current_function_is_leaf
7600 && !ix86_current_function_calls_tls_descriptor)
7602 frame->red_zone_size = frame->to_allocate;
7603 if (frame->save_regs_using_mov)
7604 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7605 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7606 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7609 frame->red_zone_size = 0;
7610 frame->to_allocate -= frame->red_zone_size;
7611 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout.  NOTE(review): presumably sits
   behind a debug guard elided in this excerpt — confirm before relying
   on it being compiled in.  */
7613 fprintf (stderr, "\n");
7614 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7615 fprintf (stderr, "size: %ld\n", (long)size);
7616 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7617 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7618 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7619 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7620 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7621 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7622 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7623 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7624 (long)frame->hard_frame_pointer_offset);
7625 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7626 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7627 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7628 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7632 /* Emit code to save registers in the prologue. */
7635 ix86_emit_save_regs (void)
/* Push, from highest regno down, every register ix86_save_reg selects;
   each push is marked frame-related for unwind info.  */
7640 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
7641 if (ix86_save_reg (regno, true))
7643 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7644 RTX_FRAME_RELATED_P (insn) = 1;
7648 /* Emit code to save registers using MOV insns. First register
7649 is restored from POINTER + OFFSET. */
7651 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
/* Store each saved register at successive word offsets from POINTER;
   moves (unlike pushes) can execute in parallel on modern cores.  */
7656 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7657 if (ix86_save_reg (regno, true))
7659 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7661 gen_rtx_REG (Pmode, regno));
7662 RTX_FRAME_RELATED_P (insn) = 1;
7663 offset += UNITS_PER_WORD;
7667 /* Expand prologue or epilogue stack adjustment.
7668 The pattern exist to put a dependency on all ebp-based memory accesses.
7669 STYLE should be negative if instructions should be marked as frame related,
7670 zero if %r11 register is live and cannot be freely used and positive
7674 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
/* 32-bit: always a single add pattern.  (The TARGET_64BIT test guarding
   this branch is elided in this excerpt.)  */
7679 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
/* 64-bit with an immediate-representable offset: direct rex64 add.  */
7680 else if (x86_64_immediate_operand (offset, DImode))
7681 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7685 /* r11 is used by indirect sibcall return as well, set before the
7686 epilogue and used after the epilogue. ATM indirect sibcall
7687 shouldn't be used together with huge frame sizes in one
7688 function because of the frame_size check in sibcall.c. */
/* Huge offset: stage it through %r11, then add the register.  */
7690 r11 = gen_rtx_REG (DImode, R11_REG);
7691 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
7693 RTX_FRAME_RELATED_P (insn) = 1;
7694 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
7698 RTX_FRAME_RELATED_P (insn) = 1;
7701 /* Find an available register to be used as dynamic realign argument
7702 pointer register. Such a register will be written in prologue and
7703 used in begin of body, so it must not be
7704 1. parameter passing register.
7706 We reuse static-chain register if it is available. Otherwise, we
7707 use DI for i386 and R13 for x86-64. We chose R13 since it has
7710 Return: the regno of chosen register. */
7713 find_drap_reg (void)
7715 tree decl = cfun->decl;
7719 /* Use R13 for nested function or function need static chain.
7720 Since function with tail call may use any caller-saved
7721 registers in epilogue, DRAP must not use caller-saved
7722 register in such case. */
7723 if ((decl_function_context (decl)
7724 && !DECL_NO_STATIC_CHAIN (decl))
7725 || crtl->tail_call_emit)
7732 /* Use DI for nested function or function need static chain.
7733 Since function with tail call may use any caller-saved
7734 registers in epilogue, DRAP must not use caller-saved
7735 register in such case. */
7736 if ((decl_function_context (decl)
7737 && !DECL_NO_STATIC_CHAIN (decl))
7738 || crtl->tail_call_emit)
7741 /* Reuse static chain register if it isn't used for parameter
/* ECX is free only when regparm leaves it unused and the function is
   not fastcall (fastcall passes arguments in ECX).  */
7743 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
7744 && !lookup_attribute ("fastcall",
7745 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
7752 /* Update incoming stack boundary and estimated stack alignment. */
7755 ix86_update_stack_boundary (void)
7757 /* Prefer the one specified at command line. */
7758 ix86_incoming_stack_boundary
7759 = (ix86_user_incoming_stack_boundary
7760 ? ix86_user_incoming_stack_boundary
7761 : ix86_default_incoming_stack_boundary);
7763 /* Incoming stack alignment can be changed on individual functions
7764 via force_align_arg_pointer attribute. We use the smallest
7765 incoming stack boundary. */
7766 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
7767 && lookup_attribute (ix86_force_align_arg_pointer_string,
7768 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7769 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
7771 /* The incoming stack frame has to be aligned at least at
7772 parm_stack_boundary. */
7773 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
7774 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
7776 /* Stack at entrance of main is aligned by runtime. We use the
7777 smallest incoming stack boundary. */
7778 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
7779 && DECL_NAME (current_function_decl)
7780 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7781 && DECL_FILE_SCOPE_P (current_function_decl))
7782 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7784 /* x86_64 vararg needs 16byte stack alignment for register save
/* (Part of this condition is elided in the excerpt; the visible effect
   is raising the alignment estimate to 128 bits.)  */
7788 && crtl->stack_alignment_estimated < 128)
7789 crtl->stack_alignment_estimated = 128;
7792 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7793 needed or an rtx for DRAP otherwise. */
7796 ix86_get_drap_rtx (void)
7798 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
7799 crtl->need_drap = true;
7801 if (stack_realign_drap)
7803 /* Assign DRAP to vDRAP and returns vDRAP */
7804 unsigned int regno = find_drap_reg ();
7809 arg_ptr = gen_rtx_REG (Pmode, regno);
7810 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo right after function entry so the
   body can use the virtual DRAP freely.  */
7813 drap_vreg = copy_to_reg (arg_ptr);
7817 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7818 RTX_FRAME_RELATED_P (insn) = 1;
7825 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* The virtual incoming-args rtx is later eliminated to a hard register
   by the generic elimination machinery.  */
7828 ix86_internal_arg_pointer (void)
7830 return virtual_incoming_args_rtx;
7833 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
7834 This is called from dwarf2out.c to emit call frame instructions
7835 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
7837 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
7839 rtx unspec = SET_SRC (pattern);
7840 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* Dispatch on the unspec code.  (The switch header and remaining cases
   are elided in this excerpt.)  */
7844 case UNSPEC_REG_SAVE:
7845 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
7846 SET_DEST (pattern));
7848 case UNSPEC_DEF_CFA:
7849 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
7850 INTVAL (XVECEXP (unspec, 0, 0)));
7857 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
7858 to be generated in correct form. */
7860 ix86_finalize_stack_realign_flags (void)
7862 /* Check if stack realign is really needed after reload, and
7863 stores result in cfun */
7864 unsigned int incoming_stack_boundary
7865 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7866 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Leaf functions only need alignment actually used by stack slots;
   others must honor the full computed requirement.  */
7867 unsigned int stack_realign = (incoming_stack_boundary
7868 < (current_function_is_leaf
7869 ? crtl->max_used_stack_slot_alignment
7870 : crtl->stack_alignment_needed));
7872 if (crtl->stack_realign_finalized)
7874 /* After stack_realign_needed is finalized, we can no longer
7876 gcc_assert (crtl->stack_realign_needed == stack_realign);
7880 crtl->stack_realign_needed = stack_realign;
7881 crtl->stack_realign_finalized = true;
7885 /* Expand the prologue into a bunch of separate insns. */
7888 ix86_expand_prologue (void)
7892 struct ix86_frame frame;
7893 HOST_WIDE_INT allocate;
7895 ix86_finalize_stack_realign_flags ();
7897 /* DRAP should not coexist with stack_realign_fp */
7898 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
7900 ix86_compute_frame_layout (&frame);
7902 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
7903 of DRAP is needed and stack realignment is really needed after reload */
7904 if (crtl->drap_reg && crtl->stack_realign_needed)
7907 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7908 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
7909 ? 0 : UNITS_PER_WORD);
7911 gcc_assert (stack_realign_drap);
7913 /* Grab the argument pointer. */
7914 x = plus_constant (stack_pointer_rtx,
7915 (UNITS_PER_WORD + param_ptr_offset));
7918 /* Only need to push parameter pointer reg if it is caller
7920 if (!call_used_regs[REGNO (crtl->drap_reg)])
7922 /* Push arg pointer reg */
7923 insn = emit_insn (gen_push (y));
7924 RTX_FRAME_RELATED_P (insn) = 1;
7927 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
7928 RTX_FRAME_RELATED_P (insn) = 1;
7930 /* Align the stack. */
7931 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
7933 GEN_INT (-align_bytes)));
7934 RTX_FRAME_RELATED_P (insn) = 1;
7936 /* Replicate the return address on the stack so that return
7937 address can be reached via (argp - 1) slot. This is needed
7938 to implement macro RETURN_ADDR_RTX and intrinsic function
7939 expand_builtin_return_addr etc. */
7941 x = gen_frame_mem (Pmode,
7942 plus_constant (x, -UNITS_PER_WORD));
7943 insn = emit_insn (gen_push (x));
7944 RTX_FRAME_RELATED_P (insn) = 1;
7947 /* Note: AT&T enter does NOT have reversed args. Enter is probably
7948 slower on all targets. Also sdb doesn't like it. */
7950 if (frame_pointer_needed)
7952 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
7953 RTX_FRAME_RELATED_P (insn) = 1;
7955 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
7956 RTX_FRAME_RELATED_P (insn) = 1;
7959 if (stack_realign_fp)
7961 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7962 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
7964 /* Align the stack. */
7965 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
7967 GEN_INT (-align_bytes)));
7968 RTX_FRAME_RELATED_P (insn) = 1;
7971 allocate = frame.to_allocate;
7973 if (!frame.save_regs_using_mov)
7974 ix86_emit_save_regs ();
7976 allocate += frame.nregs * UNITS_PER_WORD;
7978 /* When using red zone we may start register saving before allocating
7979 the stack frame saving one cycle of the prologue. However I will
7980 avoid doing this if I am going to have to probe the stack since
7981 at least on x86_64 the stack probe can turn into a call that clobbers
7982 a red zone location */
7983 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
7984 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
7985 ix86_emit_save_regs_using_mov ((frame_pointer_needed
7986 && !crtl->stack_realign_needed)
7987 ? hard_frame_pointer_rtx
7988 : stack_pointer_rtx,
7989 -frame.nregs * UNITS_PER_WORD);
7993 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
7994 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7995 GEN_INT (-allocate), -1);
7998 /* Only valid for Win32. */
7999 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8003 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8005 if (cfun->machine->call_abi == MS_ABI)
8008 eax_live = ix86_eax_live_at_start_p ();
8012 emit_insn (gen_push (eax));
8013 allocate -= UNITS_PER_WORD;
8016 emit_move_insn (eax, GEN_INT (allocate));
8019 insn = gen_allocate_stack_worker_64 (eax, eax);
8021 insn = gen_allocate_stack_worker_32 (eax, eax);
8022 insn = emit_insn (insn);
8023 RTX_FRAME_RELATED_P (insn) = 1;
8024 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8025 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8026 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
8027 t, REG_NOTES (insn));
8031 if (frame_pointer_needed)
8032 t = plus_constant (hard_frame_pointer_rtx,
8035 - frame.nregs * UNITS_PER_WORD);
8037 t = plus_constant (stack_pointer_rtx, allocate);
8038 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8042 if (frame.save_regs_using_mov
8043 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8044 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8046 if (!frame_pointer_needed
8047 || !frame.to_allocate
8048 || crtl->stack_realign_needed)
8049 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8052 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8053 -frame.nregs * UNITS_PER_WORD);
8056 pic_reg_used = false;
8057 if (pic_offset_table_rtx
8058 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8061 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8063 if (alt_pic_reg_used != INVALID_REGNUM)
8064 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8066 pic_reg_used = true;
8073 if (ix86_cmodel == CM_LARGE_PIC)
8075 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8076 rtx label = gen_label_rtx ();
8078 LABEL_PRESERVE_P (label) = 1;
8079 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8080 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8081 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8082 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8083 pic_offset_table_rtx, tmp_reg));
8086 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8089 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8092 /* Prevent function calls from being scheduled before the call to mcount.
8093 In the pic_reg_used case, make sure that the got load isn't deleted. */
8097 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8098 emit_insn (gen_blockage ());
8101 if (crtl->drap_reg && !crtl->stack_realign_needed)
8103 /* vDRAP is setup but after reload it turns out stack realign
8104 isn't necessary, here we will emit prologue to setup DRAP
8105 without stack realign adjustment */
8106 int drap_bp_offset = UNITS_PER_WORD * 2;
8107 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8108 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8111 /* Emit cld instruction if stringops are used in the function. */
8112 if (TARGET_CLD && ix86_current_function_needs_cld)
8113 emit_insn (gen_cld ());
8116 /* Emit code to restore saved registers using MOV insns. First register
8117 is restored from POINTER + OFFSET. */
8119 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8120 int maybe_eh_return)
8123 rtx base_address = gen_rtx_MEM (Pmode, pointer);
/* Walk every hard register the prologue saved (as reported by
   ix86_save_reg); each save slot is UNITS_PER_WORD past the previous.  */
8125 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8126 if (ix86_save_reg (regno, maybe_eh_return))
8128 /* Ensure that adjust_address won't be forced to produce pointer
8129 out of range allowed by x86-64 instruction set. */
8130 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* OFFSET no longer fits a signed 32-bit displacement: materialize
   POINTER + OFFSET in r11 and address the slots relative to it.  */
8134 r11 = gen_rtx_REG (DImode, R11_REG);
8135 emit_move_insn (r11, GEN_INT (offset));
8136 emit_insn (gen_adddi3 (r11, r11, pointer));
8137 base_address = gen_rtx_MEM (Pmode, r11);
/* Restore the register with a plain move from its save slot.  */
8140 emit_move_insn (gen_rtx_REG (Pmode, regno),
8141 adjust_address (base_address, Pmode, offset));
8142 offset += UNITS_PER_WORD;
8146 /* Restore function stack, frame, and registers. */
8149 ix86_expand_epilogue (int style)
/* NOTE(review): STYLE is 2 for an eh_return epilogue (see the
   "style != 2" check below); other values select normal vs. sibcall
   return paths -- confirm exact encoding against the callers.  */
8153 struct ix86_frame frame;
8154 HOST_WIDE_INT offset;
8156 ix86_finalize_stack_realign_flags ();
8158 /* When stack is realigned, SP must be valid. */
8159 sp_valid = (!frame_pointer_needed
8160 || current_function_sp_is_unchanging
8161 || stack_realign_fp);
8163 ix86_compute_frame_layout (&frame);
8165 /* Calculate start of saved registers relative to ebp. Special care
8166 must be taken for the normal return case of a function using
8167 eh_return: the eax and edx registers are marked as saved, but not
8168 restored along this path. */
8169 offset = frame.nregs;
8170 if (crtl->calls_eh_return && style != 2)
8172 offset *= -UNITS_PER_WORD;
8174 /* If we're only restoring one register and sp is not valid then
8175 use a move instruction to restore the register, since it's
8176 less work than reloading sp and popping the register.
8178 The default code results in a stack adjustment using add/lea instructions,
8179 while this code results in LEAVE instruction (or discrete equivalent),
8180 so it is profitable in some other cases as well. Especially when there
8181 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8182 and there is exactly one register to pop. This heuristic may need some
8183 tuning in future. */
8184 if ((!sp_valid && frame.nregs <= 1)
8185 || (TARGET_EPILOGUE_USING_MOVE
8186 && cfun->machine->use_fast_prologue_epilogue
8187 && (frame.nregs > 1 || frame.to_allocate))
8188 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
8189 || (frame_pointer_needed && TARGET_USE_LEAVE
8190 && cfun->machine->use_fast_prologue_epilogue
8191 && frame.nregs == 1)
8192 || crtl->calls_eh_return)
8194 /* Restore registers. We can use ebp or esp to address the memory
8195 locations. If both are available, default to ebp, since offsets
8196 are known to be small. Only exception is esp pointing directly
8197 to the end of block of saved registers, where we may simplify
8200 If we are realigning stack with bp and sp, regs restore can't
8201 be addressed by bp. sp must be used instead. */
8203 if (!frame_pointer_needed
8204 || (sp_valid && !frame.to_allocate)
8205 || stack_realign_fp)
8206 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8207 frame.to_allocate, style == 2)
8209 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8210 offset, style == 2);
8212 /* eh_return epilogues need %ecx added to the stack pointer. */
8215 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8217 /* Stack align doesn't work with eh_return. */
8218 gcc_assert (!crtl->stack_realign_needed);
8220 if (frame_pointer_needed)
8222 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8223 tmp = plus_constant (tmp, UNITS_PER_WORD);
8224 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8226 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8227 emit_move_insn (hard_frame_pointer_rtx, tmp);
8229 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8234 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8235 tmp = plus_constant (tmp, (frame.to_allocate
8236 + frame.nregs * UNITS_PER_WORD));
8237 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8240 else if (!frame_pointer_needed)
8241 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8242 GEN_INT (frame.to_allocate
8243 + frame.nregs * UNITS_PER_WORD),
8245 /* If not an i386, mov & pop is faster than "leave". */
8246 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8247 || !cfun->machine->use_fast_prologue_epilogue)
8248 emit_insn ((*ix86_gen_leave) ());
8251 pro_epilogue_adjust_stack (stack_pointer_rtx,
8252 hard_frame_pointer_rtx,
8255 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8260 /* First step is to deallocate the stack frame so that we can
8263 If we realign stack with frame pointer, then stack pointer
8264 won't be able to recover via lea $offset(%bp), %sp, because
8265 there is a padding area between bp and sp for realign.
8266 "add $to_allocate, %sp" must be used instead. */
8269 gcc_assert (frame_pointer_needed);
8270 gcc_assert (!stack_realign_fp);
8271 pro_epilogue_adjust_stack (stack_pointer_rtx,
8272 hard_frame_pointer_rtx,
8273 GEN_INT (offset), style);
8275 else if (frame.to_allocate)
8276 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8277 GEN_INT (frame.to_allocate), style);
/* Pop each register saved by the prologue, lowest regno first.  */
8279 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8280 if (ix86_save_reg (regno, false))
8281 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8282 if (frame_pointer_needed)
8284 /* Leave results in shorter dependency chains on CPUs that are
8285 able to grok it fast. */
8286 if (TARGET_USE_LEAVE)
8287 emit_insn ((*ix86_gen_leave) ());
8290 /* When stack realignment really did happen, the stack
8291 pointer must be recovered from the hard frame pointer, if not using
8293 if (stack_realign_fp)
8294 pro_epilogue_adjust_stack (stack_pointer_rtx,
8295 hard_frame_pointer_rtx,
8297 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* DRAP-based realignment: undo the argument-pointer save done in the
   prologue, restoring the DRAP register if it is call-saved.  */
8302 if (crtl->drap_reg && crtl->stack_realign_needed)
8304 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8305 ? 0 : UNITS_PER_WORD);
8306 gcc_assert (stack_realign_drap);
8307 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8309 GEN_INT (-(UNITS_PER_WORD
8310 + param_ptr_offset))));
8311 if (!call_used_regs[REGNO (crtl->drap_reg)])
8312 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8316 /* Sibcall epilogues don't want a return instruction. */
8320 if (crtl->args.pops_args && crtl->args.size)
8322 rtx popc = GEN_INT (crtl->args.pops_args);
8324 /* i386 can only pop 64K bytes. If asked to pop more, pop
8325 return address, do explicit add, and jump indirectly to the
8328 if (crtl->args.pops_args >= 65536)
8330 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8332 /* There is no "pascal" calling convention in any 64bit ABI. */
8333 gcc_assert (!TARGET_64BIT);
8335 emit_insn (gen_popsi1 (ecx));
8336 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8337 emit_jump_insn (gen_return_indirect_internal (ecx));
8340 emit_jump_insn (gen_return_pop_internal (popc));
8343 emit_jump_insn (gen_return_internal ());
8346 /* Reset from the function's potential modifications. */
8349 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8350 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* Undo any alternate-PIC-register renaming done while expanding the
   function body (see ix86_select_alt_pic_regnum in the prologue).  */
8352 if (pic_offset_table_rtx)
8353 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8355 /* Mach-O doesn't support labels at the end of objects, so if
8356 it looks like we might want one, insert a NOP. */
8358 rtx insn = get_last_insn ();
/* Scan backwards past trailing notes to find the last real insn.  */
8361 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8362 insn = PREV_INSN (insn);
8366 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8367 fputs ("\tnop\n", file);
8373 /* Extract the parts of an RTL expression that is a valid memory address
8374 for an instruction. Return 0 if the structure of the address is
8375 grossly off. Return -1 if the address contains ASHIFT, so it is not
8376 strictly valid, but still used for computing length of lea instruction. */
8379 ix86_decompose_address (rtx addr, struct ix86_address *out)
8381 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8382 rtx base_reg, index_reg;
8383 HOST_WIDE_INT scale = 1;
8384 rtx scale_rtx = NULL_RTX;
8386 enum ix86_address_seg seg = SEG_DEFAULT;
/* Classify ADDR by its top-level RTX code: a bare register, a sum of
   addends, a MULT/ASHIFT (index*scale), or a plain displacement.  */
8388 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8390 else if (GET_CODE (addr) == PLUS)
/* Flatten the nested PLUS chain into the addends[] worklist.  */
8400 addends[n++] = XEXP (op, 1);
8403 while (GET_CODE (op) == PLUS);
8408 for (i = n; i >= 0; --i)
8411 switch (GET_CODE (op))
8416 index = XEXP (op, 0);
8417 scale_rtx = XEXP (op, 1);
/* An UNSPEC_TP addend selects the TLS segment register (%fs on
   64-bit, %gs on 32-bit) when direct segment references are on.  */
8421 if (XINT (op, 1) == UNSPEC_TP
8422 && TARGET_TLS_DIRECT_SEG_REFS
8423 && seg == SEG_DEFAULT)
8424 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8453 else if (GET_CODE (addr) == MULT)
8455 index = XEXP (addr, 0); /* index*scale */
8456 scale_rtx = XEXP (addr, 1);
8458 else if (GET_CODE (addr) == ASHIFT)
8462 /* We're called for lea too, which implements ashift on occasion. */
8463 index = XEXP (addr, 0);
8464 tmp = XEXP (addr, 1);
8465 if (!CONST_INT_P (tmp))
8467 scale = INTVAL (tmp);
/* A shift count above 3 cannot be encoded as an SIB scale (1/2/4/8).  */
8468 if ((unsigned HOST_WIDE_INT) scale > 3)
8474 disp = addr; /* displacement */
8476 /* Extract the integral value of scale. */
8479 if (!CONST_INT_P (scale_rtx))
8481 scale = INTVAL (scale_rtx);
8484 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8485 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8487 /* Allow arg pointer and stack pointer as index if there is no scaling. */
8488 if (base_reg && index_reg && scale == 1
8489 && (index_reg == arg_pointer_rtx
8490 || index_reg == frame_pointer_rtx
8491 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8494 tmp = base, base = index, index = tmp;
8495 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8498 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8499 if ((base_reg == hard_frame_pointer_rtx
8500 || base_reg == frame_pointer_rtx
8501 || base_reg == arg_pointer_rtx) && !disp
8504 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8505 Avoid this by transforming to [%esi+0].
8506 Reload calls address legitimization without cfun defined, so we need
8507 to test cfun for being non-NULL. */
8508 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8509 && base_reg && !index_reg && !disp
8511 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8514 /* Special case: encode reg+reg instead of reg*2. */
8515 if (!base && index && scale && scale == 2)
8516 base = index, base_reg = index_reg, scale = 1;
8518 /* Special case: scaling cannot be encoded without base or displacement. */
8519 if (!base && !disp && index && scale != 1)
8531 /* Return cost of the memory address x.
8532 For i386, it is better to use a complex address than let gcc copy
8533 the address into a reg and make a new pseudo. But not if the address
8534 requires two regs - that would mean more pseudos with longer
8537 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8539 struct ix86_address parts;
8541 int ok = ix86_decompose_address (x, &parts);
/* Cost is judged on the hard registers underneath any SUBREGs.  */
8545 if (parts.base && GET_CODE (parts.base) == SUBREG)
8546 parts.base = SUBREG_REG (parts.base);
8547 if (parts.index && GET_CODE (parts.index) == SUBREG)
8548 parts.index = SUBREG_REG (parts.index);
8550 /* Attempt to minimize number of registers in the address. */
8552 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8554 && (!REG_P (parts.index)
8555 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8559 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8561 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8562 && parts.base != parts.index)
8565 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
8566 since its predecode logic can't detect the length of instructions
8567 and it degenerates to vector decoded. Increase cost of such
8568 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
8569 to split such addresses or even refuse such addresses at all.
8571 Following addressing modes are affected:
8576 The first and last case may be avoidable by explicitly coding the zero in
8577 memory address, but I don't have AMD-K6 machine handy to check this
8581 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8582 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8583 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8589 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8590 this is used to form addresses to local data when -fPIC is in
8594 darwin_local_data_pic (rtx disp)
/* True iff DISP is an UNSPEC_MACHOPIC_OFFSET wrapper.  */
8596 return (GET_CODE (disp) == UNSPEC
8597 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8600 /* Determine if a given RTX is a valid constant. We already know this
8601 satisfies CONSTANT_P. */
8604 legitimate_constant_p (rtx x)
8606 switch (GET_CODE (x))
/* A (const (plus sym ofs)) is only valid when the offset is a CONST_INT.  */
8611 if (GET_CODE (x) == PLUS)
8613 if (!CONST_INT_P (XEXP (x, 1)))
8618 if (TARGET_MACHO && darwin_local_data_pic (x))
8621 /* Only some unspecs are valid as "constants". */
8622 if (GET_CODE (x) == UNSPEC)
8623 switch (XINT (x, 1))
8628 return TARGET_64BIT;
/* TLS unspecs: the wrapped symbol's TLS model must match the unspec.  */
8631 x = XVECEXP (x, 0, 0);
8632 return (GET_CODE (x) == SYMBOL_REF
8633 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8635 x = XVECEXP (x, 0, 0);
8636 return (GET_CODE (x) == SYMBOL_REF
8637 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
8642 /* We must have drilled down to a symbol. */
8643 if (GET_CODE (x) == LABEL_REF)
8645 if (GET_CODE (x) != SYMBOL_REF)
8650 /* TLS symbols are never valid. */
8651 if (SYMBOL_REF_TLS_MODEL (x))
8654 /* DLLIMPORT symbols are never valid. */
8655 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
8656 && SYMBOL_REF_DLLIMPORT_P (x))
8661 if (GET_MODE (x) == TImode
8662 && x != CONST0_RTX (TImode)
8668 if (x == CONST0_RTX (GET_MODE (x)))
8676 /* Otherwise we handle everything else in the move patterns. */
8680 /* Determine if it's legal to put X into the constant pool. This
8681 is not possible for the address of thread-local symbols, which
8682 is checked above. */
8685 ix86_cannot_force_const_mem (rtx x)
8687 /* We can always put integral constants and vectors in memory. */
8688 switch (GET_CODE (x))
/* Everything else defers to legitimate_constant_p: constants that are
   not legitimate (e.g. TLS references) must not go to the pool.  */
8698 return !legitimate_constant_p (x);
8701 /* Determine if a given RTX is a valid constant address. */
8704 constant_address_p (rtx x)
/* A constant address must also pass the non-strict address check.  */
8706 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
8709 /* Nonzero if the constant value X is a legitimate general operand
8710 when generating PIC code. It is given that flag_pic is on and
8711 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
8714 legitimate_pic_operand_p (rtx x)
8718 switch (GET_CODE (x))
/* Strip the CONST wrapper and any constant offset to find the core term.  */
8721 inner = XEXP (x, 0);
8722 if (GET_CODE (inner) == PLUS
8723 && CONST_INT_P (XEXP (inner, 1)))
8724 inner = XEXP (inner, 0);
8726 /* Only some unspecs are valid as "constants". */
8727 if (GET_CODE (inner) == UNSPEC)
8728 switch (XINT (inner, 1))
8733 return TARGET_64BIT;
8735 x = XVECEXP (inner, 0, 0);
8736 return (GET_CODE (x) == SYMBOL_REF
8737 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8738 case UNSPEC_MACHOPIC_OFFSET:
8739 return legitimate_pic_address_disp_p (x);
/* SYMBOL_REF/LABEL_REF fall through to the displacement check.  */
8747 return legitimate_pic_address_disp_p (x);
8754 /* Determine if a given CONST RTX is a valid memory displacement
8758 legitimate_pic_address_disp_p (rtx disp)
8762 /* In 64bit mode we can allow direct addresses of symbols and labels
8763 when they are not dynamic symbols. */
8766 rtx op0 = disp, op1;
8768 switch (GET_CODE (disp))
8774 if (GET_CODE (XEXP (disp, 0)) != PLUS)
8776 op0 = XEXP (XEXP (disp, 0), 0);
8777 op1 = XEXP (XEXP (disp, 0), 1);
/* The constant offset must fit in the +/-16MB range of the small model.  */
8778 if (!CONST_INT_P (op1)
8779 || INTVAL (op1) >= 16*1024*1024
8780 || INTVAL (op1) < -16*1024*1024)
8782 if (GET_CODE (op0) == LABEL_REF)
8784 if (GET_CODE (op0) != SYMBOL_REF)
8789 /* TLS references should always be enclosed in UNSPEC. */
8790 if (SYMBOL_REF_TLS_MODEL (op0))
8792 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
8793 && ix86_cmodel != CM_LARGE_PIC)
8801 if (GET_CODE (disp) != CONST)
8803 disp = XEXP (disp, 0);
8807 /* It is unsafe to allow PLUS expressions here; that would exceed
8808 the allowed distance of GOT tables. We should not need these anyway. */
8809 if (GET_CODE (disp) != UNSPEC
8810 || (XINT (disp, 1) != UNSPEC_GOTPCREL
8811 && XINT (disp, 1) != UNSPEC_GOTOFF
8812 && XINT (disp, 1) != UNSPEC_PLTOFF))
8815 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
8816 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip any constant offset, then validate the unspec.  */
8822 if (GET_CODE (disp) == PLUS)
8824 if (!CONST_INT_P (XEXP (disp, 1)))
8826 disp = XEXP (disp, 0);
8830 if (TARGET_MACHO && darwin_local_data_pic (disp))
8833 if (GET_CODE (disp) != UNSPEC)
8836 switch (XINT (disp, 1))
8841 /* We need to check for both symbols and labels because VxWorks loads
8842 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
8844 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8845 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
8847 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
8848 While the ABI also specifies a 32bit relocation, we don't produce it in
8849 the small PIC model at all. */
8850 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8851 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
8853 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
8855 case UNSPEC_GOTTPOFF:
8856 case UNSPEC_GOTNTPOFF:
8857 case UNSPEC_INDNTPOFF:
/* TLS unspecs: the wrapped symbol must carry the matching TLS model.  */
8860 disp = XVECEXP (disp, 0, 0);
8861 return (GET_CODE (disp) == SYMBOL_REF
8862 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
8864 disp = XVECEXP (disp, 0, 0);
8865 return (GET_CODE (disp) == SYMBOL_REF
8866 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
8868 disp = XVECEXP (disp, 0, 0);
8869 return (GET_CODE (disp) == SYMBOL_REF
8870 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
8876 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
8877 memory address for an instruction. The MODE argument is the machine mode
8878 for the MEM expression that wants to use this address.
8880 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
8881 convert common non-canonical forms to canonical form so that they will
8885 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
8886 rtx addr, int strict)
8888 struct ix86_address parts;
8889 rtx base, index, disp;
8890 HOST_WIDE_INT scale;
/* REASON/REASON_RTX record why an address was rejected; presumably
   used by debug output at the rejection label -- not visible here.  */
8891 const char *reason = NULL;
8892 rtx reason_rtx = NULL_RTX;
8894 if (ix86_decompose_address (addr, &parts) <= 0)
8896 reason = "decomposition failed";
8901 index = parts.index;
8903 scale = parts.scale;
8905 /* Validate base register.
8907 Don't allow SUBREG's that span more than a word here. It can lead to spill
8908 failures when the base is one word out of a two word structure, which is
8909 represented internally as a DImode int. */
8918 else if (GET_CODE (base) == SUBREG
8919 && REG_P (SUBREG_REG (base))
8920 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
8922 reg = SUBREG_REG (base);
8925 reason = "base is not a register";
8929 if (GET_MODE (base) != Pmode)
8931 reason = "base is not in Pmode";
8935 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
8936 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
8938 reason = "base is not valid";
8943 /* Validate index register.
8945 Don't allow SUBREG's that span more than a word here -- same as above. */
8954 else if (GET_CODE (index) == SUBREG
8955 && REG_P (SUBREG_REG (index))
8956 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
8958 reg = SUBREG_REG (index);
8961 reason = "index is not a register";
8965 if (GET_MODE (index) != Pmode)
8967 reason = "index is not in Pmode";
8971 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
8972 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
8974 reason = "index is not valid";
8979 /* Validate scale factor. */
8982 reason_rtx = GEN_INT (scale);
8985 reason = "scale without index";
/* SIB encoding only supports scale factors of 1, 2, 4 and 8.  */
8989 if (scale != 2 && scale != 4 && scale != 8)
8991 reason = "scale is not a valid multiplier";
8996 /* Validate displacement. */
9001 if (GET_CODE (disp) == CONST
9002 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9003 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9004 switch (XINT (XEXP (disp, 0), 1))
9006 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9007 used. While the ABI also specifies 32bit relocations, we don't produce
9008 them at all and use IP relative instead. */
9011 gcc_assert (flag_pic);
9013 goto is_legitimate_pic;
9014 reason = "64bit address unspec";
9017 case UNSPEC_GOTPCREL:
9018 gcc_assert (flag_pic);
9019 goto is_legitimate_pic;
9021 case UNSPEC_GOTTPOFF:
9022 case UNSPEC_GOTNTPOFF:
9023 case UNSPEC_INDNTPOFF:
9029 reason = "invalid address unspec";
9033 else if (SYMBOLIC_CONST (disp)
9037 && MACHOPIC_INDIRECT
9038 && !machopic_operand_p (disp)
9044 if (TARGET_64BIT && (index || base))
9046 /* foo@dtpoff(%rX) is ok. */
9047 if (GET_CODE (disp) != CONST
9048 || GET_CODE (XEXP (disp, 0)) != PLUS
9049 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9050 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9051 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9052 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9054 reason = "non-constant pic memory reference";
9058 else if (! legitimate_pic_address_disp_p (disp))
9060 reason = "displacement is an invalid pic construct";
9064 /* This code used to verify that a symbolic pic displacement
9065 includes the pic_offset_table_rtx register.
9067 While this is good idea, unfortunately these constructs may
9068 be created by "adds using lea" optimization for incorrect
9077 This code is nonsensical, but results in addressing
9078 GOT table with pic_offset_table_rtx base. We can't
9079 just refuse it easily, since it gets matched by
9080 "addsi3" pattern, that later gets split to lea in the
9081 case output register differs from input. While this
9082 can be handled by separate addsi pattern for this case
9083 that never results in lea, this seems to be easier and
9084 correct fix for crash to disable this test. */
9086 else if (GET_CODE (disp) != LABEL_REF
9087 && !CONST_INT_P (disp)
9088 && (GET_CODE (disp) != CONST
9089 || !legitimate_constant_p (disp))
9090 && (GET_CODE (disp) != SYMBOL_REF
9091 || !legitimate_constant_p (disp)))
9093 reason = "displacement is not constant";
9096 else if (TARGET_64BIT
9097 && !x86_64_immediate_operand (disp, VOIDmode))
9099 reason = "displacement is out of range";
9104 /* Everything looks valid. */
9111 /* Return a unique alias set for the GOT. */
9113 static alias_set_type
9114 ix86_GOT_alias_set (void)
/* Allocated lazily on first use; -1 marks "not yet created".  */
9116 static alias_set_type set = -1;
9118 set = new_alias_set ();
9122 /* Return a legitimate reference for ORIG (an address) using the
9123 register REG. If REG is 0, a new pseudo is generated.
9125 There are two types of references that must be handled:
9127 1. Global data references must load the address from the GOT, via
9128 the PIC reg. An insn is emitted to do this load, and the reg is
9131 2. Static data references, constant pool addresses, and code labels
9132 compute the address as an offset from the GOT, whose base is in
9133 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9134 differentiate them from global data objects. The returned
9135 address is the PIC reg + an unspec constant.
9137 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9138 reg also appears in the address. */
9141 legitimize_pic_address (rtx orig, rtx reg)
9148 if (TARGET_MACHO && !TARGET_64BIT)
9151 reg = gen_reg_rtx (Pmode);
9152 /* Use the generic Mach-O PIC machinery. */
9153 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* 64-bit: already-legitimate PIC displacements need no rewriting.  */
9157 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9159 else if (TARGET_64BIT
9160 && ix86_cmodel != CM_SMALL_PIC
9161 && gotoff_operand (addr, Pmode))
9164 /* This symbol may be referenced via a displacement from the PIC
9165 base address (@GOTOFF). */
9167 if (reload_in_progress)
9168 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9169 if (GET_CODE (addr) == CONST)
9170 addr = XEXP (addr, 0);
9171 if (GET_CODE (addr) == PLUS)
9173 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9175 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9178 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9179 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9181 tmpreg = gen_reg_rtx (Pmode);
9184 emit_move_insn (tmpreg, new_rtx);
9188 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9189 tmpreg, 1, OPTAB_DIRECT);
9192 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9194 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9196 /* This symbol may be referenced via a displacement from the PIC
9197 base address (@GOTOFF). */
9199 if (reload_in_progress)
9200 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9201 if (GET_CODE (addr) == CONST)
9202 addr = XEXP (addr, 0);
9203 if (GET_CODE (addr) == PLUS)
9205 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9207 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9210 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9211 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9212 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9216 emit_move_insn (reg, new_rtx);
9220 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9221 /* We can't use @GOTOFF for text labels on VxWorks;
9222 see gotoff_operand. */
9223 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9225 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9227 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9228 return legitimize_dllimport_symbol (addr, true);
9229 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9230 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9231 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9233 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9234 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium models: RIP-relative load from the GOT.  */
9238 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9240 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9241 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9242 new_rtx = gen_const_mem (Pmode, new_rtx);
9243 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9246 reg = gen_reg_rtx (Pmode);
9247 /* Use gen_movsi directly, otherwise the address is loaded
9248 into a register for CSE. We don't want to CSE these addresses;
9249 instead we CSE addresses from the GOT table, so skip this. */
9250 emit_insn (gen_movsi (reg, new_rtx));
9255 /* This symbol must be referenced via a load from the
9256 Global Offset Table (@GOT). */
9258 if (reload_in_progress)
9259 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9260 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9261 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9263 new_rtx = force_reg (Pmode, new_rtx);
9264 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9265 new_rtx = gen_const_mem (Pmode, new_rtx);
9266 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9269 reg = gen_reg_rtx (Pmode);
9270 emit_move_insn (reg, new_rtx);
9276 if (CONST_INT_P (addr)
9277 && !x86_64_immediate_operand (addr, VOIDmode))
9281 emit_move_insn (reg, addr);
9285 new_rtx = force_reg (Pmode, addr);
9287 else if (GET_CODE (addr) == CONST)
9289 addr = XEXP (addr, 0);
9291 /* We must match stuff we generate before. Assume the only
9292 unspecs that can get here are ours. Not that we could do
9293 anything with them anyway.... */
9294 if (GET_CODE (addr) == UNSPEC
9295 || (GET_CODE (addr) == PLUS
9296 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9298 gcc_assert (GET_CODE (addr) == PLUS);
9300 if (GET_CODE (addr) == PLUS)
9302 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9304 /* Check first to see if this is a constant offset from a @GOTOFF
9305 symbol reference. */
9306 if (gotoff_operand (op0, Pmode)
9307 && CONST_INT_P (op1))
9311 if (reload_in_progress)
9312 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9313 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9315 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9316 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9317 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9321 emit_move_insn (reg, new_rtx);
/* 64-bit: offsets beyond +/-16MB can't use @GOTOFF addressing; add
   the pieces in registers instead.  */
9327 if (INTVAL (op1) < -16*1024*1024
9328 || INTVAL (op1) >= 16*1024*1024)
9330 if (!x86_64_immediate_operand (op1, Pmode))
9331 op1 = force_reg (Pmode, op1);
9332 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively and recombine.  */
9338 base = legitimize_pic_address (XEXP (addr, 0), reg);
9339 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9340 base == reg ? NULL_RTX : reg);
9342 if (CONST_INT_P (new_rtx))
9343 new_rtx = plus_constant (base, INTVAL (new_rtx));
9346 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9348 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9349 new_rtx = XEXP (new_rtx, 1);
9351 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9359 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9362 get_thread_pointer (int to_reg)
/* The thread pointer is represented symbolically as (unspec [const0] TP);
   actual access (e.g. via %fs/%gs) is decided when the insn is emitted.  */
9366 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
/* Copy the thread-pointer rtx into a fresh pseudo.  NOTE(review): elided
   context suggests this path is taken only when TO_REG is set — confirm.  */
9370 reg = gen_reg_rtx (Pmode);
9371 insn = gen_rtx_SET (VOIDmode, reg, tp);
9372 insn = emit_insn (insn);
9377 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9378 false if we expect this to be used for a memory address and true if
9379 we expect to load the address into a register.
 
   X is the TLS symbol, MODEL the TLS access model.  Returns an rtx for
   the symbol's address materialized under that model.  */
9382 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9384 rtx dest, base, off, pic, tp;
/* General dynamic: obtain the address via __tls_get_addr (through the
   tls_global_dynamic_{32,64} patterns).  */
9389 case TLS_MODEL_GLOBAL_DYNAMIC:
9390 dest = gen_reg_rtx (Pmode);
/* GNU2 (TLS descriptor) scheme needs the thread pointer up front.  */
9391 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9393 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9395 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
/* The 64-bit call returns the address in %rax; wrap the sequence in a
   libcall block so it can be CSEd as a whole.  */
9398 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9399 insns = get_insns ();
9402 RTL_CONST_CALL_P (insns) = 1;
9403 emit_libcall_block (insns, dest, rax, x);
9405 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9406 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9408 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9410 if (TARGET_GNU2_TLS)
/* GNU2 TLS yields an offset; add the thread pointer and record the
   symbolic equivalence for later passes.  */
9412 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9414 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Local dynamic: get the module base once, then add the symbol's
   DTP-relative offset.  */
9418 case TLS_MODEL_LOCAL_DYNAMIC:
9419 base = gen_reg_rtx (Pmode);
9420 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9422 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9424 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9427 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9428 insns = get_insns ();
/* Build an equivalence note describing the __tls_get_addr call so the
   libcall block can be rematerialized.  */
9431 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9432 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9433 RTL_CONST_CALL_P (insns) = 1;
9434 emit_libcall_block (insns, base, rax, note);
9436 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9437 emit_insn (gen_tls_local_dynamic_base_64 (base));
9439 emit_insn (gen_tls_local_dynamic_base_32 (base));
9441 if (TARGET_GNU2_TLS)
/* Note the base as module-base minus thread-pointer for GNU2 TLS.  */
9443 rtx x = ix86_tls_module_base ();
9445 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9446 gen_rtx_MINUS (Pmode, x, tp));
/* DTPOFF = offset of X within its module's TLS block.  */
9449 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9450 off = gen_rtx_CONST (Pmode, off);
9452 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9454 if (TARGET_GNU2_TLS)
9456 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9458 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Initial exec: load the TP offset from the GOT; relocation type depends
   on target/pic configuration.  */
9463 case TLS_MODEL_INITIAL_EXEC:
9467 type = UNSPEC_GOTNTPOFF;
9471 if (reload_in_progress)
9472 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9473 pic = pic_offset_table_rtx;
9474 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9476 else if (!TARGET_ANY_GNU_TLS)
/* No PIC register available: materialize the GOT pointer locally.  */
9478 pic = gen_reg_rtx (Pmode);
9479 emit_insn (gen_set_got (pic));
9480 type = UNSPEC_GOTTPOFF;
9485 type = UNSPEC_INDNTPOFF;
9488 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9489 off = gen_rtx_CONST (Pmode, off);
9491 off = gen_rtx_PLUS (Pmode, pic, off);
9492 off = gen_const_mem (Pmode, off);
9493 set_mem_alias_set (off, ix86_GOT_alias_set ());
9495 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
/* GNU/64-bit TLS: address is thread-pointer plus (loaded) offset.  */
9497 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9498 off = force_reg (Pmode, off);
9499 return gen_rtx_PLUS (Pmode, base, off);
/* Sun-style TLS: the offset is subtracted from the thread pointer.  */
9503 base = get_thread_pointer (true);
9504 dest = gen_reg_rtx (Pmode);
9505 emit_insn (gen_subsi3 (dest, base, off));
/* Local exec: offset is known at link time, no memory load needed.  */
9509 case TLS_MODEL_LOCAL_EXEC:
9510 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9511 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9512 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9513 off = gen_rtx_CONST (Pmode, off);
9515 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9517 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9518 return gen_rtx_PLUS (Pmode, base, off);
9522 base = get_thread_pointer (true);
9523 dest = gen_reg_rtx (Pmode);
9524 emit_insn (gen_subsi3 (dest, base, off));
9535 /* Create or return the unique __imp_DECL dllimport symbol corresponding
 
   to DECL.  Results are cached in DLLIMPORT_MAP so each decl gets exactly
   one import symbol.  */
/* GC-managed hash table mapping a decl to its __imp_ VAR_DECL.  */
9538 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9539 htab_t dllimport_map;
9542 get_dllimport_decl (tree decl)
9544 struct tree_map *h, in;
9548 size_t namelen, prefixlen;
/* Lazily create the cache on first use.  */
9554 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9556 in.hash = htab_hash_pointer (decl);
9557 in.base.from = decl;
9558 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9559 h = (struct tree_map *) *loc;
/* Cache miss: build a synthetic external VAR_DECL holding the import
   pointer for DECL.  */
9563 *loc = h = GGC_NEW (struct tree_map);
9565 h->base.from = decl;
9566 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9567 DECL_ARTIFICIAL (to) = 1;
9568 DECL_IGNORED_P (to) = 1;
9569 DECL_EXTERNAL (to) = 1;
9570 TREE_READONLY (to) = 1;
9572 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9573 name = targetm.strip_name_encoding (name);
/* Fastcall symbols and targets without a user label prefix take
   "__imp_"; otherwise the extra underscore prefix is included.  */
9574 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9575 ? "*__imp_" : "*__imp__";
9576 namelen = strlen (name);
9577 prefixlen = strlen (prefix);
9578 imp_name = (char *) alloca (namelen + prefixlen + 1);
9579 memcpy (imp_name, prefix, prefixlen);
9580 memcpy (imp_name + prefixlen, name, namelen + 1);
/* Persist the concatenated name in GC memory and wire up the RTL:
   a const mem load through the __imp_ symbol.  */
9582 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9583 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9584 SET_SYMBOL_REF_DECL (rtl, to);
9585 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9587 rtl = gen_const_mem (Pmode, rtl);
9588 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9590 SET_DECL_RTL (to, rtl);
9591 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9596 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9597 true if we require the result be a register.
 
   The returned rtx is the cached __imp_ memory reference for the symbol's
   decl, optionally forced into a register.  */
9600 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* Only symbols with an attached decl can be mapped to an import stub.  */
9605 gcc_assert (SYMBOL_REF_DECL (symbol));
9606 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
9608 x = DECL_RTL (imp_decl);
9610 x = force_reg (Pmode, x);
9614 /* Try machine-dependent ways of modifying an illegitimate address
9615 to be legitimate. If we find one, return the new, valid address.
9616 This macro is used in only one place: `memory_address' in explow.c.
9618 OLDX is the address as it was before break_out_memory_refs was called.
9619 In some cases it is useful to look at this to decide what needs to be done.
9621 MODE and WIN are passed so that this macro can use
9622 GO_IF_LEGITIMATE_ADDRESS.
9624 It is always safe for this macro to do nothing. It exists to recognize
9625 opportunities to optimize the output.
9627 For the 80386, we handle X+REG by loading X into a register R and
9628 using R+REG. R will go in a general reg and indexing will be used.
9629 However, if REG is a broken-out memory address or multiplication,
9630 nothing needs to be done because REG can certainly go in a general reg.
9632 When -fpic is used, special handling is needed for symbolic references.
9633 See comments by legitimize_pic_address in i386.c for details. */
9636 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
/* TLS symbols get dedicated handling regardless of PIC-ness.  */
9641 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
9643 return legitimize_tls_address (x, (enum tls_model) log, false);
/* (const (plus (symbol_ref tls) offset)) — legitimize the symbol and
   re-add the offset.  */
9644 if (GET_CODE (x) == CONST
9645 && GET_CODE (XEXP (x, 0)) == PLUS
9646 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9647 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
9649 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
9650 (enum tls_model) log, false);
9651 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* Likewise for dllimported symbols (with or without offset).  */
9654 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9656 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
9657 return legitimize_dllimport_symbol (x, true);
9658 if (GET_CODE (x) == CONST
9659 && GET_CODE (XEXP (x, 0)) == PLUS
9660 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9661 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
9663 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
9664 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9668 if (flag_pic && SYMBOLIC_CONST (x))
9669 return legitimize_pic_address (x, 0);
9671 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
9672 if (GET_CODE (x) == ASHIFT
9673 && CONST_INT_P (XEXP (x, 1))
9674 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
9677 log = INTVAL (XEXP (x, 1));
9678 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
9679 GEN_INT (1 << log));
9682 if (GET_CODE (x) == PLUS)
9684 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
9686 if (GET_CODE (XEXP (x, 0)) == ASHIFT
9687 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9688 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
9691 log = INTVAL (XEXP (XEXP (x, 0), 1));
9692 XEXP (x, 0) = gen_rtx_MULT (Pmode,
9693 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
9694 GEN_INT (1 << log));
9697 if (GET_CODE (XEXP (x, 1)) == ASHIFT
9698 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9699 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
9702 log = INTVAL (XEXP (XEXP (x, 1), 1));
9703 XEXP (x, 1) = gen_rtx_MULT (Pmode,
9704 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
9705 GEN_INT (1 << log));
9708 /* Put multiply first if it isn't already. */
9709 if (GET_CODE (XEXP (x, 1)) == MULT)
9711 rtx tmp = XEXP (x, 0);
9712 XEXP (x, 0) = XEXP (x, 1);
9717 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
9718 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
9719 created by virtual register instantiation, register elimination, and
9720 similar optimizations. */
9721 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
9724 x = gen_rtx_PLUS (Pmode,
9725 gen_rtx_PLUS (Pmode, XEXP (x, 0),
9726 XEXP (XEXP (x, 1), 0)),
9727 XEXP (XEXP (x, 1), 1));
9731 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
9732 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
9733 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
9734 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9735 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
9736 && CONSTANT_P (XEXP (x, 1)))
9739 rtx other = NULL_RTX;
/* Identify which of the two constants is the integer displacement.  */
9741 if (CONST_INT_P (XEXP (x, 1)))
9743 constant = XEXP (x, 1);
9744 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
9746 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
9748 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
9749 other = XEXP (x, 1);
9757 x = gen_rtx_PLUS (Pmode,
9758 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
9759 XEXP (XEXP (XEXP (x, 0), 1), 0)),
9760 plus_constant (other, INTVAL (constant)));
/* If the canonicalizations above already yield a legitimate address,
   stop here.  */
9764 if (changed && legitimate_address_p (mode, x, FALSE))
/* Otherwise force non-address subexpressions into registers.  */
9767 if (GET_CODE (XEXP (x, 0)) == MULT)
9770 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
9773 if (GET_CODE (XEXP (x, 1)) == MULT)
9776 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
9780 && REG_P (XEXP (x, 1))
9781 && REG_P (XEXP (x, 0)))
9784 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
9787 x = legitimize_pic_address (x, 0);
9790 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: compute the non-register operand into a temporary and
   use reg+reg addressing.  */
9793 if (REG_P (XEXP (x, 0)))
9795 rtx temp = gen_reg_rtx (Pmode);
9796 rtx val = force_operand (XEXP (x, 1), temp);
9798 emit_move_insn (temp, val);
9804 else if (REG_P (XEXP (x, 1)))
9806 rtx temp = gen_reg_rtx (Pmode);
9807 rtx val = force_operand (XEXP (x, 0), temp);
9809 emit_move_insn (temp, val);
9819 /* Print an integer constant expression in assembler syntax. Addition
9820 and subtraction are the only arithmetic that may appear in these
9821 expressions. FILE is the stdio stream to write to, X is the rtx, and
9822 CODE is the operand print code from the output string. */
9825 output_pic_addr_const (FILE *file, rtx x, int code)
9829 switch (GET_CODE (x))
9832 gcc_assert (flag_pic);
9837 if (! TARGET_MACHO || TARGET_64BIT)
9838 output_addr_const (file, x);
9841 const char *name = XSTR (x, 0);
9843 /* Mark the decl as referenced so that cgraph will
9844 output the function. */
9845 if (SYMBOL_REF_DECL (x))
9846 mark_decl_referenced (SYMBOL_REF_DECL (x));
9849 if (MACHOPIC_INDIRECT
9850 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
9851 name = machopic_indirection_name (x, /*stub_p=*/true);
9853 assemble_name (file, name);
/* 'P' requests a @PLT suffix for non-local symbols on ELF targets.  */
9855 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
9856 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
9857 fputs ("@PLT", file);
9864 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
9865 assemble_name (asm_out_file, buf);
9869 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9873 /* This used to output parentheses around the expression,
9874 but that does not work on the 386 (either ATT or BSD assembler). */
9875 output_pic_addr_const (file, XEXP (x, 0), code);
9879 if (GET_MODE (x) == VOIDmode)
9881 /* We can use %d if the number is <32 bits and positive. */
9882 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
9883 fprintf (file, "0x%lx%08lx",
9884 (unsigned long) CONST_DOUBLE_HIGH (x),
9885 (unsigned long) CONST_DOUBLE_LOW (x));
9887 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
9890 /* We can't handle floating point constants;
9891 PRINT_OPERAND must handle them. */
9892 output_operand_lossage ("floating constant misused");
9896 /* Some assemblers need integer constants to appear first. */
9897 if (CONST_INT_P (XEXP (x, 0)))
9899 output_pic_addr_const (file, XEXP (x, 0), code);
9901 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: only constant-integer subtrahends are representable.  */
9905 gcc_assert (CONST_INT_P (XEXP (x, 1)));
9906 output_pic_addr_const (file, XEXP (x, 1), code);
9908 output_pic_addr_const (file, XEXP (x, 0), code);
9914 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
9915 output_pic_addr_const (file, XEXP (x, 0), code);
9917 output_pic_addr_const (file, XEXP (x, 1), code);
9919 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: emit the relocation operator for the wrapped symbol.  */
9923 gcc_assert (XVECLEN (x, 0) == 1);
9924 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
9925 switch (XINT (x, 1))
9928 fputs ("@GOT", file);
9931 fputs ("@GOTOFF", file);
9934 fputs ("@PLTOFF", file);
9936 case UNSPEC_GOTPCREL:
9937 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9938 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
9940 case UNSPEC_GOTTPOFF:
9941 /* FIXME: This might be @TPOFF in Sun ld too. */
9942 fputs ("@GOTTPOFF", file);
9945 fputs ("@TPOFF", file);
9949 fputs ("@TPOFF", file);
9951 fputs ("@NTPOFF", file);
9954 fputs ("@DTPOFF", file);
9956 case UNSPEC_GOTNTPOFF:
9958 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9959 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
9961 fputs ("@GOTNTPOFF", file);
9963 case UNSPEC_INDNTPOFF:
9964 fputs ("@INDNTPOFF", file);
9967 case UNSPEC_MACHOPIC_OFFSET:
9969 machopic_output_function_base_name (file);
9973 output_operand_lossage ("invalid UNSPEC as operand");
9979 output_operand_lossage ("invalid expression as operand");
9983 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9984 We need to emit DTP-relative relocations. */
9986 static void ATTRIBUTE_UNUSED
9987 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emit the symbol with a @DTPOFF relocation as a .long directive.  */
9989 fputs (ASM_LONG, file);
9990 output_addr_const (file, x);
9991 fputs ("@DTPOFF", file);
/* Pad to the requested size with a zero upper word.  NOTE(review):
   elided context suggests this is the SIZE == 8 branch — confirm.  */
9997 fputs (", 0", file);
10000 gcc_unreachable ();
10004 /* Return true if X is a representation of the PIC register. This copes
10005 with calls from ix86_find_base_term, where the register might have
10006 been replaced by a cselib value. */
10009 ix86_pic_register_p (rtx x)
/* A cselib VALUE stands in for a register during alias analysis;
   compare it against the PIC register via cselib equality.  */
10011 if (GET_CODE (x) == VALUE)
10012 return (pic_offset_table_rtx
10013 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10015 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10018 /* In the name of slightly smaller debug output, and to cater to
10019 general assembler lossage, recognize PIC+GOTOFF and turn it back
10020 into a direct symbol reference.
10022 On Darwin, this is necessary to avoid a crash, because Darwin
10023 has a different PIC label for each routine but the DWARF debugging
10024 information is not associated with any particular routine, so it's
10025 necessary to remove references to the PIC label from RTL stored by
10026 the DWARF output code. */
10029 ix86_delegitimize_address (rtx orig_x)
10032 /* reg_addend is NULL or a multiple of some register. */
10033 rtx reg_addend = NULL_RTX;
10034 /* const_addend is NULL or a const_int. */
10035 rtx const_addend = NULL_RTX;
10036 /* This is the result, or NULL. */
10037 rtx result = NULL_RTX;
/* 64-bit case: a GOTPCREL load delegitimizes straight to its symbol.  */
10044 if (GET_CODE (x) != CONST
10045 || GET_CODE (XEXP (x, 0)) != UNSPEC
10046 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10047 || !MEM_P (orig_x))
10049 return XVECEXP (XEXP (x, 0), 0, 0);
/* 32-bit case: expect (plus <pic-or-index> (const ...)).  */
10052 if (GET_CODE (x) != PLUS
10053 || GET_CODE (XEXP (x, 1)) != CONST)
10056 if (ix86_pic_register_p (XEXP (x, 0)))
10057 /* %ebx + GOT/GOTOFF */
10059 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10061 /* %ebx + %reg * scale + GOT/GOTOFF */
10062 reg_addend = XEXP (x, 0);
10063 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10064 reg_addend = XEXP (reg_addend, 1);
10065 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10066 reg_addend = XEXP (reg_addend, 0);
/* The leftover addend must itself be a register or scaled index.  */
10069 if (!REG_P (reg_addend)
10070 && GET_CODE (reg_addend) != MULT
10071 && GET_CODE (reg_addend) != ASHIFT)
/* Strip an integer displacement wrapped inside the CONST.  */
10077 x = XEXP (XEXP (x, 1), 0);
10078 if (GET_CODE (x) == PLUS
10079 && CONST_INT_P (XEXP (x, 1)))
10081 const_addend = XEXP (x, 1);
/* GOT references only make sense from a MEM; GOTOFF only outside one.  */
10085 if (GET_CODE (x) == UNSPEC
10086 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10087 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10088 result = XVECEXP (x, 0, 0);
10090 if (TARGET_MACHO && darwin_local_data_pic (x)
10091 && !MEM_P (orig_x))
10092 result = XVECEXP (x, 0, 0);
/* Re-attach any stripped displacement and register addend.  */
10098 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10100 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10104 /* If X is a machine specific address (i.e. a symbol or label being
10105 referenced as a displacement from the GOT implemented using an
10106 UNSPEC), then return the base term. Otherwise return X. */
10109 ix86_find_base_term (rtx x)
10115 if (GET_CODE (x) != CONST)
10117 term = XEXP (x, 0);
/* Skip over a constant displacement wrapped around the UNSPEC.  */
10118 if (GET_CODE (term) == PLUS
10119 && (CONST_INT_P (XEXP (term, 1))
10120 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10121 term = XEXP (term, 0);
10122 if (GET_CODE (term) != UNSPEC
10123 || XINT (term, 1) != UNSPEC_GOTPCREL)
10126 return XVECEXP (term, 0, 0);
/* Not a GOTPCREL form; fall back to full delegitimization.  */
10129 return ix86_delegitimize_address (x);
/* Emit to FILE the condition-code suffix (e.g. "e", "a", "nb") for
   comparison CODE in flags mode MODE.  REVERSE inverts the condition;
   FP selects the fcmov-style spellings.  */
10133 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10134 int fp, FILE *file)
10136 const char *suffix;
/* FP compares are first mapped onto the equivalent integer condition.  */
10138 if (mode == CCFPmode || mode == CCFPUmode)
10140 enum rtx_code second_code, bypass_code;
10141 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10142 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10143 code = ix86_fp_compare_code_to_integer (code);
10147 code = reverse_condition (code);
10198 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10202 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10203 Those same assemblers have the same but opposite lossage on cmov. */
10204 if (mode == CCmode)
10205 suffix = fp ? "nbe" : "a";
10206 else if (mode == CCCmode)
10209 gcc_unreachable ();
10225 gcc_unreachable ();
10229 gcc_assert (mode == CCmode || mode == CCCmode);
10246 gcc_unreachable ();
10250 /* ??? As above. */
10251 gcc_assert (mode == CCmode || mode == CCCmode);
10252 suffix = fp ? "nb" : "ae";
10255 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10259 /* ??? As above. */
10260 if (mode == CCmode)
10262 else if (mode == CCCmode)
10263 suffix = fp ? "nb" : "ae";
10265 gcc_unreachable ();
/* Parity flags: 'u'/'nu' for fcmov, 'p'/'np' otherwise.  */
10268 suffix = fp ? "u" : "p";
10271 suffix = fp ? "nu" : "np";
10274 gcc_unreachable ();
10276 fputs (suffix, file);
10279 /* Print the name of register X to FILE based on its machine mode and number.
10280 If CODE is 'w', pretend the mode is HImode.
10281 If CODE is 'b', pretend the mode is QImode.
10282 If CODE is 'k', pretend the mode is SImode.
10283 If CODE is 'q', pretend the mode is DImode.
10284 If CODE is 'x', pretend the mode is V4SFmode.
10285 If CODE is 't', pretend the mode is V8SFmode.
10286 If CODE is 'h', pretend the reg is the 'high' byte register.
10287 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10288 If CODE is 'd', duplicate the operand for AVX instruction.
10292 print_reg (rtx x, int code, FILE *file)
10295 bool duplicated = code == 'd' && TARGET_AVX;
/* Virtual/internal registers must never reach assembly output.  */
10297 gcc_assert (x == pc_rtx
10298 || (REGNO (x) != ARG_POINTER_REGNUM
10299 && REGNO (x) != FRAME_POINTER_REGNUM
10300 && REGNO (x) != FLAGS_REG
10301 && REGNO (x) != FPSR_REG
10302 && REGNO (x) != FPCR_REG));
10304 if (ASSEMBLER_DIALECT == ASM_ATT)
10309 gcc_assert (TARGET_64BIT);
10310 fputs ("rip", file);
/* Translate the size-override CODE into an operand byte size.  */
10314 if (code == 'w' || MMX_REG_P (x))
10316 else if (code == 'b')
10318 else if (code == 'k')
10320 else if (code == 'q')
10322 else if (code == 'y')
10324 else if (code == 'h')
10326 else if (code == 'x')
10328 else if (code == 't')
10331 code = GET_MODE_SIZE (GET_MODE (x));
10333 /* Irritatingly, AMD extended registers use different naming convention
10334 from the normal registers. */
10335 if (REX_INT_REG_P (x))
10337 gcc_assert (TARGET_64BIT);
10341 error ("extended registers have no high halves");
10344 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10347 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10350 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10353 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10356 error ("unsupported operand size for extended register");
10366 if (STACK_TOP_P (x))
/* Legacy registers: prefix with 'e'/'r' for 4/8-byte integer regs.  */
10375 if (! ANY_FP_REG_P (x))
10376 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10381 reg = hi_reg_name[REGNO (x)];
10384 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10386 reg = qi_reg_name[REGNO (x)];
10389 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10391 reg = qi_high_reg_name[REGNO (x)];
10396 gcc_assert (!duplicated);
/* Drop the leading size letter of the canonical name (e.g. "ax").  */
10398 fputs (hi_reg_name[REGNO (x)] + 1, file);
10403 gcc_unreachable ();
/* AVX 'd': print the register a second time (src = dst duplication).  */
10409 if (ASSEMBLER_DIALECT == ASM_ATT)
10410 fprintf (file, ", %%%s", reg);
10412 fprintf (file, ", %s", reg);
10416 /* Locate some local-dynamic symbol still in use by this function
10417 so that we can print its name in some tls_local_dynamic_base
 
   for_each_rtx callback: stop (return nonzero) at the first
   local-dynamic SYMBOL_REF, caching its name on cfun.  */
10421 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10425 if (GET_CODE (x) == SYMBOL_REF
10426 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10428 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return the cached local-dynamic symbol name for the current function,
   scanning its insns on first use.  Aborts if none exists — callers must
   only ask when a local-dynamic reference is known to be present.  */
10435 static const char *
10436 get_some_local_dynamic_name (void)
10440 if (cfun->machine->some_ld_name)
10441 return cfun->machine->some_ld_name;
10443 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10445 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10446 return cfun->machine->some_ld_name;
10448 gcc_unreachable ();
10451 /* Meaning of CODE:
10452 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10453 C -- print opcode suffix for set/cmov insn.
10454 c -- like C, but print reversed condition
10455 E,e -- likewise, but for compare-and-branch fused insn.
10456 F,f -- likewise, but for floating-point.
10457 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10459 R -- print the prefix for register names.
10460 z -- print the opcode suffix for the size of the current operand.
10461 * -- print a star (in certain assembler syntax)
10462 A -- print an absolute memory reference.
10463 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10464 s -- print a shift double count, followed by the assemblers argument
10466 b -- print the QImode name of the register for the indicated operand.
10467 %b0 would print %al if operands[0] is reg 0.
10468 w -- likewise, print the HImode name of the register.
10469 k -- likewise, print the SImode name of the register.
10470 q -- likewise, print the DImode name of the register.
10471 x -- likewise, print the V4SFmode name of the register.
10472 t -- likewise, print the V8SFmode name of the register.
10473 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10474 y -- print "st(0)" instead of "st" as a register.
10475 d -- print duplicated register operand for AVX instruction.
10476 D -- print condition for SSE cmp instruction.
10477 P -- if PIC, print an @PLT suffix.
10478 X -- don't print any sort of PIC '@' suffix for a symbol.
10479 & -- print some in-use local-dynamic symbol name.
10480 H -- print a memory address offset by 8; used for sse high-parts
10481 Y -- print condition for SSE5 com* instruction.
10482 + -- print a branch hint as 'cs' or 'ds' prefix
10483 ; -- print a semicolon (after prefixes due to bug in older gas).
10487 print_operand (FILE *file, rtx x, int code)
10494 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': emit the cached local-dynamic TLS symbol name.  */
10499 assemble_name (file, get_some_local_dynamic_name ());
10503 switch (ASSEMBLER_DIALECT)
10510 /* Intel syntax. For absolute addresses, registers should not
10511 be surrounded by braces. */
10515 PRINT_OPERAND (file, x, 0);
10522 gcc_unreachable ();
10525 PRINT_OPERAND (file, x, 0);
/* Size-override codes emit an AT&T suffix letter then fall through to
   print the operand itself (elided here).  */
10530 if (ASSEMBLER_DIALECT == ASM_ATT)
10535 if (ASSEMBLER_DIALECT == ASM_ATT)
10540 if (ASSEMBLER_DIALECT == ASM_ATT)
10545 if (ASSEMBLER_DIALECT == ASM_ATT)
10550 if (ASSEMBLER_DIALECT == ASM_ATT)
10555 if (ASSEMBLER_DIALECT == ASM_ATT)
10560 /* 387 opcodes don't get size suffixes if the operands are
10562 if (STACK_REG_P (x))
10565 /* Likewise if using Intel opcodes. */
10566 if (ASSEMBLER_DIALECT == ASM_INTEL)
10569 /* This is the size of op from size of operand. */
10570 switch (GET_MODE_SIZE (GET_MODE (x)))
10579 #ifdef HAVE_GAS_FILDS_FISTS
10589 if (GET_MODE (x) == SFmode)
10604 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10608 #ifdef GAS_MNEMONICS
10623 gcc_unreachable ();
/* 's': shift-double count — omit when the assembler implies %cl.  */
10640 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
10642 PRINT_OPERAND (file, x, 0);
10643 fputs (", ", file);
10648 /* Little bit of braindamage here. The SSE compare instructions
10649 does use completely different names for the comparisons that the
10650 fp conditional moves. */
10653 switch (GET_CODE (x))
10656 fputs ("eq", file);
10659 fputs ("eq_us", file);
10662 fputs ("lt", file);
10665 fputs ("nge", file);
10668 fputs ("le", file);
10671 fputs ("ngt", file);
10674 fputs ("unord", file);
10677 fputs ("neq", file);
10680 fputs ("neq_oq", file);
10683 fputs ("ge", file);
10686 fputs ("nlt", file);
10689 fputs ("gt", file);
10692 fputs ("nle", file);
10695 fputs ("ord", file);
10698 gcc_unreachable ();
10703 switch (GET_CODE (x))
10707 fputs ("eq", file);
10711 fputs ("lt", file);
10715 fputs ("le", file);
10718 fputs ("unord", file);
10722 fputs ("neq", file);
10726 fputs ("nlt", file);
10730 fputs ("nle", file);
10733 fputs ("ord", file);
10736 gcc_unreachable ();
/* 'O': Sun assembler wants an explicit size letter on cmov.  */
10741 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10742 if (ASSEMBLER_DIALECT == ASM_ATT)
10744 switch (GET_MODE (x))
10746 case HImode: putc ('w', file); break;
10748 case SFmode: putc ('l', file); break;
10750 case DFmode: putc ('q', file); break;
10751 default: gcc_unreachable ();
/* 'C'/'F'/'c'/'f': condition-code suffixes via put_condition_code.  */
10758 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
10761 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10762 if (ASSEMBLER_DIALECT == ASM_ATT)
10765 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
10768 /* Like above, but reverse condition */
10770 /* Check to see if argument to %c is really a constant
10771 and not a condition code which needs to be reversed. */
10772 if (!COMPARISON_P (x))
10774 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
10777 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
10780 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10781 if (ASSEMBLER_DIALECT == ASM_ATT)
10784 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
10788 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
10792 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
10796 /* It doesn't actually matter what mode we use here, as we're
10797 only going to use this for printing. */
10798 x = adjust_address_nv (x, DImode, 8);
/* '+': emit a taken/not-taken branch hint prefix when profile data
   disagrees with the CPU's static prediction.  */
10806 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
10809 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
10812 int pred_val = INTVAL (XEXP (x, 0));
10814 if (pred_val < REG_BR_PROB_BASE * 45 / 100
10815 || pred_val > REG_BR_PROB_BASE * 55 / 100)
10817 int taken = pred_val > REG_BR_PROB_BASE / 2;
10818 int cputaken = final_forward_branch_p (current_output_insn) == 0;
10820 /* Emit hints only in the case default branch prediction
10821 heuristics would fail. */
10822 if (taken != cputaken)
10824 /* We use 3e (DS) prefix for taken branches and
10825 2e (CS) prefix for not taken branches. */
10827 fputs ("ds ; ", file);
10829 fputs ("cs ; ", file);
/* 'Y': SSE5 com* comparison names.  */
10837 switch (GET_CODE (x))
10840 fputs ("neq", file);
10843 fputs ("eq", file);
10847 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
10851 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
10855 fputs ("le", file);
10859 fputs ("lt", file);
10862 fputs ("unord", file);
10865 fputs ("ord", file);
10868 fputs ("ueq", file);
10871 fputs ("nlt", file);
10874 fputs ("nle", file);
10877 fputs ("ule", file);
10880 fputs ("ult", file);
10883 fputs ("une", file);
10886 gcc_unreachable ();
/* ';': separator to work around prefix handling in older gas.  */
10892 fputs (" ; ", file);
10899 output_operand_lossage ("invalid operand code '%c'", code);
/* No (or handled) code letter: print the operand by its own kind.  */
10904 print_reg (x, code, file);
10906 else if (MEM_P (x))
10908 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
10909 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
10910 && GET_MODE (x) != BLKmode)
10913 switch (GET_MODE_SIZE (GET_MODE (x)))
10915 case 1: size = "BYTE"; break;
10916 case 2: size = "WORD"; break;
10917 case 4: size = "DWORD"; break;
10918 case 8: size = "QWORD"; break;
10919 case 12: size = "XWORD"; break;
10921 if (GET_MODE (x) == XFmode)
10927 gcc_unreachable ();
10930 /* Check for explicit size override (codes 'b', 'w' and 'k') */
10933 else if (code == 'w')
10935 else if (code == 'k')
10938 fputs (size, file);
10939 fputs (" PTR ", file);
10943 /* Avoid (%rip) for call operands. */
10944 if (CONSTANT_ADDRESS_P (x) && code == 'P'
10945 && !CONST_INT_P (x))
10946 output_addr_const (file, x);
10947 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
10948 output_operand_lossage ("invalid constraints for operand");
10950 output_address (x);
/* SFmode constants are emitted as their raw 32-bit bit pattern.  */
10953 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
10958 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10959 REAL_VALUE_TO_TARGET_SINGLE (r, l);
10961 if (ASSEMBLER_DIALECT == ASM_ATT)
10963 fprintf (file, "0x%08lx", (long unsigned int) l);
10966 /* These float cases don't actually occur as immediate operands. */
10967 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
10971 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10972 fprintf (file, "%s", dstr);
10975 else if (GET_CODE (x) == CONST_DOUBLE
10976 && GET_MODE (x) == XFmode)
10980 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10981 fprintf (file, "%s", dstr);
10986 /* We have patterns that allow zero sets of memory, for instance.
10987 In 64-bit mode, we should probably support all 8-byte vectors,
10988 since we can in fact encode that into an immediate. */
10989 if (GET_CODE (x) == CONST_VECTOR)
10991 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediates get '$' in AT&T syntax; 'OFFSET FLAT:' marks symbolic
   immediates in Intel syntax.  */
10997 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
10999 if (ASSEMBLER_DIALECT == ASM_ATT)
11002 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11003 || GET_CODE (x) == LABEL_REF)
11005 if (ASSEMBLER_DIALECT == ASM_ATT)
11008 fputs ("OFFSET FLAT:", file);
11011 if (CONST_INT_P (x))
11012 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11014 output_pic_addr_const (file, x, code);
11016 output_addr_const (file, x);
11020 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): this extraction is missing interior lines of the original
   function (the embedded file line numbers jump); comments below describe
   only what is visible here.  ADDR is decomposed into base/index/disp/
   scale/segment parts and printed in either AT&T or Intel syntax.  */
11023 print_operand_address (FILE *file, rtx addr)
11025 struct ix86_address parts;
11026 rtx base, index, disp;
/* Split ADDR into its addressing-mode components; `ok' presumably feeds a
   gcc_assert not visible in this extraction -- confirm against full file.  */
11028 int ok = ix86_decompose_address (addr, &parts);
11033 index = parts.index;
11035 scale = parts.scale;
/* Emit an explicit segment-override prefix for FS/GS addresses.  */
11043 if (ASSEMBLER_DIALECT == ASM_ATT)
11045 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11048 gcc_unreachable ();
11051 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11052 if (TARGET_64BIT && !base && !index)
/* Strip a constant offset wrapped as (const (plus sym N)) so the bare
   symbol can be tested below.  */
11056 if (GET_CODE (disp) == CONST
11057 && GET_CODE (XEXP (disp, 0)) == PLUS
11058 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11059 symbol = XEXP (XEXP (disp, 0), 0);
/* Only non-TLS symbols and labels are eligible for %rip addressing.  */
11061 if (GET_CODE (symbol) == LABEL_REF
11062 || (GET_CODE (symbol) == SYMBOL_REF
11063 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11066 if (!base && !index)
11068 /* Displacement only requires special attention. */
11070 if (CONST_INT_P (disp))
/* Intel syntax needs an explicit `ds:' for an absolute address with no
   segment override, or the assembler would treat it as an immediate.  */
11072 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11073 fputs ("ds:", file);
11074 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11077 output_pic_addr_const (file, disp, 0);
11079 output_addr_const (file, disp);
/* AT&T syntax: disp(base,index,scale) form.  */
11083 if (ASSEMBLER_DIALECT == ASM_ATT)
11088 output_pic_addr_const (file, disp, 0);
11089 else if (GET_CODE (disp) == LABEL_REF)
11090 output_asm_label (disp);
11092 output_addr_const (file, disp);
11097 print_reg (base, 0, file);
11101 print_reg (index, 0, file);
11103 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp] form.  */
11109 rtx offset = NULL_RTX;
11113 /* Pull out the offset of a symbol; print any symbol itself. */
11114 if (GET_CODE (disp) == CONST
11115 && GET_CODE (XEXP (disp, 0)) == PLUS
11116 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11118 offset = XEXP (XEXP (disp, 0), 1);
11119 disp = gen_rtx_CONST (VOIDmode,
11120 XEXP (XEXP (disp, 0), 0));
11124 output_pic_addr_const (file, disp, 0);
11125 else if (GET_CODE (disp) == LABEL_REF)
11126 output_asm_label (disp);
11127 else if (CONST_INT_P (disp))
11130 output_addr_const (file, disp);
11136 print_reg (base, 0, file);
/* Print the (possibly pulled-out) numeric offset after the base register;
   the missing lines presumably emit the `+'/`-' sign -- confirm.  */
11139 if (INTVAL (offset) >= 0)
11141 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11145 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11152 print_reg (index, 0, file);
11154 fprintf (file, "*%d", scale);
/* Print the target-specific UNSPEC address constants (TLS relocations and,
   on Darwin, the Mach-O PIC offset).  Returns false for non-UNSPEC rtx so
   the generic printer can handle them.  NOTE(review): interior lines
   (breaks, dialect tests) are missing from this extraction.  */
11162 output_addr_const_extra (FILE *file, rtx x)
11166 if (GET_CODE (x) != UNSPEC)
11169 op = XVECEXP (x, 0, 0);
11170 switch (XINT (x, 1))
11172 case UNSPEC_GOTTPOFF:
11173 output_addr_const (file, op);
11174 /* FIXME: This might be @TPOFF in Sun ld. */
11175 fputs ("@GOTTPOFF", file);
11178 output_addr_const (file, op);
11179 fputs ("@TPOFF", file);
11181 case UNSPEC_NTPOFF:
11182 output_addr_const (file, op);
/* 64-bit uses @TPOFF, 32-bit @NTPOFF -- the distinguishing TARGET_64BIT
   test is among the elided lines; confirm against full file.  */
11184 fputs ("@TPOFF", file);
11186 fputs ("@NTPOFF", file);
11188 case UNSPEC_DTPOFF:
11189 output_addr_const (file, op);
11190 fputs ("@DTPOFF", file);
11191 case UNSPEC_GOTNTPOFF:
11193 output_addr_const (file, op);
/* RIP-relative spelling differs between AT&T and Intel dialects.  */
11195 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11196 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11198 fputs ("@GOTNTPOFF", file);
11200 case UNSPEC_INDNTPOFF:
11201 output_addr_const (file, op);
11202 fputs ("@INDNTPOFF", file);
11205 case UNSPEC_MACHOPIC_OFFSET:
11206 output_addr_const (file, op);
/* Darwin: offset is relative to the function's PIC base label.  */
11208 machopic_output_function_base_name (file);
11219 /* Split one or more DImode RTL references into pairs of SImode
11220 references. The RTL can be REG, offsettable MEM, integer constant, or
11221 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11222 split and "num" is its length. lo_half and hi_half are output arrays
11223 that parallel "operands". */
/* NOTE(review): the loop header iterating over `num' is among the lines
   elided from this extraction.  */
11226 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11230 rtx op = operands[num];
11232 /* simplify_subreg refuse to split volatile memory addresses,
11233 but we still have to handle it. */
/* MEM case: address arithmetic via adjust_address (low word at offset 0,
   high word at offset 4).  */
11236 lo_half[num] = adjust_address (op, SImode, 0);
11237 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM case: subreg extraction; VOIDmode constants are treated as
   DImode so the subreg machinery knows the source width.  */
11241 lo_half[num] = simplify_gen_subreg (SImode, op,
11242 GET_MODE (op) == VOIDmode
11243 ? DImode : GET_MODE (op), 0);
11244 hi_half[num] = simplify_gen_subreg (SImode, op,
11245 GET_MODE (op) == VOIDmode
11246 ? DImode : GET_MODE (op), 4);
11250 /* Split one or more TImode RTL references into pairs of DImode
11251 references. The RTL can be REG, offsettable MEM, integer constant, or
11252 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11253 split and "num" is its length. lo_half and hi_half are output arrays
11254 that parallel "operands". */
/* TImode analogue of split_di: halves are DImode, 8 bytes apart.
   NOTE(review): the loop header over `num' is elided here too.  */
11257 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11261 rtx op = operands[num];
11263 /* simplify_subreg refuse to split volatile memory addresses, but we
11264 still have to handle it. */
11267 lo_half[num] = adjust_address (op, DImode, 0);
11268 hi_half[num] = adjust_address (op, DImode, 8);
11272 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11273 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11278 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11279 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11280 is the expression of the binary operation. The output may either be
11281 emitted here, or returned to the caller, like all output_* functions.
11283 There is no guarantee that the operands are the same mode, as they
11284 might be within FLOAT or FLOAT_EXTEND expressions. */
11286 #ifndef SYSV386_COMPAT
11287 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11288 wants to fix the assemblers because that causes incompatibility
11289 with gcc. No-one wants to fix gcc because that causes
11290 incompatibility with assemblers... You can use the option of
11291 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11292 #define SYSV386_COMPAT 1
/* NOTE(review): a large number of interior lines (the #endif, the opname
   selections per rtx code, buffer assembly) are elided from this
   extraction; comments below annotate only the visible fragments.  */
11296 output_387_binary_op (rtx insn, rtx *operands)
11298 static char buf[40];
11301 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11303 #ifdef ENABLE_CHECKING
11304 /* Even if we do not want to check the inputs, this documents input
11305 constraints. Which helps in understanding the following code. */
11306 if (STACK_REG_P (operands[0])
11307 && ((REG_P (operands[1])
11308 && REGNO (operands[0]) == REGNO (operands[1])
11309 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11310 || (REG_P (operands[2])
11311 && REGNO (operands[0]) == REGNO (operands[2])
11312 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11313 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11316 gcc_assert (is_sse);
/* Pick the base mnemonic from the operation code; when an operand is an
   integer MEM the fi* (integer-operand) form is presumably chosen in the
   elided lines -- confirm against full file.  */
11319 switch (GET_CODE (operands[3]))
11322 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11323 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11331 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11332 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11340 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11341 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11349 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11350 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11358 gcc_unreachable ();
/* SSE path: scalar ss/sd suffix chosen by operand mode; the three-operand
   template is presumably the AVX encoding (ssep with leading 'v').  */
11365 strcpy (buf, ssep);
11366 if (GET_MODE (operands[0]) == SFmode)
11367 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11369 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11373 strcpy (buf, ssep + 1);
11374 if (GET_MODE (operands[0]) == SFmode)
11375 strcat (buf, "ss\t{%2, %0|%0, %2}");
11377 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: pick the operand/popping suffix `p' appended to buf.  */
11383 switch (GET_CODE (operands[3]))
/* For commutative ops, canonicalize so operands[0] == operands[1].  */
11387 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11389 rtx temp = operands[2];
11390 operands[2] = operands[1];
11391 operands[1] = temp;
11394 /* know operands[0] == operands[1]. */
11396 if (MEM_P (operands[2]))
11402 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11404 if (STACK_TOP_P (operands[0]))
11405 /* How is it that we are storing to a dead operand[2]?
11406 Well, presumably operands[1] is dead too. We can't
11407 store the result to st(0) as st(0) gets popped on this
11408 instruction. Instead store to operands[2] (which I
11409 think has to be st(1)). st(1) will be popped later.
11410 gcc <= 2.8.1 didn't have this check and generated
11411 assembly code that the Unixware assembler rejected. */
11412 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11414 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11418 if (STACK_TOP_P (operands[0]))
11419 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11421 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): operand order matters and the SysV
   assembler quirk (see SYSV386_COMPAT above) reverses fsub{r}/fdiv{r}
   direction when the destination is not st(0).  */
11426 if (MEM_P (operands[1]))
11432 if (MEM_P (operands[2]))
11438 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11441 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11442 derived assemblers, confusingly reverse the direction of
11443 the operation for fsub{r} and fdiv{r} when the
11444 destination register is not st(0). The Intel assembler
11445 doesn't have this brain damage. Read !SYSV386_COMPAT to
11446 figure out what the hardware really does. */
11447 if (STACK_TOP_P (operands[0]))
11448 p = "{p\t%0, %2|rp\t%2, %0}";
11450 p = "{rp\t%2, %0|p\t%0, %2}";
11452 if (STACK_TOP_P (operands[0]))
11453 /* As above for fmul/fadd, we can't store to st(0). */
11454 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11456 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11461 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11464 if (STACK_TOP_P (operands[0]))
11465 p = "{rp\t%0, %1|p\t%1, %0}";
11467 p = "{p\t%1, %0|rp\t%0, %1}";
11469 if (STACK_TOP_P (operands[0]))
11470 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11472 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11477 if (STACK_TOP_P (operands[0]))
11479 if (STACK_TOP_P (operands[1]))
11480 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11482 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11485 else if (STACK_TOP_P (operands[1]))
11488 p = "{\t%1, %0|r\t%0, %1}";
11490 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11496 p = "{r\t%2, %0|\t%0, %2}";
11498 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11504 gcc_unreachable ();
11511 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Classifies INSN's x87 control-word requirement for mode switching:
   UNINITIALIZED after calls/asm (CW unknown), a specific rounding mode
   for insns whose i387_cw attribute demands one, else ANY.
   NOTE(review): the CALL_P test and the per-mode TARGET_80387/round-insn
   guards are among the elided lines -- confirm against full file.  */
11514 ix86_mode_needed (int entity, rtx insn)
11516 enum attr_i387_cw mode;
11518 /* The mode UNINITIALIZED is used to store control word after a
11519 function call or ASM pattern. The mode ANY specify that function
11520 has no requirements on the control word and make no changes in the
11521 bits we are interested in. */
11524 || (NONJUMP_INSN_P (insn)
11525 && (asm_noperands (PATTERN (insn)) >= 0
11526 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11527 return I387_CW_UNINITIALIZED;
/* Unrecognizable insns impose no control-word constraint.  */
11529 if (recog_memoized (insn) < 0)
11530 return I387_CW_ANY;
11532 mode = get_attr_i387_cw (insn);
11537 if (mode == I387_CW_TRUNC)
11542 if (mode == I387_CW_FLOOR)
11547 if (mode == I387_CW_CEIL)
11552 if (mode == I387_CW_MASK_PM)
11557 gcc_unreachable ();
11560 return I387_CW_ANY;
11563 /* Output code to initialize control word copies used by trunc?f?i and
11564 rounding patterns. CURRENT_MODE is set to current control word,
11565 while NEW_MODE is set to new control word. */
/* Emits: fnstcw into a stack slot, load it into a reg, set the x87
   rounding-control bits (11:10 of the CW) for MODE, and store the result
   into a dedicated per-mode stack slot for later fldcw.  */
11568 emit_i387_cw_initialization (int mode)
11570 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11573 enum ix86_stack_slot slot;
11575 rtx reg = gen_reg_rtx (HImode);
/* Capture the current control word and copy it to a pseudo.  */
11577 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11578 emit_move_insn (reg, copy_rtx (stored_mode));
/* Slow path (64-bit, partial-reg-stall targets, or -Os): mask/or the
   rounding bits with full HImode logic ops.  */
11580 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
11581 || optimize_function_for_size_p (cfun))
11585 case I387_CW_TRUNC:
11586 /* round toward zero (truncate) */
11587 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
11588 slot = SLOT_CW_TRUNC;
11591 case I387_CW_FLOOR:
11592 /* round down toward -oo */
11593 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11594 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
11595 slot = SLOT_CW_FLOOR;
11599 /* round up toward +oo */
11600 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11601 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
11602 slot = SLOT_CW_CEIL;
11605 case I387_CW_MASK_PM:
11606 /* mask precision exception for nearbyint() */
11607 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11608 slot = SLOT_CW_MASK_PM;
11612 gcc_unreachable ();
/* Fast path: overwrite bits 11:8 directly with an insv of the nibble
   (0xc = truncate, 0x4 = floor, 0x8 = ceil).  */
11619 case I387_CW_TRUNC:
11620 /* round toward zero (truncate) */
11621 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
11622 slot = SLOT_CW_TRUNC;
11625 case I387_CW_FLOOR:
11626 /* round down toward -oo */
11627 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
11628 slot = SLOT_CW_FLOOR;
11632 /* round up toward +oo */
11633 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
11634 slot = SLOT_CW_CEIL;
11637 case I387_CW_MASK_PM:
11638 /* mask precision exception for nearbyint() */
11639 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11640 slot = SLOT_CW_MASK_PM;
11644 gcc_unreachable ();
11648 gcc_assert (slot < MAX_386_STACK_LOCALS);
/* Park the modified control word where the fldcw pattern will find it.  */
11650 new_mode = assign_386_stack_local (HImode, slot);
11651 emit_move_insn (new_mode, reg);
11654 /* Output code for INSN to convert a float to a signed int. OPERANDS
11655 are the insn operands. The output may be [HSD]Imode and the input
11656 operand may be [SDX]Fmode. */
/* FISTTP is always a popping conversion; without it, DImode output also
   forces the popping fistp form, so a value still live on the stack top
   must first be duplicated with fld.  */
11659 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
11661 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11662 int dimode_p = GET_MODE (operands[0]) == DImode;
11663 int round_mode = get_attr_i387_cw (insn);
11665 /* Jump through a hoop or two for DImode, since the hardware has no
11666 non-popping instruction. We used to do this a different way, but
11667 that was somewhat fragile and broke with post-reload splitters. */
11668 if ((dimode_p || fisttp) && !stack_top_dies)
11669 output_asm_insn ("fld\t%y1", operands);
11671 gcc_assert (STACK_TOP_P (operands[1]));
11672 gcc_assert (MEM_P (operands[0]));
11673 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* SSE3 fisttp truncates regardless of the control-word rounding mode,
   so no fldcw dance is needed.  */
11676 output_asm_insn ("fisttp%z0\t%0", operands);
/* Legacy path: temporarily switch the control word to the required
   rounding mode (operand 3), convert, then restore it (operand 2).  */
11679 if (round_mode != I387_CW_ANY)
11680 output_asm_insn ("fldcw\t%3", operands);
11681 if (stack_top_dies || dimode_p)
11682 output_asm_insn ("fistp%z0\t%0", operands);
11684 output_asm_insn ("fist%z0\t%0", operands);
11685 if (round_mode != I387_CW_ANY)
11686 output_asm_insn ("fldcw\t%2", operands);
11692 /* Output code for x87 ffreep insn. The OPNO argument, which may only
11693 have the values zero or one, indicates the ffreep insn's operand
11694 from the OPERANDS array. */
11696 static const char *
11697 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
11699 if (TARGET_USE_FFREEP)
11700 #if HAVE_AS_IX86_FFREEP
11701 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit the opcode bytes by hand.  ffreep st(i)
   encodes as DF C0+i; the '_' placeholder below is patched to the stack
   register number.  NOTE(review): the surrounding #else/#endif and the
   return of retval are among the elided lines.  */
11704 static char retval[] = ".word\t0xc_df";
11705 int regno = REGNO (operands[opno]);
11707 gcc_assert (FP_REGNO_P (regno));
/* Patch the digit in ".word\t0xc_df" (index 9 is the '_').  */
11709 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not wanted: a plain popping store.  */
11714 return opno ? "fstp\t%y1" : "fstp\t%y0";
11718 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
11719 should be used. UNORDERED_P is true when fucom should be used. */
/* Dispatches between SSE (v)(u)comis[sd], x87 ftst/fcompp specials, and a
   table of fcom/fucom/ficom/fcomi variants indexed by a 4-bit mask.
   NOTE(review): interior lines are elided; annotations cover only what
   is visible.  */
11722 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
11724 int stack_top_dies;
11725 rtx cmp_op0, cmp_op1;
11726 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
11730 cmp_op0 = operands[0];
11731 cmp_op1 = operands[1];
11735 cmp_op0 = operands[1];
11736 cmp_op1 = operands[2];
/* SSE path: skip the leading 'v' when AVX is unavailable.  */
11741 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
11742 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
11743 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
11744 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
11746 if (GET_MODE (operands[0]) == SFmode)
11748 return &ucomiss[TARGET_AVX ? 0 : 1];
11750 return &comiss[TARGET_AVX ? 0 : 1];
11753 return &ucomisd[TARGET_AVX ? 0 : 1];
11755 return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path: first operand must be on the stack top.  */
11758 gcc_assert (STACK_TOP_P (cmp_op0));
11760 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against 0.0 uses ftst (and frees st(0) if it dies).  */
11762 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
11764 if (stack_top_dies)
11766 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
11767 return output_387_ffreep (operands, 1);
11770 return "ftst\n\tfnstsw\t%0";
11773 if (STACK_REG_P (cmp_op1)
11775 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
11776 && REGNO (cmp_op1) != FIRST_STACK_REG)
11778 /* If both the top of the 387 stack dies, and the other operand
11779 is also a stack register that dies, then this must be a
11780 `fcompp' float compare */
11784 /* There is no double popping fcomi variant. Fortunately,
11785 eflags is immune from the fstp's cc clobbering. */
11787 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
11789 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
11790 return output_387_ffreep (operands, 0);
11795 return "fucompp\n\tfnstsw\t%0";
11797 return "fcompp\n\tfnstsw\t%0";
11802 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
11804 static const char * const alt[16] =
11806 "fcom%z2\t%y2\n\tfnstsw\t%0",
11807 "fcomp%z2\t%y2\n\tfnstsw\t%0",
11808 "fucom%z2\t%y2\n\tfnstsw\t%0",
11809 "fucomp%z2\t%y2\n\tfnstsw\t%0",
11811 "ficom%z2\t%y2\n\tfnstsw\t%0",
11812 "ficomp%z2\t%y2\n\tfnstsw\t%0",
11816 "fcomi\t{%y1, %0|%0, %y1}",
11817 "fcomip\t{%y1, %0|%0, %y1}",
11818 "fucomi\t{%y1, %0|%0, %y1}",
11819 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit selector described at line 11802 above.  */
11830 mask = eflags_p << 3;
11831 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
11832 mask |= unordered_p << 1;
11833 mask |= stack_top_dies;
11835 gcc_assert (mask < 16);
/* Emit one absolute jump-table entry: ".long L<value>" (or ".quad" when
   the 64-bit branch visible at 11850 is taken).  */
11844 ix86_output_addr_vec_elt (FILE *file, int value)
11846 const char *directive = ASM_LONG;
11850 directive = ASM_QUAD;
/* Absolute address vectors are not used in 64-bit PIC-less tables here.  */
11852 gcc_assert (!TARGET_64BIT);
11855 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one relative jump-table entry (VALUE minus REL, or a @GOTOFF /
   PIC-base-relative form depending on target).  */
11859 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
11861 const char *directive = ASM_LONG;
11864 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
11865 directive = ASM_QUAD;
11867 gcc_assert (!TARGET_64BIT);
11869 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
11870 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
11871 fprintf (file, "%s%s%d-%s%d\n",
11872 directive, LPREFIX, value, LPREFIX, rel);
11873 else if (HAVE_AS_GOTOFF_IN_DATA)
11874 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
11876 else if (TARGET_MACHO)
/* Darwin: difference against the function's Mach-O PIC base label.  */
11878 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
11879 machopic_output_function_base_name (file);
11880 fprintf(file, "\n");
/* ELF fallback: express the entry relative to the GOT symbol.  */
11884 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
11885 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
11888 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Zero DEST post-reload; xor is preferred when allowed since it is
   shorter, but it clobbers the flags, hence the added CLOBBER.  */
11892 ix86_expand_clear (rtx dest)
11896 /* We play register width games, which are only valid after reload. */
11897 gcc_assert (reload_completed);
11899 /* Avoid HImode and its attendant prefix byte. */
11900 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
11901 dest = gen_rtx_REG (SImode, REGNO (dest));
11902 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
11904 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
11905 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
/* xor form: wrap the SET with a FLAGS_REG clobber in a PARALLEL.  */
11907 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11908 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
11914 /* X is an unchanging MEM. If it is a constant pool reference, return
11915 the constant pool rtx, else NULL. */
11918 maybe_get_pool_constant (rtx x)
/* Undo PIC/GOT wrapping around the address before testing it.  */
11920 x = ix86_delegitimize_address (XEXP (x, 0));
11922 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
11923 return get_pool_constant (x);
/* Expand a scalar move of MODE, legitimizing TLS, dllimport, and PIC
   source addresses and forcing awkward operands into registers/memory.
   NOTE(review): interior lines (op0/op1 init, several else branches) are
   elided from this extraction.  */
11929 ix86_expand_move (enum machine_mode mode, rtx operands[])
11932 enum tls_model model;
/* Bare SYMBOL_REF source: handle TLS and dllimport symbols first.  */
11937 if (GET_CODE (op1) == SYMBOL_REF)
11939 model = SYMBOL_REF_TLS_MODEL (op1);
11942 op1 = legitimize_tls_address (op1, model, true);
11943 op1 = force_operand (op1, op0);
11947 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11948 && SYMBOL_REF_DLLIMPORT_P (op1))
11949 op1 = legitimize_dllimport_symbol (op1, false);
/* (const (plus sym addend)) source: legitimize the symbol part, then
   re-add the addend.  */
11951 else if (GET_CODE (op1) == CONST
11952 && GET_CODE (XEXP (op1, 0)) == PLUS
11953 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
11955 rtx addend = XEXP (XEXP (op1, 0), 1);
11956 rtx symbol = XEXP (XEXP (op1, 0), 0);
11959 model = SYMBOL_REF_TLS_MODEL (symbol);
11961 tmp = legitimize_tls_address (symbol, model, true);
11962 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11963 && SYMBOL_REF_DLLIMPORT_P (symbol))
11964 tmp = legitimize_dllimport_symbol (symbol, true);
11968 tmp = force_operand (tmp, NULL);
11969 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
11970 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic addresses must go through the PIC machinery.  */
11976 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
11978 if (TARGET_MACHO && !TARGET_64BIT)
11983 rtx temp = ((reload_in_progress
11984 || ((op0 && REG_P (op0))
11986 ? op0 : gen_reg_rtx (Pmode));
11987 op1 = machopic_indirect_data_reference (op1, temp);
11988 op1 = machopic_legitimize_pic_address (op1, mode,
11989 temp == op1 ? 0 : temp);
11991 else if (MACHOPIC_INDIRECT)
11992 op1 = machopic_indirect_data_reference (op1, 0);
12000 op1 = force_reg (Pmode, op1);
12001 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12003 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12004 op1 = legitimize_pic_address (op1, reg);
/* Non-PIC: avoid mem-to-mem moves except via push, and keep push
   sources eliminable-register-free.  */
12013 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12014 || !push_operand (op0, mode))
12016 op1 = force_reg (mode, op1);
12018 if (push_operand (op0, mode)
12019 && ! general_no_elim_operand (op1, mode))
12020 op1 = copy_to_mode_reg (mode, op1);
12022 /* Force large constants in 64bit compilation into register
12023 to get them CSEed. */
12024 if (can_create_pseudo_p ()
12025 && (mode == DImode) && TARGET_64BIT
12026 && immediate_operand (op1, mode)
12027 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12028 && !register_operand (op0, mode)
12030 op1 = copy_to_mode_reg (mode, op1);
12032 if (can_create_pseudo_p ()
12033 && FLOAT_MODE_P (mode)
12034 && GET_CODE (op1) == CONST_DOUBLE)
12036 /* If we are loading a floating point constant to a register,
12037 force the value to memory now, since we'll get better code
12038 out the back end. */
12040 op1 = validize_mem (force_const_mem (mode, op1));
12041 if (!register_operand (op0, mode))
12043 rtx temp = gen_reg_rtx (mode);
12044 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12045 emit_move_insn (op0, temp);
12051 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move: force non-zero constants to the constant
   pool, route under-aligned SSE memory operands through the misalign
   expander, and avoid mem-to-mem moves.  */
12055 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12057 rtx op0 = operands[0], op1 = operands[1];
12058 unsigned int align = GET_MODE_ALIGNMENT (mode);
12060 /* Force constants other than zero into memory. We do not know how
12061 the instructions used to build constants modify the upper 64 bits
12062 of the register, once we have that information we may be able
12063 to handle some of them more efficiently. */
12064 if (can_create_pseudo_p ()
12065 && register_operand (op0, mode)
12066 && (CONSTANT_P (op1)
12067 || (GET_CODE (op1) == SUBREG
12068 && CONSTANT_P (SUBREG_REG (op1))))
12069 && standard_sse_constant_p (op1) <= 0)
12070 op1 = validize_mem (force_const_mem (mode, op1));
12072 /* We need to check memory alignment for SSE mode since attribute
12073 can make operands unaligned. */
12074 if (can_create_pseudo_p ()
12075 && SSE_REG_MODE_P (mode)
12076 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12077 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12081 /* ix86_expand_vector_move_misalign() does not like constants ... */
12082 if (CONSTANT_P (op1)
12083 || (GET_CODE (op1) == SUBREG
12084 && CONSTANT_P (SUBREG_REG (op1))))
12085 op1 = validize_mem (force_const_mem (mode, op1));
12087 /* ... nor both arguments in memory. */
12088 if (!register_operand (op0, mode)
12089 && !register_operand (op1, mode))
12090 op1 = force_reg (mode, op1);
12092 tmp[0] = op0; tmp[1] = op1;
12093 ix86_expand_vector_move_misalign (mode, tmp);
12097 /* Make operand1 a register if it isn't already. */
12098 if (can_create_pseudo_p ()
12099 && !register_operand (op0, mode)
12100 && !register_operand (op1, mode))
/* Both operands in memory: stage the source through a register.  */
12102 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12106 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12109 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12110 straight to ix86_expand_vector_move. */
12111 /* Code generation for scalar reg-reg moves of single and double precision data:
12112 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12116 if (x86_sse_partial_reg_dependency == true)
12121 Code generation for scalar loads of double precision data:
12122 if (x86_sse_split_regs == true)
12123 movlpd mem, reg (gas syntax)
12127 Code generation for unaligned packed loads of single precision data
12128 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12129 if (x86_sse_unaligned_move_optimal)
12132 if (x86_sse_partial_reg_dependency == true)
12144 Code generation for unaligned packed loads of double precision data
12145 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12146 if (x86_sse_unaligned_move_optimal)
12149 if (x86_sse_split_regs == true)
/* NOTE(review): many interior lines of the tuning comment above and of
   the function below are elided from this extraction.  */
12162 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path: one unaligned vmovdqu/vmovup[sd] of the right width.  */
12171 switch (GET_MODE_CLASS (mode))
12173 case MODE_VECTOR_INT:
12175 switch (GET_MODE_SIZE (mode))
12178 op0 = gen_lowpart (V16QImode, op0);
12179 op1 = gen_lowpart (V16QImode, op1);
12180 emit_insn (gen_avx_movdqu (op0, op1));
12183 op0 = gen_lowpart (V32QImode, op0);
12184 op1 = gen_lowpart (V32QImode, op1);
12185 emit_insn (gen_avx_movdqu256 (op0, op1));
12188 gcc_unreachable ();
12191 case MODE_VECTOR_FLOAT:
12192 op0 = gen_lowpart (mode, op0);
12193 op1 = gen_lowpart (mode, op1);
12198 emit_insn (gen_avx_movups (op0, op1));
12201 emit_insn (gen_avx_movups256 (op0, op1));
12204 emit_insn (gen_avx_movupd (op0, op1));
12207 emit_insn (gen_avx_movupd256 (op0, op1));
12210 gcc_unreachable ();
12215 gcc_unreachable ();
/* Non-AVX load path (MEM source).  */
12223 /* If we're optimizing for size, movups is the smallest. */
12224 if (optimize_insn_for_size_p ())
12226 op0 = gen_lowpart (V4SFmode, op0);
12227 op1 = gen_lowpart (V4SFmode, op1);
12228 emit_insn (gen_sse_movups (op0, op1));
12232 /* ??? If we have typed data, then it would appear that using
12233 movdqu is the only way to get unaligned data loaded with
12235 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12237 op0 = gen_lowpart (V16QImode, op0);
12238 op1 = gen_lowpart (V16QImode, op1);
12239 emit_insn (gen_sse2_movdqu (op0, op1));
12243 if (TARGET_SSE2 && mode == V2DFmode)
12247 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12249 op0 = gen_lowpart (V2DFmode, op0);
12250 op1 = gen_lowpart (V2DFmode, op1);
12251 emit_insn (gen_sse2_movupd (op0, op1));
12255 /* When SSE registers are split into halves, we can avoid
12256 writing to the top half twice. */
12257 if (TARGET_SSE_SPLIT_REGS)
12259 emit_clobber (op0);
12264 /* ??? Not sure about the best option for the Intel chips.
12265 The following would seem to satisfy; the register is
12266 entirely cleared, breaking the dependency chain. We
12267 then store to the upper half, with a dependency depth
12268 of one. A rumor has it that Intel recommends two movsd
12269 followed by an unpacklpd, but this is unconfirmed. And
12270 given that the dependency depth of the unpacklpd would
12271 still be one, I'm not sure why this would be better. */
12272 zero = CONST0_RTX (V2DFmode);
/* Load the two DFmode halves separately (loadlpd/loadhpd).  */
12275 m = adjust_address (op1, DFmode, 0);
12276 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12277 m = adjust_address (op1, DFmode, 8);
12278 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12282 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12284 op0 = gen_lowpart (V4SFmode, op0);
12285 op1 = gen_lowpart (V4SFmode, op1);
12286 emit_insn (gen_sse_movups (op0, op1));
/* Split V4SF load: clear or clobber first, then loadlps/loadhps.  */
12290 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12291 emit_move_insn (op0, CONST0_RTX (mode));
12293 emit_clobber (op0);
12295 if (mode != V4SFmode)
12296 op0 = gen_lowpart (V4SFmode, op0);
12297 m = adjust_address (op1, V2SFmode, 0);
12298 emit_insn (gen_sse_loadlps (op0, op0, m));
12299 m = adjust_address (op1, V2SFmode, 8);
12300 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Non-AVX store path (MEM destination).  */
12303 else if (MEM_P (op0))
12305 /* If we're optimizing for size, movups is the smallest. */
12306 if (optimize_insn_for_size_p ())
12308 op0 = gen_lowpart (V4SFmode, op0);
12309 op1 = gen_lowpart (V4SFmode, op1);
12310 emit_insn (gen_sse_movups (op0, op1));
12314 /* ??? Similar to above, only less clear because of quote
12315 typeless stores unquote. */
12316 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12317 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12319 op0 = gen_lowpart (V16QImode, op0);
12320 op1 = gen_lowpart (V16QImode, op1);
12321 emit_insn (gen_sse2_movdqu (op0, op1));
12325 if (TARGET_SSE2 && mode == V2DFmode)
12327 m = adjust_address (op0, DFmode, 0);
12328 emit_insn (gen_sse2_storelpd (m, op1));
12329 m = adjust_address (op0, DFmode, 8);
12330 emit_insn (gen_sse2_storehpd (m, op1));
12334 if (mode != V4SFmode)
12335 op1 = gen_lowpart (V4SFmode, op1);
12336 m = adjust_address (op0, V2SFmode, 0);
12337 emit_insn (gen_sse_storelps (m, op1));
12338 m = adjust_address (op0, V2SFmode, 8);
12339 emit_insn (gen_sse_storehps (m, op1));
12343 gcc_unreachable ();
12346 /* Expand a push in MODE. This is some mode for which we do not support
12347 proper push instructions, at least from the registers that we expect
12348 the value to live in. */
12351 ix86_expand_push (enum machine_mode mode, rtx x)
/* Manually decrement the stack pointer by the mode's size ...  */
12355 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12356 GEN_INT (-GET_MODE_SIZE (mode)),
12357 stack_pointer_rtx, 1, OPTAB_DIRECT);
12358 if (tmp != stack_pointer_rtx)
12359 emit_move_insn (stack_pointer_rtx, tmp);
/* ... then store X at the new stack top.  */
12361 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12362 emit_move_insn (tmp, x);
12365 /* Helper function of ix86_fixup_binary_operands to canonicalize
12366 operand order. Returns true if the operands should be swapped. */
12369 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12372 rtx dst = operands[0];
12373 rtx src1 = operands[1];
12374 rtx src2 = operands[2];
12376 /* If the operation is not commutative, we can't do anything. */
12377 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12380 /* Highest priority is that src1 should match dst. */
12381 if (rtx_equal_p (dst, src1))
12383 if (rtx_equal_p (dst, src2))
12386 /* Next highest priority is that immediate constants come second. */
12387 if (immediate_operand (src2, mode))
12389 if (immediate_operand (src1, mode))
12392 /* Lowest priority is that memory references should come second. */
12402 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12403 destination to use for the operation. If different from the true
12404 destination in operands[0], a copy operation will be required. */
12407 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12410 rtx dst = operands[0];
12411 rtx src1 = operands[1];
12412 rtx src2 = operands[2];
12414 /* Canonicalize operand order. */
12415 if (ix86_swap_binary_operands_p (code, mode, operands))
12419 /* It is invalid to swap operands of different modes. */
12420 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
/* NOTE(review): the actual swap statements are among the elided lines.  */
12427 /* Both source operands cannot be in memory. */
12428 if (MEM_P (src1) && MEM_P (src2))
12430 /* Optimization: Only read from memory once. */
12431 if (rtx_equal_p (src1, src2))
12433 src2 = force_reg (mode, src2);
12437 src2 = force_reg (mode, src2);
12440 /* If the destination is memory, and we do not have matching source
12441 operands, do things in registers. */
12442 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12443 dst = gen_reg_rtx (mode);
12445 /* Source 1 cannot be a constant. */
12446 if (CONSTANT_P (src1))
12447 src1 = force_reg (mode, src1);
12449 /* Source 1 cannot be a non-matching memory. */
12450 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12451 src1 = force_reg (mode, src1);
/* Write the (possibly rewritten) sources back; dst is returned so the
   caller can emit a final copy to operands[0] if needed.  */
12453 operands[1] = src1;
12454 operands[2] = src2;
12458 /* Similarly, but assume that the destination has already been
12459 set up properly. */
/* NOTE(review): partial extract; return type and braces are elided.
   The assert demands that ix86_fixup_binary_operands did not need to
   substitute a scratch destination.  */
12462 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12463 enum machine_mode mode, rtx operands[])
12465 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12466 gcc_assert (dst == operands[0]);
12469 /* Attempt to expand a binary operator. Make the expansion closer to the
12470 actual machine, then just general_operand, which will allow 3 separate
12471 memory references (one output, two input) in a single insn. */
/* NOTE(review): partial extract — branch bodies around the
   reload_in_progress test are elided.  Emits (set dst (code src1 src2))
   together with a clobber of the flags register, except during reload
   where only PLUS (which needs no clobber note) is permitted.  */
12474 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12477 rtx src1, src2, dst, op, clob;
12479 dst = ix86_fixup_binary_operands (code, mode, operands);
12480 src1 = operands[1];
12481 src2 = operands[2];
12483 /* Emit the instruction. */
12485 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12486 if (reload_in_progress)
12488 /* Reload doesn't know about the flags register, and doesn't know that
12489 it doesn't want to clobber it. We can only do this with PLUS. */
12490 gcc_assert (code == PLUS);
/* Normal path: pair the arithmetic SET with an explicit FLAGS_REG
   clobber so later passes know EFLAGS is destroyed.  */
12495 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12496 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12499 /* Fix up the destination if needed. */
12500 if (dst != operands[0])
12501 emit_move_insn (operands[0], dst);
12504 /* Return TRUE or FALSE depending on whether the binary operator meets the
12505 appropriate constraints. */
/* NOTE(review): partial extract — the return statements after each test
   are elided.  Mirrors the checks ix86_fixup_binary_operands enforces,
   as a predicate.  */
12508 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12511 rtx dst = operands[0];
12512 rtx src1 = operands[1];
12513 rtx src2 = operands[2];
12515 /* Both source operands cannot be in memory. */
12516 if (MEM_P (src1) && MEM_P (src2))
12519 /* Canonicalize operand order for commutative operators. */
12520 if (ix86_swap_binary_operands_p (code, mode, operands))
12527 /* If the destination is memory, we must have a matching source operand. */
12528 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12531 /* Source 1 cannot be a constant. */
12532 if (CONSTANT_P (src1))
12535 /* Source 1 cannot be a non-matching memory. */
12536 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12542 /* Attempt to expand a unary operator. Make the expansion closer to the
12543 actual machine, then just general_operand, which will allow 2 separate
12544 memory references (one output, one input) in a single insn. */
/* NOTE(review): partial extract — dst/src initialization and some branch
   bodies are elided (line-number gaps).  Structure parallels
   ix86_expand_binary_operator: emit SET plus a flags clobber, except
   NOT (and the reload path) which needs no clobber.  */
12547 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12550 int matching_memory;
12551 rtx src, dst, op, clob;
12556 /* If the destination is memory, and we do not have matching source
12557 operands, do things in registers. */
12558 matching_memory = 0;
12561 if (rtx_equal_p (dst, src))
12562 matching_memory = 1;
12564 dst = gen_reg_rtx (mode);
12567 /* When source operand is memory, destination must match. */
12568 if (MEM_P (src) && !matching_memory)
12569 src = force_reg (mode, src);
12571 /* Emit the instruction. */
12573 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
12574 if (reload_in_progress || code == NOT)
12576 /* Reload doesn't know about the flags register, and doesn't know that
12577 it doesn't want to clobber it. */
12578 gcc_assert (code == NOT)(
/* x86 NOT does not modify EFLAGS, so no clobber is needed for it.  */);
12583 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12584 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12587 /* Fix up the destination if needed. */
12588 if (dst != operands[0])
12589 emit_move_insn (operands[0], dst);
12592 /* Return TRUE or FALSE depending on whether the unary operator meets the
12593 appropriate constraints. */
/* NOTE(review): partial extract — the return statements are elided.
   The single visible constraint: if either operand is memory, the
   destination must equal the source (read-modify-write form).  */
12596 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
12597 enum machine_mode mode ATTRIBUTE_UNUSED,
12598 rtx operands[2] ATTRIBUTE_UNUSED)
12600 /* If one of operands is memory, source and destination must match. */
12601 if ((MEM_P (operands[0])
12602 || MEM_P (operands[1]))
12603 && ! rtx_equal_p (operands[0], operands[1]))
12608 /* Post-reload splitter for converting an SF or DFmode value in an
12609 SSE register into an unsigned SImode. */
/* NOTE(review): partial extract — several control-flow lines (the branch
   selecting the two load strategies, closing braces) are elided.
   OPERANDS: [0] result reg, [1] LARGE scratch, [2] ZERO_OR_TWO31
   scratch, [3] INPUT value, [4] the 2**31 constant.  The visible
   sequence implements unsigned conversion by conditionally subtracting
   2**31 before the signed cvtt, then XORing the sign bit back in.  */
12612 ix86_split_convert_uns_si_sse (rtx operands[])
12614 enum machine_mode vecmode;
12615 rtx value, large, zero_or_two31, input, two31, x;
12617 large = operands[1];
12618 zero_or_two31 = operands[2];
12619 input = operands[3];
12620 two31 = operands[4];
12621 vecmode = GET_MODE (large);
12622 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
12624 /* Load up the value into the low element. We must ensure that the other
12625 elements are valid floats -- zero is the easiest such value. */
12628 if (vecmode == V4SFmode)
12629 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
12631 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Alternate (elided-branch) strategy: zero the vector then move the
   scalar into the low lane with movss/movsd.  */
12635 input = gen_rtx_REG (vecmode, REGNO (input));
12636 emit_move_insn (value, CONST0_RTX (vecmode));
12637 if (vecmode == V4SFmode)
12638 emit_insn (gen_sse_movss (value, value, input));
12640 emit_insn (gen_sse2_movsd (value, value, input));
12643 emit_move_insn (large, two31);
12644 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large := mask of (2**31 <= value), computed as a vector compare.  */
12646 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
12647 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* zero_or_two31 := 2**31 where the compare was true, else 0.  */
12649 x = gen_rtx_AND (vecmode, zero_or_two31, large);
12650 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
12652 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
12653 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Shift the all-ones compare mask left 31 to recover just the sign
   bit, then OR/XOR it into the truncated result below.  */
12655 large = gen_rtx_REG (V4SImode, REGNO (large));
12656 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
12658 x = gen_rtx_REG (V4SImode, REGNO (value));
12659 if (vecmode == V4SFmode)
12660 emit_insn (gen_sse2_cvttps2dq (x, value));
12662 emit_insn (gen_sse2_cvttpd2dq (x, value));
12665 emit_insn (gen_xorv4si3 (value, value, large));
12668 /* Convert an unsigned DImode value into a DFmode, using only SSE.
12669 Expects the 64-bit DImode to be supplied in a pair of integral
12670 registers. Requires SSE2; will use SSE3 if available. For x86_32,
12671 -mfpmath=sse, !optimize_size only. */
/* NOTE(review): partial extract — the declaration of X, some braces and
   the SSE3/else branch structure around haddpd are elided.  Algorithm:
   splice the two 32-bit halves next to the exponent words 0x43300000
   (2^52) and 0x45300000 (2^84), reinterpret as two doubles, subtract
   the biases, and sum the halves.  */
12674 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
12676 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
12677 rtx int_xmm, fp_xmm;
12678 rtx biases, exponents;
/* Three alternative ways to get the DImode pair into an XMM register,
   chosen by target tuning flags.  */
12681 int_xmm = gen_reg_rtx (V4SImode);
12682 if (TARGET_INTER_UNIT_MOVES)
12683 emit_insn (gen_movdi_to_sse (int_xmm, input));
12684 else if (TARGET_SSE_SPLIT_REGS)
12686 emit_clobber (int_xmm);
12687 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
12691 x = gen_reg_rtx (V2DImode);
12692 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
12693 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
12696 x = gen_rtx_CONST_VECTOR (V4SImode,
12697 gen_rtvec (4, GEN_INT (0x43300000UL),
12698 GEN_INT (0x45300000UL),
12699 const0_rtx, const0_rtx));
12700 exponents = validize_mem (force_const_mem (V4SImode, x));
12702 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
12703 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
12705 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
12706 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
12707 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
12708 (0x1.0p84 + double(fp_value_hi_xmm)).
12709 Note these exponents differ by 32. */
12711 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
12713 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
12714 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
12715 real_ldexp (&bias_lo_rvt, &dconst1, 52);
12716 real_ldexp (&bias_hi_rvt, &dconst1, 84);
12717 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
12718 x = const_double_from_real_value (bias_hi_rvt, DFmode);
12719 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
12720 biases = validize_mem (force_const_mem (V2DFmode, biases));
12721 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
12723 /* Add the upper and lower DFmode values together. */
/* SSE3 path: single horizontal add.  */
12725 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
/* SSE2 fallback (branch structure elided): unpack high half and add.  */
12728 x = copy_to_mode_reg (V2DFmode, fp_xmm);
12729 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
12730 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
12733 ix86_expand_vector_extract (false, target, fp_xmm, 0);
12736 /* Not used, but eases macroization of patterns. */
/* Stub kept so the machine-description macros can reference a uniform
   name for every mode; reaching it at run time is a bug.  */
12738 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
12739 rtx input ATTRIBUTE_UNUSED)
12741 gcc_unreachable ();
12744 /* Convert an unsigned SImode value into a DFmode. Only currently used
12745 for SSE, but applicable anywhere. */
/* NOTE(review): partial extract — declarations of x/fp and the final
   guard around the move are elided.  Technique: bias the unsigned value
   by -2**31 so it fits signed SImode, convert, then add 2**31.0 back
   in DFmode (exactly representable).  */
12748 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
12750 REAL_VALUE_TYPE TWO31r;
12753 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
12754 NULL, 1, OPTAB_DIRECT);
12756 fp = gen_reg_rtx (DFmode);
12757 emit_insn (gen_floatsidf2 (fp, x));
12759 real_ldexp (&TWO31r, &dconst1, 31);
12760 x = const_double_from_real_value (TWO31r, DFmode);
12762 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
12764 emit_move_insn (target, x);
12767 /* Convert a signed DImode value into a DFmode. Only used for SSE in
12768 32-bit mode; otherwise we have a direct convert instruction. */
/* NOTE(review): partial extract.  result = double(hi) * 2**32 +
   unsigned_double(lo): the high word carries the sign, the low word is
   treated as unsigned via ix86_expand_convert_uns_sidf_sse.  */
12771 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
12773 REAL_VALUE_TYPE TWO32r;
12774 rtx fp_lo, fp_hi, x;
12776 fp_lo = gen_reg_rtx (DFmode);
12777 fp_hi = gen_reg_rtx (DFmode);
12779 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
12781 real_ldexp (&TWO32r, &dconst1, 32);
12782 x = const_double_from_real_value (TWO32r, DFmode);
12783 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
12785 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
12787 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
12790 emit_move_insn (target, x);
12793 /* Convert an unsigned SImode value into a SFmode, using only SSE.
12794 For x86_32, -mfpmath=sse, !optimize_size only. */
/* NOTE(review): partial extract — some expand_simple_binop argument
   lines are elided.  Splits the 32-bit value into 16-bit halves so
   each converts exactly to float, then recombines:
   result = float(hi16) * 2**16 + float(lo16).  */
12796 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
12798 REAL_VALUE_TYPE ONE16r;
12799 rtx fp_hi, fp_lo, int_hi, int_lo, x;
12801 real_ldexp (&ONE16r, &dconst1, 16);
12802 x = const_double_from_real_value (ONE16r, SFmode);
12803 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
12804 NULL, 0, OPTAB_DIRECT);
12805 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
12806 NULL, 0, OPTAB_DIRECT);
12807 fp_hi = gen_reg_rtx (SFmode);
12808 fp_lo = gen_reg_rtx (SFmode);
12809 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
12810 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
12811 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
12813 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
12815 if (!rtx_equal_p (target, fp_hi))
12816 emit_move_insn (target, fp_hi);
12819 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
12820 then replicate the value for all elements of the vector
/* NOTE(review): partial extract — the switch statement, case labels and
   the VECT tests that select between the paired gen_rtvec calls are
   elided; only the alternative vector constructions survive.  Builds a
   CONST_VECTOR in the vector mode matching MODE, with VALUE either
   broadcast (vect) or placed in element 0 with zeros elsewhere.  */
12824 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
12831 v = gen_rtvec (4, value, value, value, value);
12832 return gen_rtx_CONST_VECTOR (V4SImode, v);
12836 v = gen_rtvec (2, value, value);
12837 return gen_rtx_CONST_VECTOR (V2DImode, v);
12841 v = gen_rtvec (4, value, value, value, value);
12843 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
12844 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12845 return gen_rtx_CONST_VECTOR (V4SFmode, v);
12849 v = gen_rtvec (2, value, value);
12851 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
12852 return gen_rtx_CONST_VECTOR (V2DFmode, v);
/* Unhandled mode: internal error.  */
12855 gcc_unreachable ();
12859 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
12860 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
12861 for an SSE register. If VECT is true, then replicate the mask for
12862 all elements of the vector register. If INVERT is true, then create
12863 a mask excluding the sign bit. */
/* NOTE(review): partial extract — the switch over MODE, the `shift`
   variable setup, mask/imode declarations and several case bodies are
   elided.  The surviving lines show the lo/hi 2*HOST_WIDE_INT sign-bit
   computation for SF/SI, DF/DI and the TF/extended case, inversion via
   ~lo/~hi when INVERT, and packaging the bits as a (vector) constant
   forced into a register.  */
12866 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
12868 enum machine_mode vec_mode, imode;
12869 HOST_WIDE_INT hi, lo;
12874 /* Find the sign bit, sign extended to 2*HWI. */
12880 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
12881 lo = 0x80000000, hi = lo < 0;
12887 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
12888 if (HOST_BITS_PER_WIDE_INT >= 64)
12889 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
12891 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TF/80-bit case: no vector mode; result will be a scalar register.  */
12896 vec_mode = VOIDmode;
12897 if (HOST_BITS_PER_WIDE_INT >= 64)
12900 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
12907 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
12911 lo = ~lo, hi = ~hi;
12917 mask = immed_double_const (lo, hi, imode);
12919 vec = gen_rtvec (2, v, mask);
12920 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
12921 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
12928 gcc_unreachable ();
/* INVERT for the common path: complement all mask bits.  */
12932 lo = ~lo, hi = ~hi;
12934 /* Force this value into the low part of a fp vector constant. */
12935 mask = immed_double_const (lo, hi, imode);
12936 mask = gen_lowpart (mode, mask);
12938 if (vec_mode == VOIDmode)
12939 return force_reg (mode, mask);
12941 v = ix86_build_const_vector (mode, vect, mask);
12942 return force_reg (vec_mode, v);
12945 /* Generate code for floating point ABS or NEG. */
/* NOTE(review): partial extract — the dst/src assignments, the x87
   branch structure and some TARGET_SSE checks are elided.  SSE path:
   NEG is XOR with the sign-bit mask, ABS is AND with the inverted
   mask.  x87 path: emit the raw ABS/NEG rtx, with a USE of the mask
   and a flags clobber in the PARALLEL.  */
12948 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
12951 rtx mask, set, use, clob, dst, src;
12952 bool use_sse = false;
12953 bool vector_mode = VECTOR_MODE_P (mode);
12954 enum machine_mode elt_mode = mode;
12958 elt_mode = GET_MODE_INNER (mode);
12961 else if (mode == TFmode)
12963 else if (TARGET_SSE_MATH)
12964 use_sse = SSE_FLOAT_MODE_P (mode);
12966 /* NEG and ABS performed with SSE use bitwise mask operations.
12967 Create the appropriate mask now. */
12969 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
12978 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
12979 set = gen_rtx_SET (VOIDmode, dst, set);
12984 set = gen_rtx_fmt_e (code, mode, src);
12985 set = gen_rtx_SET (VOIDmode, dst, set);
12988 use = gen_rtx_USE (VOIDmode, mask);
12989 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12990 emit_insn (gen_rtx_PARALLEL (VOIDmode,
12991 gen_rtvec (3, set, use, clob)));
12998 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* NOTE(review): partial extract — op0/op1 assignments, the rtvec
   declaration and several braces are elided.  Two strategies:
   constant magnitude (op0 CONST_DOUBLE) uses *_const patterns with one
   mask; variable magnitude uses *_var patterns with both the sign mask
   and its complement.  */
13001 ix86_expand_copysign (rtx operands[])
13003 enum machine_mode mode;
13004 rtx dest, op0, op1, mask, nmask;
13006 dest = operands[0];
13010 mode = GET_MODE (dest);
13012 if (GET_CODE (op0) == CONST_DOUBLE)
13014 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* Strip the sign from the constant; the sign comes from op1 anyway.  */
13016 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13017 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13019 if (mode == SFmode || mode == DFmode)
13021 enum machine_mode vmode;
13023 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13025 if (op0 == CONST0_RTX (mode))
13026 op0 = CONST0_RTX (vmode);
/* Non-zero constant: widen into a one-element-set vector constant.  */
13031 if (mode == SFmode)
13032 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13033 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13035 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13037 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13040 else if (op0 != CONST0_RTX (mode))
13041 op0 = force_reg (mode, op0);
13043 mask = ix86_build_signbit_mask (mode, 0, 0);
13045 if (mode == SFmode)
13046 copysign_insn = gen_copysignsf3_const;
13047 else if (mode == DFmode)
13048 copysign_insn = gen_copysigndf3_const;
13050 copysign_insn = gen_copysigntf3_const;
13052 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable-magnitude path (else-branch; opening brace elided).  */
13056 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13058 nmask = ix86_build_signbit_mask (mode, 0, 1);
13059 mask = ix86_build_signbit_mask (mode, 0, 0);
13061 if (mode == SFmode)
13062 copysign_insn = gen_copysignsf3_var;
13063 else if (mode == DFmode)
13064 copysign_insn = gen_copysigndf3_var;
13066 copysign_insn = gen_copysigntf3_var;
13068 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13072 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13073 be a constant, and so has already been expanded into a vector constant. */
/* NOTE(review): partial extract — op0/op1 assignments and braces are
   elided.  dest = (dest & sign-mask) | constant-magnitude, operating on
   the vector view of the scalar register.  */
13076 ix86_split_copysign_const (rtx operands[])
13078 enum machine_mode mode, vmode;
13079 rtx dest, op0, op1, mask, x;
13081 dest = operands[0];
13084 mask = operands[3];
13086 mode = GET_MODE (dest);
13087 vmode = GET_MODE (mask);
13089 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13090 x = gen_rtx_AND (vmode, dest, mask);
13091 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* ORing in a zero magnitude is a no-op, so skip it.  */
13093 if (op0 != CONST0_RTX (vmode))
13095 x = gen_rtx_IOR (vmode, dest, op0);
13096 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13100 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13101 so we have to do two masks. */
/* NOTE(review): partial extract — op0/op1 assignments, a return and
   several braces are elided.  Computes
   dest = (op0 & ~sign-mask) | (op1 & sign-mask), with the exact
   instruction arrangement depending on which register-allocation
   alternative (0..4) matched, as the REGNO asserts show.  */
13104 ix86_split_copysign_var (rtx operands[])
13106 enum machine_mode mode, vmode;
13107 rtx dest, scratch, op0, op1, mask, nmask, x;
13109 dest = operands[0];
13110 scratch = operands[1];
13113 nmask = operands[4];
13114 mask = operands[5];
13116 mode = GET_MODE (dest);
13117 vmode = GET_MODE (mask);
13119 if (rtx_equal_p (op0, op1))
13121 /* Shouldn't happen often (it's useless, obviously), but when it does
13122 we'd generate incorrect code if we continue below. */
13123 emit_move_insn (dest, op0);
13127 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13129 gcc_assert (REGNO (op1) == REGNO (scratch));
13131 x = gen_rtx_AND (vmode, scratch, mask);
13132 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest currently holds the mask; ANDN it with op0 to keep magnitude.  */
13135 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13136 x = gen_rtx_NOT (vmode, dest);
13137 x = gen_rtx_AND (vmode, x, op0);
13138 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13142 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13144 x = gen_rtx_AND (vmode, scratch, mask);
13146 else /* alternative 2,4 */
13148 gcc_assert (REGNO (mask) == REGNO (scratch));
13149 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13150 x = gen_rtx_AND (vmode, scratch, op1);
13152 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13154 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13156 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13157 x = gen_rtx_AND (vmode, dest, nmask);
13159 else /* alternative 3,4 */
13161 gcc_assert (REGNO (nmask) == REGNO (dest));
13163 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13164 x = gen_rtx_AND (vmode, dest, op0);
13166 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine sign bits (scratch) with magnitude (dest).  */
13169 x = gen_rtx_IOR (vmode, dest, scratch);
13170 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13173 /* Return TRUE or FALSE depending on whether the first SET in INSN
13174 has source and destination with matching CC modes, and that the
13175 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): partial extract — the switch over set_mode, its case
   labels, and early returns are elided; only the guard conditions
   survive.  INSN's first SET must be a COMPARE into a CC register.  */
13178 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13181 enum machine_mode set_mode;
13183 set = PATTERN (insn);
13184 if (GET_CODE (set) == PARALLEL)
13185 set = XVECEXP (set, 0, 0);
13186 gcc_assert (GET_CODE (set) == SET);
13187 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13189 set_mode = GET_MODE (SET_DEST (set));
/* CCmode is acceptable only when comparing against zero (per the
   visible condition); stricter modes fall through to the checks
   below.  */
13193 if (req_mode != CCNOmode
13194 && (req_mode != CCmode
13195 || XEXP (SET_SRC (set), 1) != const0_rtx))
13199 if (req_mode == CCGCmode)
13203 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13207 if (req_mode == CCZmode)
13218 gcc_unreachable ();
13221 return (GET_MODE (SET_SRC (set)) == set_mode);
13224 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* NOTE(review): partial extract — declarations of flags/tmp are elided.
   Emits (set FLAGS_REG (compare op0 op1)) in the CC mode selected for
   CODE and returns the comparison rtx the flags consumer should use.  */
13227 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13229 enum machine_mode cmpmode;
13232 cmpmode = SELECT_CC_MODE (code, op0, op1);
13233 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13235 /* This is very simple, but making the interface the same as in the
13236 FP case makes the rest of the code easier. */
13237 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13238 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13240 /* Return the test that should be put into the flags user, i.e.
13241 the bcc, scc, or cmov instruction. */
13242 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13245 /* Figure out whether to use ordered or unordered fp comparisons.
13246 Return the appropriate mode to use. */
/* CODE is currently ignored: under -mieee-fp all FP compares use the
   non-trapping (unordered, CCFPUmode) form, as explained below.  */
13249 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13251 /* ??? In order to make all comparisons reversible, we do all comparisons
13252 non-trapping when compiling for IEEE. Once gcc is able to distinguish
13253 all forms trapping and nontrapping comparisons, we can make inequality
13254 comparisons trapping again, since it results in better code when using
13255 FCOM based compares. */
13256 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode that captures exactly the flags CODE needs when
   comparing OP0 with OP1; narrower CC modes give the combiner more
   freedom.  NOTE(review): partial extract — the switch statement
   header, the returned mode on each case, and the default case are
   elided; only the case labels and their overflow-check refinements
   survive.  */
13260 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13262 enum machine_mode mode = GET_MODE (op0);
13264 if (SCALAR_FLOAT_MODE_P (mode))
13266 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13267 return ix86_fp_compare_mode (code);
13272 /* Only zero flag is needed. */
13273 case EQ: /* ZF=0 */
13274 case NE: /* ZF!=0 */
13276 /* Codes needing carry flag. */
13277 case GEU: /* CF=0 */
13278 case LTU: /* CF=1 */
13279 /* Detect overflow checks. They need just the carry flag. */
13280 if (GET_CODE (op0) == PLUS
13281 && rtx_equal_p (op1, XEXP (op0, 0)))
13285 case GTU: /* CF=0 & ZF=0 */
13286 case LEU: /* CF=1 | ZF=1 */
13287 /* Detect overflow checks. They need just the carry flag. */
13288 if (GET_CODE (op0) == MINUS
13289 && rtx_equal_p (op1, XEXP (op0, 0)))
13293 /* Codes possibly doable only with sign flag when
13294 comparing against zero. */
13295 case GE: /* SF=OF or SF=0 */
13296 case LT: /* SF<>OF or SF=1 */
13297 if (op1 == const0_rtx)
13300 /* For other cases Carry flag is not required. */
13302 /* Codes doable only with sign flag when comparing
13303 against zero, but we miss jump instruction for it
13304 so we need to use relational tests against overflow
13305 that thus needs to be zero. */
13306 case GT: /* ZF=0 & SF=OF */
13307 case LE: /* ZF=1 | SF<>OF */
13308 if (op1 == const0_rtx)
13312 /* strcmp pattern do (use flags) and combine may ask us for proper
13317 gcc_unreachable ();
13321 /* Return the fixed registers used for condition codes. */
/* NOTE(review): the body (assignments to *P1/*P2 and the return) is
   entirely elided in this extract.  */
13324 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13331 /* If two condition code modes are compatible, return a condition code
13332 mode which is compatible with both. Otherwise, return
/* NOTE(review): partial extract — the equal-modes early return, the
   switch body with its compatible-mode results, and the final default
   are elided.  Visible rules: non-CC modes are rejected, and
   CCGCmode/CCGOCmode pair up as compatible.  */
13335 static enum machine_mode
13336 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
13341 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
13344 if ((m1 == CCGCmode && m2 == CCGOCmode)
13345 || (m1 == CCGOCmode && m2 == CCGCmode))
13351 gcc_unreachable ();
13381 /* These are only compatible with themselves, which we already
13387 /* Split comparison code CODE into comparisons we can do using branch
13388 instructions. BYPASS_CODE is comparison code for branch that will
13389 branch around FIRST_CODE and SECOND_CODE. If some of branches
13390 is not required, set value to UNKNOWN.
13391 We never require more than two branches. */
/* NOTE(review): partial extract — the switch header, break statements
   and some case bodies are elided.  Maps an FP comparison onto the
   unsigned flag tests fcomi/sahf provide; codes that misbehave on
   unordered operands get either a bypass branch (UNORDERED taken
   around the test) or a second branch.  */
13394 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13395 enum rtx_code *first_code,
13396 enum rtx_code *second_code)
13398 *first_code = code;
13399 *bypass_code = UNKNOWN;
13400 *second_code = UNKNOWN;
13402 /* The fcomi comparison sets flags as follows:
13412 case GT: /* GTU - CF=0 & ZF=0 */
13413 case GE: /* GEU - CF=0 */
13414 case ORDERED: /* PF=0 */
13415 case UNORDERED: /* PF=1 */
13416 case UNEQ: /* EQ - ZF=1 */
13417 case UNLT: /* LTU - CF=1 */
13418 case UNLE: /* LEU - CF=1 | ZF=1 */
13419 case LTGT: /* EQ - ZF=0 */
13421 case LT: /* LTU - CF=1 - fails on unordered */
13422 *first_code = UNLT;
13423 *bypass_code = UNORDERED;
13425 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
13426 *first_code = UNLE;
13427 *bypass_code = UNORDERED;
13429 case EQ: /* EQ - ZF=1 - fails on unordered */
13430 *first_code = UNEQ;
13431 *bypass_code = UNORDERED;
13433 case NE: /* NE - ZF=0 - fails on unordered */
13434 *first_code = LTGT;
13435 *second_code = UNORDERED;
13437 case UNGE: /* GEU - CF=0 - fails on unordered */
13439 *second_code = UNORDERED;
13441 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
13443 *second_code = UNORDERED;
13446 gcc_unreachable ();
/* Without -mieee-fp, unordered results need not be honored, so the
   extra branches are dropped.  */
13448 if (!TARGET_IEEE_FP)
13450 *second_code = UNKNOWN;
13451 *bypass_code = UNKNOWN;
13455 /* Return cost of comparison done fcom + arithmetics operations on AX.
13456 All following functions do use number of instructions as a cost metrics.
13457 In future this should be tweaked to compute bytes for optimize_size and
13458 take into account performance of various instructions on various CPUs. */
/* NOTE(review): partial extract — the per-code switch with its cost
   returns is entirely elided; only the non-IEEE early test and the
   default trap remain visible.  */
13460 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
13462 if (!TARGET_IEEE_FP)
13464 /* The cost of code output by ix86_expand_fp_compare. */
13488 gcc_unreachable ();
13492 /* Return cost of comparison done using fcomi operation.
13493 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* NOTE(review): the TARGET_CMOVE guard's body (the high-cost return)
   is elided.  Base cost 2 (fcomi + branch), plus 1 when a bypass or
   second branch is required.  */
13495 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13497 enum rtx_code bypass_code, first_code, second_code;
13498 /* Return arbitrarily high cost when instruction is not supported - this
13499 prevents gcc from using it. */
13502 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13503 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
13506 /* Return cost of comparison done using sahf operation.
13507 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* NOTE(review): the high-cost return under the guard is elided.  Base
   cost 3 (fnstsw + sahf + branch), plus 1 for an extra branch.  */
13509 ix86_fp_comparison_sahf_cost (enum rtx_code code)
13511 enum rtx_code bypass_code, first_code, second_code;
13512 /* Return arbitrarily high cost when instruction is not preferred - this
13513 avoids gcc from using it. */
13514 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
13516 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13517 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
13520 /* Compute cost of the comparison done using any method.
13521 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* NOTE(review): partial extract — the `min` declaration, the min
   assignments inside the ifs and the final return are elided.  Takes
   the minimum over the fcomi, sahf and fcom+arith strategies.  */
13523 ix86_fp_comparison_cost (enum rtx_code code)
13525 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
13528 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
13529 sahf_cost = ix86_fp_comparison_sahf_cost (code);
13531 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
13532 if (min > sahf_cost)
13534 if (min > fcomi_cost)
13539 /* Return true if we should use an FCOMI instruction for this
/* True when fcomi is the cheapest strategy for CODE in either operand
   order (the swapped order matters because the expander may commute
   the comparison).  */
13543 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
13545 enum rtx_code swapped_code = swap_condition (code);
13547 return ((ix86_fp_comparison_cost (code)
13548 == ix86_fp_comparison_fcomi_cost (code))
13549 || (ix86_fp_comparison_cost (swapped_code)
13550 == ix86_fp_comparison_fcomi_cost (swapped_code)));
13553 /* Swap, force into registers, or otherwise massage the two operands
13554 to a fp comparison. The operands are updated in place; the new
13555 comparison code is returned. */
/* NOTE(review): partial extract — several condition fragments, a tmp
   declaration, branch bodies and the final stores through POP0/POP1
   are elided.  Handles x87 constraints (op1 in memory, op0 on st(0)),
   const-pool placement of constants, and cost-driven commuting.  */
13557 static enum rtx_code
13558 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
13560 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
13561 rtx op0 = *pop0, op1 = *pop1;
13562 enum machine_mode op_mode = GET_MODE (op0);
13563 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
13565 /* All of the unordered compare instructions only work on registers.
13566 The same is true of the fcomi compare instructions. The XFmode
13567 compare instructions require registers except when comparing
13568 against zero or when converting operand 1 from fixed point to
13572 && (fpcmp_mode == CCFPUmode
13573 || (op_mode == XFmode
13574 && ! (standard_80387_constant_p (op0) == 1
13575 || standard_80387_constant_p (op1) == 1)
13576 && GET_CODE (op1) != FLOAT)
13577 || ix86_use_fcomi_compare (code)))
13579 op0 = force_reg (op_mode, op0);
13580 op1 = force_reg (op_mode, op1);
13584 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
13585 things around if they appear profitable, otherwise force op0
13586 into a register. */
13588 if (standard_80387_constant_p (op0) == 0
13590 && ! (standard_80387_constant_p (op1) == 0
13594 tmp = op0, op0 = op1, op1 = tmp;
13595 code = swap_condition (code);
13599 op0 = force_reg (op_mode, op0);
13601 if (CONSTANT_P (op1))
13603 int tmp = standard_80387_constant_p (op1);
/* Constants with no fldz/fld1-style encoding go to the constant
   pool; loadable ones are handled in elided branches.  */
13605 op1 = validize_mem (force_const_mem (op_mode, op1));
13609 op1 = force_reg (op_mode, op1);
13612 op1 = force_reg (op_mode, op1);
13616 /* Try to rearrange the comparison to make it cheaper. */
13617 if (ix86_fp_comparison_cost (code)
13618 > ix86_fp_comparison_cost (swap_condition (code))
13619 && (REG_P (op1) || can_create_pseudo_p ()))
13622 tmp = op0, op0 = op1, op1 = tmp;
13623 code = swap_condition (code);
13625 op0 = force_reg (op_mode, op0);
13633 /* Convert comparison codes we use to represent FP comparison to integer
13634 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the mapping switch (the entire function body) is
   elided in this extract.  */
13638 ix86_fp_compare_code_to_integer (enum rtx_code code)
13667 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* NOTE(review): partial extract — the tmp/tmp2 declarations, switch
   header over the comparison code, many case labels, branch targets of
   the fcomi/sahf choice, and several returns are elided.  Two
   strategies survive in part: (1) fcomi/sahf setting EFLAGS directly,
   returning an optional bypass/second test through *BYPASS_TEST and
   *SECOND_TEST; (2) fnstsw into SCRATCH (AH) followed by test/and/cmp
   bit-twiddling on the C0/C2/C3 status bits (0x01/0x04/0x40, combined
   masks 0x05/0x44/0x45).  */
13670 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
13671 rtx *second_test, rtx *bypass_test)
13673 enum machine_mode fpcmp_mode, intcmp_mode;
13675 int cost = ix86_fp_comparison_cost (code);
13676 enum rtx_code bypass_code, first_code, second_code;
13678 fpcmp_mode = ix86_fp_compare_mode (code);
13679 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
13682 *second_test = NULL_RTX;
13684 *bypass_test = NULL_RTX;
13686 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13688 /* Do fcomi/sahf based test when profitable. */
13689 if (ix86_fp_comparison_arithmetics_cost (code) > cost
13690 && (bypass_code == UNKNOWN || bypass_test)
13691 && (second_code == UNKNOWN || second_test))
13693 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13694 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf variant (branch structure elided): compare via fnstsw+sahf,
   clobbering a HImode scratch for the status word.  */
13700 gcc_assert (TARGET_SAHF);
13703 scratch = gen_reg_rtx (HImode);
13704 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
13706 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
13709 /* The FP codes work out to act like unsigned. */
13710 intcmp_mode = fpcmp_mode;
13712 if (bypass_code != UNKNOWN)
13713 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
13714 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13716 if (second_code != UNKNOWN)
13717 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
13718 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13723 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
13724 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13725 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
13727 scratch = gen_reg_rtx (HImode);
13728 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
13730 /* In the unordered case, we have to check C2 for NaN's, which
13731 doesn't happen to work out to anything nice combination-wise.
13732 So do some bit twiddling on the value we've got in AH to come
13733 up with an appropriate set of condition codes. */
13735 intcmp_mode = CCNOmode;
13740 if (code == GT || !TARGET_IEEE_FP)
13742 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13747 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13748 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13749 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
13750 intcmp_mode = CCmode;
13756 if (code == LT && TARGET_IEEE_FP)
13758 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13759 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
13760 intcmp_mode = CCmode;
13765 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
13771 if (code == GE || !TARGET_IEEE_FP)
13773 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
13778 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13779 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13786 if (code == LE && TARGET_IEEE_FP)
13788 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13789 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13790 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13791 intcmp_mode = CCmode;
13796 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13802 if (code == EQ && TARGET_IEEE_FP)
13804 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13805 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13806 intcmp_mode = CCmode;
13811 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13818 if (code == NE && TARGET_IEEE_FP)
13820 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13821 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13827 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
/* ORDERED/UNORDERED: test only C2 (0x04), the NaN indicator.  */
13833 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13837 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13842 gcc_unreachable ();
13846 /* Return the test that should be put into the flags user, i.e.
13847 the bcc, scc, or cmov instruction. */
13848 return gen_rtx_fmt_ee (code, VOIDmode,
13849 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the implicit operands ix86_compare_op0 and
   ix86_compare_op1 and return an rtx of the form (CODE flags-reg 0)
   suitable as the test of a bcc, scc or cmov.  SECOND_TEST and
   BYPASS_TEST are reset to NULL_RTX and may be filled with auxiliary
   FP tests by ix86_expand_fp_compare.  */
13854 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
13857 op0 = ix86_compare_op0;
13858 op1 = ix86_compare_op1;
/* Default the out-parameters so callers may test them unconditionally.  */
13861 *second_test = NULL_RTX;
13863 *bypass_test = NULL_RTX;
/* If a compare was already emitted elsewhere, just wrap its result and
   consume the ix86_compare_emitted marker.  */
13865 if (ix86_compare_emitted)
13867 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
13868 ix86_compare_emitted = NULL_RTX;
13870 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
/* Decimal float is not handled by this path.  */
13872 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
13873 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
13874 second_test, bypass_test);
13877 ret = ix86_expand_int_compare (code, op0, op1);
13882 /* Return true if the CODE will result in nontrivial jump sequence. */
13884 ix86_fp_jump_nontrivial_p (enum rtx_code code)
13886 enum rtx_code bypass_code, first_code, second_code;
/* "Nontrivial" means the FP comparison cannot be done with a single
   branch: a bypass test and/or a second test would be required.  */
13889 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13890 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional jump to LABEL for condition CODE applied to the
   implicit operands ix86_compare_op0/ix86_compare_op1.  Dispatches on
   the operand mode: integer modes use a simple compare+jump, FP modes
   may need a compound parallel, and DImode/TImode double-word compares
   are split into multiple compare+branch sequences (see below).  */
13894 ix86_expand_branch (enum rtx_code code, rtx label)
13898 /* If we have emitted a compare insn, go straight to simple.
13899 ix86_expand_compare won't emit anything if ix86_compare_emitted
13901 if (ix86_compare_emitted)
13904 switch (GET_MODE (ix86_compare_op0))
/* Simple case: one compare, one conditional jump.  */
13910 tmp = ix86_expand_compare (code, NULL, NULL);
13911 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13912 gen_rtx_LABEL_REF (VOIDmode, label),
13914 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp))
13923 enum rtx_code bypass_code, first_code, second_code;
13925 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
13926 &ix86_compare_op1);
13928 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13930 /* Check whether we will use the natural sequence with one jump. If
13931 so, we can expand jump early. Otherwise delay expansion by
13932 creating compound insn to not confuse optimizers. */
13933 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
13935 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
13936 gen_rtx_LABEL_REF (VOIDmode, label),
13937 pc_rtx, NULL_RTX, NULL_RTX);
/* Multi-jump FP case: build a compound jump insn with the needed
   clobbers (FPSR, flags, and a scratch when fnstsw is used).  */
13941 tmp = gen_rtx_fmt_ee (code, VOIDmode,
13942 ix86_compare_op0, ix86_compare_op1);
13943 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13944 gen_rtx_LABEL_REF (VOIDmode, label),
13946 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
13948 use_fcomi = ix86_use_fcomi_compare (code);
13949 vec = rtvec_alloc (3 + !use_fcomi);
13950 RTVEC_ELT (vec, 0) = tmp;
13952 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
13954 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
13957 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
13959 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
13968 /* Expand DImode branch into multiple compare+branch. */
13970 rtx lo[2], hi[2], label2;
13971 enum rtx_code code1, code2, code3;
13972 enum machine_mode submode;
/* Canonicalize: keep any constant as the second operand.  */
13974 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
13976 tmp = ix86_compare_op0;
13977 ix86_compare_op0 = ix86_compare_op1;
13978 ix86_compare_op1 = tmp;
13979 code = swap_condition (code);
/* Split each double-word operand into low/high halves.  */
13981 if (GET_MODE (ix86_compare_op0) == DImode)
13983 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
13984 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
13989 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
13990 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
13994 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
13995 avoid two branches. This costs one extra insn, so disable when
13996 optimizing for size. */
13998 if ((code == EQ || code == NE)
13999 && (!optimize_insn_for_size_p ()
14000 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14005 if (hi[1] != const0_rtx)
14006 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14007 NULL_RTX, 0, OPTAB_WIDEN);
14010 if (lo[1] != const0_rtx)
14011 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14012 NULL_RTX, 0, OPTAB_WIDEN);
14014 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14015 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: the combined word is compared against zero.  */
14017 ix86_compare_op0 = tmp;
14018 ix86_compare_op1 = const0_rtx;
14019 ix86_expand_branch (code, label);
14023 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14024 op1 is a constant and the low word is zero, then we can just
14025 examine the high word. Similarly for low word -1 and
14026 less-or-equal-than or greater-than. */
14028 if (CONST_INT_P (hi[1]))
14031 case LT: case LTU: case GE: case GEU:
14032 if (lo[1] == const0_rtx)
14034 ix86_compare_op0 = hi[0];
14035 ix86_compare_op1 = hi[1];
14036 ix86_expand_branch (code, label);
14040 case LE: case LEU: case GT: case GTU:
14041 if (lo[1] == constm1_rtx)
14043 ix86_compare_op0 = hi[0];
14044 ix86_compare_op1 = hi[1];
14045 ix86_expand_branch (code, label);
14053 /* Otherwise, we need two or three jumps. */
14055 label2 = gen_label_rtx ();
/* code1 tests the high words for the branch-taken direction, code2 the
   branch-not-taken direction, code3 the unsigned low-word compare.  */
14058 code2 = swap_condition (code);
14059 code3 = unsigned_condition (code);
14063 case LT: case GT: case LTU: case GTU:
14066 case LE: code1 = LT; code2 = GT; break;
14067 case GE: code1 = GT; code2 = LT; break;
14068 case LEU: code1 = LTU; code2 = GTU; break;
14069 case GEU: code1 = GTU; code2 = LTU; break;
14071 case EQ: code1 = UNKNOWN; code2 = NE; break;
14072 case NE: code2 = UNKNOWN; break;
14075 gcc_unreachable ();
14080 * if (hi(a) < hi(b)) goto true;
14081 * if (hi(a) > hi(b)) goto false;
14082 * if (lo(a) < lo(b)) goto true;
14086 ix86_compare_op0 = hi[0];
14087 ix86_compare_op1 = hi[1];
14089 if (code1 != UNKNOWN)
14090 ix86_expand_branch (code1, label);
14091 if (code2 != UNKNOWN)
14092 ix86_expand_branch (code2, label2);
14094 ix86_compare_op0 = lo[0];
14095 ix86_compare_op1 = lo[1];
14096 ix86_expand_branch (code3, label);
14098 if (code2 != UNKNOWN)
14099 emit_label (label2);
14104 gcc_unreachable ();
14108 /* Split branch based on floating point condition. */
/* Emit the insns for an FP conditional branch: the main conditional
   jump selecting TARGET1/TARGET2, plus optional bypass (unordered) and
   second jumps as returned by ix86_expand_fp_compare.  TMP is a scratch
   for the compare; PUSHED, when set, is an operand previously pushed to
   memory which is freed here.  Branch probabilities are attached as
   REG_BR_PROB notes when split_branch_probability is known.  */
14110 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14111 rtx target1, rtx target2, rtx tmp, rtx pushed)
14113 rtx second, bypass;
14114 rtx label = NULL_RTX;
14116 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so that target2 is the fall-through (pc).  */
14119 if (target2 != pc_rtx)
14122 code = reverse_condition_maybe_unordered (code);
14127 condition = ix86_expand_fp_compare (code, op1, op2,
14128 tmp, &second, &bypass);
14130 /* Remove pushed operand from stack. */
14132 ix86_free_from_memory (GET_MODE (pushed));
14134 if (split_branch_probability >= 0)
14136 /* Distribute the probabilities across the jumps.
14137 Assume the BYPASS and SECOND to be always test
14139 probability = split_branch_probability;
14141 /* Value of 1 is low enough to make no need for probability
14142 to be updated. Later we may run some experiments and see
14143 if unordered values are more frequent in practice. */
14145 bypass_probability = 1;
14147 second_probability = 1;
/* Bypass jump: skip over the main test for unordered inputs.  */
14149 if (bypass != NULL_RTX)
14151 label = gen_label_rtx ();
14152 i = emit_jump_insn (gen_rtx_SET
14154 gen_rtx_IF_THEN_ELSE (VOIDmode,
14156 gen_rtx_LABEL_REF (VOIDmode,
14159 if (bypass_probability >= 0)
14161 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14162 GEN_INT (bypass_probability),
/* Main conditional jump.  */
14165 i = emit_jump_insn (gen_rtx_SET
14167 gen_rtx_IF_THEN_ELSE (VOIDmode,
14168 condition, target1, target2)));
14169 if (probability >= 0)
14171 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14172 GEN_INT (probability),
/* Optional second jump for conditions needing two tests.  */
14174 if (second != NULL_RTX)
14176 i = emit_jump_insn (gen_rtx_SET
14178 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14180 if (second_probability >= 0)
14182 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14183 GEN_INT (second_probability),
14186 if (label != NULL_RTX)
14187 emit_label (label);
/* Expand a setcc of condition CODE into QImode register DEST, using the
   implicit ix86_compare_op0/op1.  Returns 1 on success (DONE) and 0 on
   failure (FAIL) so the caller can fall back to another expansion.  */
14191 ix86_expand_setcc (enum rtx_code code, rtx dest)
14193 rtx ret, tmp, tmpreg, equiv;
14194 rtx second_test, bypass_test;
/* Double-word compares go through the special branch path instead.  */
14196 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14197 return 0; /* FAIL */
14199 gcc_assert (GET_MODE (dest) == QImode);
14201 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14202 PUT_MODE (ret, QImode);
14207 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* An FP compare may require a second setcc combined with AND (for a
   second test) or OR (for an inverted bypass test).  */
14208 if (bypass_test || second_test)
14210 rtx test = second_test;
14212 rtx tmp2 = gen_reg_rtx (QImode);
14215 gcc_assert (!second_test);
14216 test = bypass_test;
14218 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14220 PUT_MODE (test, QImode);
14221 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14224 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14226 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14229 /* Attach a REG_EQUAL note describing the comparison result. */
14230 if (ix86_compare_op0 && ix86_compare_op1)
14232 equiv = simplify_gen_relational (code, QImode,
14233 GET_MODE (ix86_compare_op0),
14234 ix86_compare_op0, ix86_compare_op1);
14235 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14238 return 1; /* DONE */
14241 /* Expand comparison setting or clearing carry flag. Return true when
14242 successful and set pop for the operation. */
/* On success *POP is an (LTU flags 0) or (GEU flags 0) rtx, i.e. a pure
   carry-flag test usable by sbb/adc based sequences.  */
14244 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14246 enum machine_mode mode =
14247 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14249 /* Do not handle DImode compares that go through special path. */
14250 if (mode == (TARGET_64BIT ? TImode : DImode))
14253 if (SCALAR_FLOAT_MODE_P (mode))
14255 rtx second_test = NULL, bypass_test = NULL;
14256 rtx compare_op, compare_seq;
14258 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14260 /* Shortcut: following common codes never translate
14261 into carry flag compares. */
14262 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14263 || code == ORDERED || code == UNORDERED)
14266 /* These comparisons require zero flag; swap operands so they won't. */
14267 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14268 && !TARGET_IEEE_FP)
14273 code = swap_condition (code);
14276 /* Try to expand the comparison and verify that we end up with
14277 carry flag based comparison. This fails to be true only when
14278 we decide to expand comparison using arithmetic that is not
14279 too common scenario. */
14281 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14282 &second_test, &bypass_test);
14283 compare_seq = get_insns ();
/* Extra tests mean the condition is not a single carry test.  */
14286 if (second_test || bypass_test)
14289 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14290 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14291 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14293 code = GET_CODE (compare_op);
14295 if (code != LTU && code != GEU)
14298 emit_insn (compare_seq);
14303 if (!INTEGRAL_MODE_P (mode))
14312 /* Convert a==0 into (unsigned)a<1. */
14315 if (op1 != const0_rtx)
14318 code = (code == EQ ? LTU : GEU);
14321 /* Convert a>b into b<a or a>=b-1. */
14324 if (CONST_INT_P (op1))
14326 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
14327 /* Bail out on overflow. We still can swap operands but that
14328 would force loading of the constant into register. */
14329 if (op1 == const0_rtx
14330 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14332 code = (code == GTU ? GEU : LTU);
14339 code = (code == GTU ? LTU : GEU);
14343 /* Convert a>=0 into (unsigned)a<0x80000000. */
14346 if (mode == DImode || op1 != const0_rtx)
14348 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14349 code = (code == LT ? GEU : LTU);
14353 if (mode == DImode || op1 != constm1_rtx)
14355 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14356 code = (code == LE ? GEU : LTU);
14362 /* Swapping operands may cause constant to appear as first operand. */
14363 if (!nonimmediate_operand (op0, VOIDmode))
14365 if (!can_create_pseudo_p ())
14367 op0 = force_reg (mode, op0);
/* Hand the massaged operands to the generic expander; the result must
   be a carry-flag test by construction.  */
14369 ix86_compare_op0 = op0;
14370 ix86_compare_op1 = op1;
14371 *pop = ix86_expand_compare (code, NULL, NULL);
14372 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1]
   ? operands[2] : operands[3], where operands[1] holds the comparison
   code applied to the implicit ix86_compare_op0/op1.  Tries a series of
   branch-free sequences (sbb/setcc arithmetic, lea, and/or masking)
   before falling back to a cmov or failing.  Returns 1 (DONE) on
   success, 0 (FAIL) to let the caller emit a branchy sequence.  */
14377 ix86_expand_int_movcc (rtx operands[])
14379 enum rtx_code code = GET_CODE (operands[1]), compare_code;
14380 rtx compare_seq, compare_op;
14381 rtx second_test, bypass_test;
14382 enum machine_mode mode = GET_MODE (operands[0]);
14383 bool sign_bit_compare_p = false;
14386 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14387 compare_seq = get_insns ();
14390 compare_code = GET_CODE (compare_op);
/* x < 0 / x >= 0 (and the -1 variants) test only the sign bit.  */
14392 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14393 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14394 sign_bit_compare_p = true;
14396 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14397 HImode insns, we'd be swallowed in word prefix ops. */
14399 if ((mode != HImode || TARGET_FAST_PREFIX)
14400 && (mode != (TARGET_64BIT ? TImode : DImode))
14401 && CONST_INT_P (operands[2])
14402 && CONST_INT_P (operands[3]))
14404 rtx out = operands[0];
14405 HOST_WIDE_INT ct = INTVAL (operands[2]);
14406 HOST_WIDE_INT cf = INTVAL (operands[3]);
14407 HOST_WIDE_INT diff;
14410 /* Sign bit compares are better done using shifts than we do by using
14412 if (sign_bit_compare_p
14413 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14414 ix86_compare_op1, &compare_op))
14416 /* Detect overlap between destination and compare sources. */
14419 if (!sign_bit_compare_p)
14421 bool fpcmp = false;
14423 compare_code = GET_CODE (compare_op);
14425 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14426 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14429 compare_code = ix86_fp_compare_code_to_integer (compare_code);
14432 /* To simplify rest of code, restrict to the GEU case. */
14433 if (compare_code == LTU)
14435 HOST_WIDE_INT tmp = ct;
14438 compare_code = reverse_condition (compare_code);
14439 code = reverse_condition (code);
14444 PUT_CODE (compare_op,
14445 reverse_condition_maybe_unordered
14446 (GET_CODE (compare_op)));
14448 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14452 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14453 || reg_overlap_mentioned_p (out, ix86_compare_op1))
14454 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag via sbb.  */
14456 if (mode == DImode)
14457 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14459 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14463 if (code == GT || code == GE)
14464 code = reverse_condition (code);
14467 HOST_WIDE_INT tmp = ct;
/* Sign-bit compare: sar produces the 0/-1 mask directly.  */
14472 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14473 ix86_compare_op1, VOIDmode, 0, -1);
14486 tmp = expand_simple_binop (mode, PLUS,
14488 copy_rtx (tmp), 1, OPTAB_DIRECT);
14499 tmp = expand_simple_binop (mode, IOR,
14501 copy_rtx (tmp), 1, OPTAB_DIRECT);
14503 else if (diff == -1 && ct)
14513 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14515 tmp = expand_simple_binop (mode, PLUS,
14516 copy_rtx (tmp), GEN_INT (cf),
14517 copy_rtx (tmp), 1, OPTAB_DIRECT);
14525 * andl cf - ct, dest
/* General 0/-1-mask case: optionally invert, mask with cf-ct, add ct.  */
14535 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14538 tmp = expand_simple_binop (mode, AND,
14540 gen_int_mode (cf - ct, mode),
14541 copy_rtx (tmp), 1, OPTAB_DIRECT);
14543 tmp = expand_simple_binop (mode, PLUS,
14544 copy_rtx (tmp), GEN_INT (ct),
14545 copy_rtx (tmp), 1, OPTAB_DIRECT);
14548 if (!rtx_equal_p (tmp, out))
14549 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
14551 return 1; /* DONE */
14556 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14559 tmp = ct, ct = cf, cf = tmp;
14562 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14564 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14566 /* We may be reversing unordered compare to normal compare, that
14567 is not valid in general (we may convert non-trapping condition
14568 to trapping one), however on i386 we currently emit all
14569 comparisons unordered. */
14570 compare_code = reverse_condition_maybe_unordered (compare_code);
14571 code = reverse_condition_maybe_unordered (code);
14575 compare_code = reverse_condition (compare_code);
14576 code = reverse_condition (code);
14580 compare_code = UNKNOWN;
14581 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
14582 && CONST_INT_P (ix86_compare_op1))
14584 if (ix86_compare_op1 == const0_rtx
14585 && (code == LT || code == GE))
14586 compare_code = code;
14587 else if (ix86_compare_op1 == constm1_rtx)
14591 else if (code == GT)
14596 /* Optimize dest = (op0 < 0) ? -1 : cf. */
14597 if (compare_code != UNKNOWN
14598 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
14599 && (cf == -1 || ct == -1))
14601 /* If lea code below could be used, only optimize
14602 if it results in a 2 insn sequence. */
14604 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
14605 || diff == 3 || diff == 5 || diff == 9)
14606 || (compare_code == LT && ct == -1)
14607 || (compare_code == GE && cf == -1))
14610 * notl op1 (if necessary)
14618 code = reverse_condition (code);
14621 out = emit_store_flag (out, code, ix86_compare_op0,
14622 ix86_compare_op1, VOIDmode, 0, -1);
14624 out = expand_simple_binop (mode, IOR,
14626 out, 1, OPTAB_DIRECT);
14627 if (out != operands[0])
14628 emit_move_insn (operands[0], out);
14630 return 1; /* DONE */
/* setcc + lea case: diff must be a valid lea scale/scale+1.  */
14635 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
14636 || diff == 3 || diff == 5 || diff == 9)
14637 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
14639 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
14645 * lea cf(dest*(ct-cf)),dest
14649 * This also catches the degenerate setcc-only case.
14655 out = emit_store_flag (out, code, ix86_compare_op0,
14656 ix86_compare_op1, VOIDmode, 0, 1);
14659 /* On x86_64 the lea instruction operates on Pmode, so we need
14660 to get arithmetics done in proper mode to match. */
14662 tmp = copy_rtx (out);
14666 out1 = copy_rtx (out);
14667 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
14671 tmp = gen_rtx_PLUS (mode, tmp, out1);
14677 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
14680 if (!rtx_equal_p (tmp, out))
14683 out = force_operand (tmp, copy_rtx (out));
14685 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
14687 if (!rtx_equal_p (out, operands[0]))
14688 emit_move_insn (operands[0], copy_rtx (out));
14690 return 1; /* DONE */
14694 * General case: Jumpful:
14695 * xorl dest,dest cmpl op1, op2
14696 * cmpl op1, op2 movl ct, dest
14697 * setcc dest jcc 1f
14698 * decl dest movl cf, dest
14699 * andl (cf-ct),dest 1:
14702 * Size 20. Size 14.
14704 * This is reasonably steep, but branch mispredict costs are
14705 * high on modern cpus, so consider failing only if optimizing
14709 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14710 && BRANCH_COST (optimize_insn_for_speed_p (),
14715 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14720 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14722 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14724 /* We may be reversing unordered compare to normal compare,
14725 that is not valid in general (we may convert non-trapping
14726 condition to trapping one), however on i386 we currently
14727 emit all comparisons unordered. */
14728 code = reverse_condition_maybe_unordered (code);
14732 code = reverse_condition (code);
14733 if (compare_code != UNKNOWN)
14734 compare_code = reverse_condition (compare_code);
14738 if (compare_code != UNKNOWN)
14740 /* notl op1 (if needed)
14745 For x < 0 (resp. x <= -1) there will be no notl,
14746 so if possible swap the constants to get rid of the
14748 True/false will be -1/0 while code below (store flag
14749 followed by decrement) is 0/-1, so the constants need
14750 to be exchanged once more. */
14752 if (compare_code == GE || !cf)
14754 code = reverse_condition (code);
14759 HOST_WIDE_INT tmp = cf;
14764 out = emit_store_flag (out, code, ix86_compare_op0,
14765 ix86_compare_op1, VOIDmode, 0, -1);
14769 out = emit_store_flag (out, code, ix86_compare_op0,
14770 ix86_compare_op1, VOIDmode, 0, 1);
14772 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
14773 copy_rtx (out), 1, OPTAB_DIRECT);
14776 out = expand_simple_binop (mode, AND, copy_rtx (out),
14777 gen_int_mode (cf - ct, mode),
14778 copy_rtx (out), 1, OPTAB_DIRECT);
14780 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
14781 copy_rtx (out), 1, OPTAB_DIRECT);
14782 if (!rtx_equal_p (out, operands[0]))
14783 emit_move_insn (operands[0], copy_rtx (out));
14785 return 1; /* DONE */
14789 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14791 /* Try a few things more with specific constants and a variable. */
14794 rtx var, orig_out, out, tmp;
14796 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
14797 return 0; /* FAIL */
14799 /* If one of the two operands is an interesting constant, load a
14800 constant with the above and mask it in with a logical operation. */
14802 if (CONST_INT_P (operands[2]))
14805 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
14806 operands[3] = constm1_rtx, op = and_optab;
14807 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
14808 operands[3] = const0_rtx, op = ior_optab;
14810 return 0; /* FAIL */
14812 else if (CONST_INT_P (operands[3]))
14815 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
14816 operands[2] = constm1_rtx, op = and_optab;
/* Mirror the CONST_INT_P (operands[2]) branch above: the guard must
   test the variable side (operands[2]); testing operands[3] here was
   a copy-paste bug -- it is trivially true when INTVAL is -1.  */
14817 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
14818 operands[2] = const0_rtx, op = ior_optab;
14820 return 0; /* FAIL */
14823 return 0; /* FAIL */
14825 orig_out = operands[0];
14826 tmp = gen_reg_rtx (mode);
14829 /* Recurse to get the constant loaded. */
14830 if (ix86_expand_int_movcc (operands) == 0)
14831 return 0; /* FAIL */
14833 /* Mask in the interesting variable. */
14834 out = expand_binop (mode, op, var, tmp, orig_out, 0,
14836 if (!rtx_equal_p (out, orig_out))
14837 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
14839 return 1; /* DONE */
14843 * For comparison with above,
/* cmov fallback: force operands into registers as required.  */
14853 if (! nonimmediate_operand (operands[2], mode))
14854 operands[2] = force_reg (mode, operands[2]);
14855 if (! nonimmediate_operand (operands[3], mode))
14856 operands[3] = force_reg (mode, operands[3]);
14858 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
14860 rtx tmp = gen_reg_rtx (mode);
14861 emit_move_insn (tmp, operands[3]);
14864 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
14866 rtx tmp = gen_reg_rtx (mode);
14867 emit_move_insn (tmp, operands[2]);
14871 if (! register_operand (operands[2], VOIDmode)
14873 || ! register_operand (operands[3], VOIDmode)))
14874 operands[2] = force_reg (mode, operands[2]);
14877 && ! register_operand (operands[3], VOIDmode))
14878 operands[3] = force_reg (mode, operands[3]);
14880 emit_insn (compare_seq);
14881 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14882 gen_rtx_IF_THEN_ELSE (mode,
14883 compare_op, operands[2],
/* Additional cmovs for the FP bypass/second tests.  */
14886 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14887 gen_rtx_IF_THEN_ELSE (mode,
14889 copy_rtx (operands[3]),
14890 copy_rtx (operands[0]))));
14892 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14893 gen_rtx_IF_THEN_ELSE (mode,
14895 copy_rtx (operands[2]),
14896 copy_rtx (operands[0]))));
14898 return 1; /* DONE */
14901 /* Swap, force into registers, or otherwise massage the two operands
14902 to an sse comparison with a mask result. Thus we differ a bit from
14903 ix86_prepare_fp_compare_args which expects to produce a flags result.
14905 The DEST operand exists to help determine whether to commute commutative
14906 operators. The POP0/POP1 operands are updated in place. The new
14907 comparison code is returned, or UNKNOWN if not implementable. */
14909 static enum rtx_code
14910 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
14911 rtx *pop0, rtx *pop1)
14919 /* We have no LTGT as an operator. We could implement it with
14920 NE & ORDERED, but this requires an extra temporary. It's
14921 not clear that it's worth it. */
14928 /* These are supported directly. */
14935 /* For commutative operators, try to canonicalize the destination
14936 operand to be first in the comparison - this helps reload to
14937 avoid extra moves. */
14938 if (!dest || !rtx_equal_p (dest, *pop1))
14946 /* These are not supported directly. Swap the comparison operands
14947 to transform into something that is supported. */
14951 code = swap_condition (code);
/* Any other code reaching here is a caller bug.  */
14955 gcc_unreachable ();
14961 /* Detect conditional moves that exactly match min/max operational
14962 semantics. Note that this is IEEE safe, as long as we don't
14963 interchange the operands.
14965 Returns FALSE if this conditional move doesn't match a MIN/MAX,
14966 and TRUE if the operation is successful and instructions are emitted. */
14969 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
14970 rtx cmp_op1, rtx if_true, rtx if_false)
14972 enum machine_mode mode;
14978 else if (code == UNGE)
/* Canonicalize by swapping the move arms instead of the compare.  */
14981 if_true = if_false;
/* The cmov is a min/max only when the selected values are exactly the
   compared values (in either order).  */
14987 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
14989 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
14994 mode = GET_MODE (dest);
14996 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
14997 but MODE may be a vector mode and thus not appropriate. */
14998 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Use the IEEE-exact UNSPEC forms when NaNs/signed zeros matter.  */
15000 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15003 if_true = force_reg (mode, if_true);
15004 v = gen_rtvec (2, if_true, if_false);
15005 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Otherwise plain SMIN/SMAX rtl is sufficient.  */
15009 code = is_min ? SMIN : SMAX;
15010 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15013 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15017 /* Expand an sse vector comparison. Return the register with the result. */
15020 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15021 rtx op_true, rtx op_false)
15023 enum machine_mode mode = GET_MODE (dest);
15026 cmp_op0 = force_reg (mode, cmp_op0);
15027 if (!nonimmediate_operand (cmp_op1, mode))
15028 cmp_op1 = force_reg (mode, cmp_op1);
/* Use a fresh register when DEST overlaps any input, since DEST is
   written before OP_TRUE/OP_FALSE are consumed by the caller.  */
15031 || reg_overlap_mentioned_p (dest, op_true)
15032 || reg_overlap_mentioned_p (dest, op_false))
15033 dest = gen_reg_rtx (mode);
15035 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15036 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15041 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15042 operations. This is used for both scalar and vector conditional moves. */
15045 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15047 enum machine_mode mode = GET_MODE (dest);
/* CMP is an all-ones/all-zeros mask; a zero arm reduces the select to a
   single AND (or ANDN).  */
15050 if (op_false == CONST0_RTX (mode))
15052 op_true = force_reg (mode, op_true);
15053 x = gen_rtx_AND (mode, cmp, op_true);
15054 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15056 else if (op_true == CONST0_RTX (mode))
15058 op_false = force_reg (mode, op_false);
15059 x = gen_rtx_NOT (mode, cmp);
15060 x = gen_rtx_AND (mode, x, op_false);
15061 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real conditional-move (pcmov) instruction.  */
15063 else if (TARGET_SSE5)
15065 rtx pcmov = gen_rtx_SET (mode, dest,
15066 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* General case: (op_true & cmp) | (op_false & ~cmp).  */
15073 op_true = force_reg (mode, op_true);
15074 op_false = force_reg (mode, op_false);
15076 t2 = gen_reg_rtx (mode);
15078 t3 = gen_reg_rtx (mode);
15082 x = gen_rtx_AND (mode, op_true, cmp);
15083 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15085 x = gen_rtx_NOT (mode, cmp);
15086 x = gen_rtx_AND (mode, x, op_false);
15087 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15089 x = gen_rtx_IOR (mode, t3, t2);
15090 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15094 /* Expand a floating-point conditional move. Return true if successful. */
15097 ix86_expand_fp_movcc (rtx operands[])
15099 enum machine_mode mode = GET_MODE (operands[0]);
15100 enum rtx_code code = GET_CODE (operands[1]);
15101 rtx tmp, compare_op, second_test, bypass_test;
/* SSE math path: implement the cmov with mask/logic ops or min/max.  */
15103 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15105 enum machine_mode cmode;
15107 /* Since we've no cmove for sse registers, don't force bad register
15108 allocation just to gain access to it. Deny movcc when the
15109 comparison mode doesn't match the move mode. */
15110 cmode = GET_MODE (ix86_compare_op0);
15111 if (cmode == VOIDmode)
15112 cmode = GET_MODE (ix86_compare_op1);
15116 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15118 &ix86_compare_op1);
15119 if (code == UNKNOWN)
/* Prefer a direct min/max when the pattern matches exactly.  */
15122 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15123 ix86_compare_op1, operands[2],
15127 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15128 ix86_compare_op1, operands[2], operands[3]);
15129 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15133 /* The floating point conditional move instructions don't directly
15134 support conditions resulting from a signed integer comparison. */
15136 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15138 /* The floating point conditional move instructions don't directly
15139 support signed integer comparisons. */
15141 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Reduce to a setcc against zero, which fcmov can handle.  */
15143 gcc_assert (!second_test && !bypass_test);
15144 tmp = gen_reg_rtx (QImode);
15145 ix86_expand_setcc (code, tmp);
15147 ix86_compare_op0 = tmp;
15148 ix86_compare_op1 = const0_rtx;
15149 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy an arm into a temporary when it overlaps the destination, since
   the extra bypass/second cmovs below read it after DEST is written.  */
15151 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15153 tmp = gen_reg_rtx (mode);
15154 emit_move_insn (tmp, operands[3]);
15157 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15159 tmp = gen_reg_rtx (mode);
15160 emit_move_insn (tmp, operands[2]);
15164 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15165 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15166 operands[2], operands[3])));
15168 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15169 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15170 operands[3], operands[0])));
15172 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15173 gen_rtx_IF_THEN_ELSE (mode, second_test,
15174 operands[2], operands[0])));
15179 /* Expand a floating-point vector conditional move; a vcond operation
15180 rather than a movcc operation. */
/* operands[0] = (operands[4] <op3> operands[5]) ? operands[1]
   : operands[2], implemented with an SSE compare mask (or min/max).  */
15183 ix86_expand_fp_vcond (rtx operands[])
15185 enum rtx_code code = GET_CODE (operands[3]);
15188 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15189 &operands[4], &operands[5]);
15190 if (code == UNKNOWN)
/* Prefer direct min/max when the select matches it exactly.  */
15193 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15194 operands[5], operands[1], operands[2]))
15197 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15198 operands[1], operands[2]);
15199 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15203 /* Expand a signed/unsigned integral vector conditional move. */
15206 ix86_expand_int_vcond (rtx operands[])
15208 enum machine_mode mode = GET_MODE (operands[0]);
15209 enum rtx_code code = GET_CODE (operands[3]);
/* NEGATE tracks whether the select arms were logically swapped while
   canonicalizing the comparison (applied via the 1+negate indexing).  */
15210 bool negate = false;
15213 cop0 = operands[4];
15214 cop1 = operands[5];
15216 /* SSE5 supports all of the comparisons on all vector int types. */
15219 /* Canonicalize the comparison to EQ, GT, GTU. */
15230 code = reverse_condition (code);
15236 code = reverse_condition (code);
15242 code = swap_condition (code);
15243 x = cop0, cop0 = cop1, cop1 = x;
15247 gcc_unreachable ();
15250 /* Only SSE4.1/SSE4.2 supports V2DImode. */
15251 if (mode == V2DImode)
15256 /* SSE4.1 supports EQ. */
15257 if (!TARGET_SSE4_1)
15263 /* SSE4.2 supports GT/GTU. */
15264 if (!TARGET_SSE4_2)
15269 gcc_unreachable ();
15273 /* Unsigned parallel compare is not supported by the hardware. Play some
15274 tricks to turn this into a signed comparison against 0. */
15277 cop0 = force_reg (mode, cop0);
15286 /* Perform a parallel modulo subtraction. */
15287 t1 = gen_reg_rtx (mode);
15288 emit_insn ((mode == V4SImode
15290 : gen_subv2di3) (t1, cop0, cop1));
15292 /* Extract the original sign bit of op0. */
15293 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15295 t2 = gen_reg_rtx (mode);
15296 emit_insn ((mode == V4SImode
15298 : gen_andv2di3) (t2, cop0, mask));
15300 /* XOR it back into the result of the subtraction. This results
15301 in the sign bit set iff we saw unsigned underflow. */
15302 x = gen_reg_rtx (mode);
15303 emit_insn ((mode == V4SImode
15305 : gen_xorv2di3) (x, t1, t2));
15313 /* Perform a parallel unsigned saturating subtraction. */
15314 x = gen_reg_rtx (mode);
15315 emit_insn (gen_rtx_SET (VOIDmode, x,
15316 gen_rtx_US_MINUS (mode, cop0, cop1)));
15323 gcc_unreachable ();
/* After the tricks above the comparison is against zero.  */
15327 cop1 = CONST0_RTX (mode);
/* 1+negate / 2-negate swaps the arms when NEGATE was set above.  */
15331 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15332 operands[1+negate], operands[2-negate]);
15334 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15335 operands[2-negate]);
15339 /* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
15340 true if we should do zero extension, else sign extension.  HIGH_P is
15341 true if we want the N/2 high elements, else the low elements.  */
15344 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15346 enum machine_mode imode = GET_MODE (operands[1]);
15347 rtx (*unpack)(rtx, rtx, rtx);
/* Select the interleave generator matching the source element width
   and the requested half (high vs. low).  */
15354 unpack = gen_vec_interleave_highv16qi;
15356 unpack = gen_vec_interleave_lowv16qi;
15360 unpack = gen_vec_interleave_highv8hi;
15362 unpack = gen_vec_interleave_lowv8hi;
15366 unpack = gen_vec_interleave_highv4si;
15368 unpack = gen_vec_interleave_lowv4si;
15371 gcc_unreachable ();
15374 dest = gen_lowpart (imode, operands[0]);
/* For zero extension interleave with an all-zero vector; for sign
   extension interleave with a mask computed as (0 > op1), which is
   all-ones exactly in the lanes where op1 is negative.  */
15377 se = force_reg (imode, CONST0_RTX (imode));
15379 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15380 operands[1], pc_rtx, pc_rtx);
15382 emit_insn (unpack (dest, operands[1], se));
15385 /* This function performs the same task as ix86_expand_sse_unpack,
15386 but with SSE4.1 instructions.  */
15389 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15391 enum machine_mode imode = GET_MODE (operands[1]);
15392 rtx (*unpack)(rtx, rtx);
/* Choose the pmovzx/pmovsx-style extend generator for the source
   element width.  */
15399 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15401 unpack = gen_sse4_1_extendv8qiv8hi2;
15405 unpack = gen_sse4_1_zero_extendv4hiv4si2;
15407 unpack = gen_sse4_1_extendv4hiv4si2;
15411 unpack = gen_sse4_1_zero_extendv2siv2di2;
15413 unpack = gen_sse4_1_extendv2siv2di2;
15416 gcc_unreachable ();
15419 dest = operands[0];
15422 /* Shift higher 8 bytes to lower 8 bytes.  */
/* The SSE4.1 extend patterns read the low half of the source, so for
   HIGH_P move the upper 64 bits down first with a TImode shift.  */
15423 src = gen_reg_rtx (imode);
15424 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15425 gen_lowpart (TImode, operands[1]),
15431 emit_insn (unpack (dest, src));
15434 /* This function performs the same task as ix86_expand_sse_unpack,
15435 but with sse5 instructions.  */
15438 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15440 enum machine_mode imode = GET_MODE (operands[1]);
/* Byte-selector table fed to the SSE5 PPERM instruction; each entry
   picks one source byte (or a zero/sign fill) of the 16-byte result.  */
15441 int pperm_bytes[16];
/* Byte offset of the selected half within the source vector.  */
15443 int h = (high_p) ? 8 : 0;
15446 rtvec v = rtvec_alloc (16);
15449 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each result word is a source byte plus a fill byte
   (zero for unsigned, sign-replicated for signed).  */
15454 vs = rtvec_alloc (8);
15455 h2 = (high_p) ? 8 : 0;
15456 for (i = 0; i < 8; i++)
15458 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15459 pperm_bytes[2*i+1] = ((unsigned_p)
15461 : PPERM_SIGN | PPERM_SRC2 | i | h);
15464 for (i = 0; i < 16; i++)
15465 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15467 for (i = 0; i < 8; i++)
15468 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15470 p = gen_rtx_PARALLEL (VOIDmode, vs);
15471 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15473 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15475 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes per element, two fill bytes.  */
15479 vs = rtvec_alloc (4);
15480 h2 = (high_p) ? 4 : 0;
15481 for (i = 0; i < 4; i++)
15483 sign_extend = ((unsigned_p)
15485 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15486 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15487 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15488 pperm_bytes[4*i+2] = sign_extend;
15489 pperm_bytes[4*i+3] = sign_extend;
15492 for (i = 0; i < 16; i++)
15493 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15495 for (i = 0; i < 4; i++)
15496 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15498 p = gen_rtx_PARALLEL (VOIDmode, vs);
15499 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15501 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15503 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes per element, four fill bytes.  */
15507 vs = rtvec_alloc (2);
15508 h2 = (high_p) ? 2 : 0;
15509 for (i = 0; i < 2; i++)
15511 sign_extend = ((unsigned_p)
15513 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
15514 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
15515 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
15516 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
15517 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
15518 pperm_bytes[8*i+4] = sign_extend;
15519 pperm_bytes[8*i+5] = sign_extend;
15520 pperm_bytes[8*i+6] = sign_extend;
15521 pperm_bytes[8*i+7] = sign_extend;
15524 for (i = 0; i < 16; i++)
15525 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15527 for (i = 0; i < 2; i++)
15528 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15530 p = gen_rtx_PARALLEL (VOIDmode, vs);
15531 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15533 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
15535 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
15539 gcc_unreachable ();
15545 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
15546 next narrower integer vector type.  Uses the SSE5 PPERM instruction:
   a 16-entry byte selector is built that takes the low bytes of each
   element alternately from SRC1 (operands[1]) and SRC2 (operands[2]).  */
15548 ix86_expand_sse5_pack (rtx operands[3])
15550 enum machine_mode imode = GET_MODE (operands[0]);
15551 int pperm_bytes[16];
15553 rtvec v = rtvec_alloc (16);
15555 rtx op0 = operands[0];
15556 rtx op1 = operands[1];
15557 rtx op2 = operands[2];
/* V8HI inputs -> V16QI result: take byte 0 of each word.  */
15562 for (i = 0; i < 8; i++)
15564 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
15565 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
15568 for (i = 0; i < 16; i++)
15569 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15571 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15572 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V4SI inputs -> V8HI result: take bytes 0-1 of each dword.  */
15576 for (i = 0; i < 4; i++)
15578 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
15579 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
15580 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
15581 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
15584 for (i = 0; i < 16; i++)
15585 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15587 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15588 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V2DI inputs -> V4SI result: take bytes 0-3 of each qword.  */
15592 for (i = 0; i < 2; i++)
15594 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
15595 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
15596 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
15597 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
15598 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
15599 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
15600 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
15601 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
15604 for (i = 0; i < 16; i++)
15605 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15607 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15608 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
15612 gcc_unreachable ();
15618 /* Expand conditional increment or decrement using adc/sbb instructions.
15619 The default case using setcc followed by the conditional move can be
15620 done by generic code.  */
15622 ix86_expand_int_addcc (rtx operands[])
15624 enum rtx_code code = GET_CODE (operands[1]);
15626 rtx val = const0_rtx;
15627 bool fpcmp = false;
15628 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1/-1 adjustments can be folded into the carry of adc/sbb.  */
15630 if (operands[3] != const1_rtx
15631 && operands[3] != constm1_rtx)
/* The comparison must be expressible as a carry-flag test.  */
15633 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15634 ix86_compare_op1, &compare_op))
15636 code = GET_CODE (compare_op);
15638 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15639 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15642 code = ix86_fp_compare_code_to_integer (code);
/* FP comparisons need the unordered-aware reversal; integer ones
   use the plain reversal.  */
15649 PUT_CODE (compare_op,
15650 reverse_condition_maybe_unordered
15651 (GET_CODE (compare_op)));
15653 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15655 PUT_MODE (compare_op, mode);
15657 /* Construct either adc or sbb insn.  */
15658 if ((code == LTU) == (operands[3] == constm1_rtx))
15660 switch (GET_MODE (operands[0]))
15663 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
15666 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
15669 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
15672 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15675 gcc_unreachable ();
15680 switch (GET_MODE (operands[0]))
15683 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
15686 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
15689 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
15692 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15695 gcc_unreachable ();
15698 return 1; /* DONE */
15702 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
15703 works for floating point parameters and nonoffsetable memories.
15704 For pushes, it returns just stack offsets; the values will be saved
15705 in the right order.  NOTE(review): the assertion below allows up to
   four parts (size <= 4), not three as an older comment claimed.  */
15708 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
15713 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
15715 size = (GET_MODE_SIZE (mode) + 4) / 8;
15717 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
15718 gcc_assert (size >= 2 && size <= 4);
15720 /* Optimize constant pool reference to immediates.  This is used by fp
15721 moves, that force all constants to memory to allow combining.  */
15722 if (MEM_P (operand) && MEM_READONLY_P (operand))
15724 rtx tmp = maybe_get_pool_constant (operand);
15729 if (MEM_P (operand) && !offsettable_memref_p (operand))
15731 /* The only non-offsetable memories we handle are pushes.  */
15732 int ok = push_operand (operand, VOIDmode);
15736 operand = copy_rtx (operand);
15737 PUT_MODE (operand, Pmode);
15738 parts[0] = parts[1] = parts[2] = parts[3] = operand;
15742 if (GET_CODE (operand) == CONST_VECTOR)
15744 enum machine_mode imode = int_mode_for_mode (mode);
15745 /* Caution: if we looked through a constant pool memory above,
15746 the operand may actually have a different mode now.  That's
15747 ok, since we want to pun this all the way back to an integer.  */
15748 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
15749 gcc_assert (operand != NULL);
15755 if (mode == DImode)
15756 split_di (&operand, 1, &parts[0], &parts[1]);
/* 32-bit path: decompose registers, offsettable memory, or FP
   constants into consecutive SImode pieces.  */
15761 if (REG_P (operand))
15763 gcc_assert (reload_completed);
15764 for (i = 0; i < size; i++)
15765 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
15767 else if (offsettable_memref_p (operand))
15769 operand = adjust_address (operand, SImode, 0);
15770 parts[0] = operand;
15771 for (i = 1; i < size; i++)
15772 parts[i] = adjust_address (operand, SImode, 4 * i);
15774 else if (GET_CODE (operand) == CONST_DOUBLE)
15779 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* Convert the REAL_VALUE to its target representation and emit
   each 32-bit word as an immediate.  */
15783 real_to_target (l, &r, mode);
15784 parts[3] = gen_int_mode (l[3], SImode);
15785 parts[2] = gen_int_mode (l[2], SImode);
15788 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
15789 parts[2] = gen_int_mode (l[2], SImode);
15792 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15795 gcc_unreachable ();
15797 parts[1] = gen_int_mode (l[1], SImode);
15798 parts[0] = gen_int_mode (l[0], SImode);
15801 gcc_unreachable ();
/* 64-bit path: split into a DImode low part and an SImode/DImode
   upper part depending on the mode.  */
15806 if (mode == TImode)
15807 split_ti (&operand, 1, &parts[0], &parts[1]);
15808 if (mode == XFmode || mode == TFmode)
15810 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
15811 if (REG_P (operand))
15813 gcc_assert (reload_completed);
15814 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
15815 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
15817 else if (offsettable_memref_p (operand))
15819 operand = adjust_address (operand, DImode, 0);
15820 parts[0] = operand;
15821 parts[1] = adjust_address (operand, upper_mode, 8);
15823 else if (GET_CODE (operand) == CONST_DOUBLE)
15828 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15829 real_to_target (l, &r, mode);
15831 /* Do not use shift by 32 to avoid warning on 32bit systems.  */
15832 if (HOST_BITS_PER_WIDE_INT >= 64)
15835 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
15836 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
15839 parts[0] = immed_double_const (l[0], l[1], DImode);
15841 if (upper_mode == SImode)
15842 parts[1] = gen_int_mode (l[2], SImode);
15843 else if (HOST_BITS_PER_WIDE_INT >= 64)
15846 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
15847 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
15850 parts[1] = immed_double_const (l[2], l[3], DImode);
15853 gcc_unreachable ();
15860 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
15861 Return false when normal moves are needed; true when all required
15862 insns have been emitted.  Operands 2-4 contain the input values
15863 in the correct order; operands 5-7 contain the output values.
   NOTE(review): the loops below actually fill operands[2..2+nparts-1]
   and operands[6..6+nparts-1] with nparts up to 4 — the ranges in the
   sentence above look stale; verify against the i386.md splitters.  */
15866 ix86_split_long_move (rtx operands[])
15871 int collisions = 0;
15872 enum machine_mode mode = GET_MODE (operands[0]);
15873 bool collisionparts[4];
15875 /* The DFmode expanders may ask us to move double.
15876 For 64bit target this is single move.  By hiding the fact
15877 here we simplify i386.md splitters.  */
15878 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
15880 /* Optimize constant pool reference to immediates.  This is used by
15881 fp moves, that force all constants to memory to allow combining.  */
15883 if (MEM_P (operands[1])
15884 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
15885 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
15886 operands[1] = get_pool_constant (XEXP (operands[1], 0));
15887 if (push_operand (operands[0], VOIDmode))
15889 operands[0] = copy_rtx (operands[0]);
15890 PUT_MODE (operands[0], Pmode);
15893 operands[0] = gen_lowpart (DImode, operands[0]);
15894 operands[1] = gen_lowpart (DImode, operands[1]);
15895 emit_move_insn (operands[0], operands[1]);
15899 /* The only non-offsettable memory we handle is push.  */
15900 if (push_operand (operands[0], VOIDmode))
15903 gcc_assert (!MEM_P (operands[0])
15904 || offsettable_memref_p (operands[0]));
15906 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
15907 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
15909 /* When emitting push, take care for source operands on the stack.  */
15910 if (push && MEM_P (operands[1])
15911 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
15912 for (i = 0; i < nparts - 1; i++)
15913 part[1][i] = change_address (part[1][i],
15914 GET_MODE (part[1][i]),
15915 XEXP (part[1][i + 1], 0));
15917 /* We need to do copy in the right order in case an address register
15918 of the source overlaps the destination.  */
15919 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
15923 for (i = 0; i < nparts; i++)
15926 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
15927 if (collisionparts[i])
15931 /* Collision in the middle part can be handled by reordering.  */
15932 if (collisions == 1 && nparts == 3 && collisionparts [1])
15934 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15935 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15937 else if (collisions == 1
15939 && (collisionparts [1] || collisionparts [2]))
15941 if (collisionparts [1])
15943 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15944 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15948 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
15949 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
15953 /* If there are more collisions, we can't handle it by reordering.
15954 Do an lea to the last part and use only one colliding move.  */
15955 else if (collisions > 1)
15961 base = part[0][nparts - 1];
15963 /* Handle the case when the last part isn't valid for lea.
15964 Happens in 64-bit mode storing the 12-byte XFmode.  */
15965 if (GET_MODE (base) != Pmode)
15966 base = gen_rtx_REG (Pmode, REGNO (base));
15968 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
15969 part[1][0] = replace_equiv_address (part[1][0], base);
15970 for (i = 1; i < nparts; i++)
15972 tmp = plus_constant (base, UNITS_PER_WORD * i);
15973 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path: emit the upper parts first so they land at the right
   stack offsets; XFmode on 32 bit needs 4 bytes of extra padding.  */
15984 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
15985 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
15986 emit_move_insn (part[0][2], part[1][2]);
15988 else if (nparts == 4)
15990 emit_move_insn (part[0][3], part[1][3]);
15991 emit_move_insn (part[0][2], part[1][2]);
15996 /* In 64bit mode we don't have 32bit push available.  In case this is
15997 register, it is OK - we will just use larger counterpart.  We also
15998 retype memory - these comes from attempt to avoid REX prefix on
15999 moving of second half of TFmode value.  */
16000 if (GET_MODE (part[1][1]) == SImode)
16002 switch (GET_CODE (part[1][1]))
16005 part[1][1] = adjust_address (part[1][1], DImode, 0);
16009 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16013 gcc_unreachable ();
16016 if (GET_MODE (part[1][0]) == SImode)
16017 part[1][0] = part[1][1];
16020 emit_move_insn (part[0][1], part[1][1]);
16021 emit_move_insn (part[0][0], part[1][0]);
16025 /* Choose correct order to not overwrite the source before it is copied.  */
16026 if ((REG_P (part[0][0])
16027 && REG_P (part[1][1])
16028 && (REGNO (part[0][0]) == REGNO (part[1][1])
16030 && REGNO (part[0][0]) == REGNO (part[1][2]))
16032 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16034 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16036 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16038 operands[2 + i] = part[0][j];
16039 operands[6 + i] = part[1][j];
16044 for (i = 0; i < nparts; i++)
16046 operands[2 + i] = part[0][i];
16047 operands[6 + i] = part[1][i];
16051 /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
16052 if (optimize_insn_for_size_p ())
16054 for (j = 0; j < nparts - 1; j++)
16055 if (CONST_INT_P (operands[6 + j])
16056 && operands[6 + j] != const0_rtx
16057 && REG_P (operands[2 + j]))
16058 for (i = j; i < nparts - 1; i++)
16059 if (CONST_INT_P (operands[7 + i])
16060 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16061 operands[7 + i] = operands[2 + j];
16064 for (i = 0; i < nparts; i++)
16065 emit_move_insn (operands[2 + i], operands[6 + i]);
16070 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16071 left shift by a constant, either using a single shift or
16072 a sequence of add instructions.  A self-add (x + x) is a shift left
   by one; a short run of adds is used when its total cost beats a
   single shift-by-constant on this CPU.  */
16075 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16079 emit_insn ((mode == DImode
16081 : gen_adddi3) (operand, operand, operand));
/* Use COUNT adds only when optimizing for speed and the cost model
   says adds are cheaper than one constant shift.  */
16083 else if (!optimize_insn_for_size_p ()
16084 && count * ix86_cost->add <= ix86_cost->shift_const
16087 for (i=0; i<count; i++)
16089 emit_insn ((mode == DImode
16091 : gen_adddi3) (operand, operand, operand));
16095 emit_insn ((mode == DImode
16097 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32 bit, TImode on 64 bit)
   into operations on the two single-word halves.  SCRATCH, when
   available with cmove, avoids the branchy shift-adjust sequence.  */
16101 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16103 rtx low[2], high[2];
16105 const int single_width = mode == DImode ? 32 : 64;
16107 if (CONST_INT_P (operands[2]))
16109 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16110 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shifts of a word or more: low half becomes zero, high half gets
   the old low half shifted by the remainder.  */
16112 if (count >= single_width)
16114 emit_move_insn (high[0], low[1]);
16115 emit_move_insn (low[0], const0_rtx);
16117 if (count > single_width)
16118 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Sub-word shift: shld feeds low bits into the high half, then the
   low half is shifted normally.  */
16122 if (!rtx_equal_p (operands[0], operands[1]))
16123 emit_move_insn (operands[0], operands[1]);
16124 emit_insn ((mode == DImode
16126 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16127 ix86_expand_ashl_const (low[0], count, mode);
16132 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16134 if (operands[1] == const1_rtx)
16136 /* Assuming we've chosen a QImode capable registers, then 1 << N
16137 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
16138 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16140 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16142 ix86_expand_clear (low[0]);
16143 ix86_expand_clear (high[0]);
16144 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16146 d = gen_lowpart (QImode, low[0]);
16147 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16148 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16149 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16151 d = gen_lowpart (QImode, high[0]);
16152 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16153 s = gen_rtx_NE (QImode, flags, const0_rtx);
16154 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16157 /* Otherwise, we can get the same results by manually performing
16158 a bit extract operation on bit 5/6, and then performing the two
16159 shifts.  The two methods of getting 0/1 into low/high are exactly
16160 the same size.  Avoiding the shift in the bit extract case helps
16161 pentium4 a bit; no one else seems to care much either way.  */
16166 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16167 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16169 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16170 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) or 6 (TImode) of the count: it tells
   which word the single set bit lands in.  */
16172 emit_insn ((mode == DImode
16174 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16175 emit_insn ((mode == DImode
16177 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16178 emit_move_insn (low[0], high[0]);
16179 emit_insn ((mode == DImode
16181 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16184 emit_insn ((mode == DImode
16186 : gen_ashldi3) (low[0], low[0], operands[2]));
16187 emit_insn ((mode == DImode
16189 : gen_ashldi3) (high[0], high[0], operands[2]));
16193 if (operands[1] == constm1_rtx)
16195 /* For -1 << N, we can avoid the shld instruction, because we
16196 know that we're shifting 0...31/63 ones into a -1.  */
16197 emit_move_insn (low[0], constm1_rtx)
16198 if (optimize_insn_for_size_p ())
16199 emit_move_insn (high[0], low[0]);
16201 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld plus shift, then fix up for
   counts >= single_width via cmove or a conditional-jump pattern.  */
16205 if (!rtx_equal_p (operands[0], operands[1]))
16206 emit_move_insn (operands[0], operands[1]);
16208 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16209 emit_insn ((mode == DImode
16211 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16214 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16216 if (TARGET_CMOVE && scratch)
16218 ix86_expand_clear (scratch);
16219 emit_insn ((mode == DImode
16220 ? gen_x86_shift_adj_1
16221 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16225 emit_insn ((mode == DImode
16226 ? gen_x86_shift_adj_2
16227 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into operations on the
   two single-word halves; the sign is propagated from the high half.  */
16231 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16233 rtx low[2], high[2];
16235 const int single_width = mode == DImode ? 32 : 64;
16237 if (CONST_INT_P (operands[2]))
16239 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16240 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximal shift: both halves become pure sign bits.  */
16242 if (count == single_width * 2 - 1)
16244 emit_move_insn (high[0], high[1]);
16245 emit_insn ((mode == DImode
16247 : gen_ashrdi3) (high[0], high[0],
16248 GEN_INT (single_width - 1)));
16249 emit_move_insn (low[0], high[0]);
/* Shift by a word or more: low half takes the old high half, high
   half is filled with sign bits.  */
16252 else if (count >= single_width)
16254 emit_move_insn (low[0], high[1]);
16255 emit_move_insn (high[0], low[0]);
16256 emit_insn ((mode == DImode
16258 : gen_ashrdi3) (high[0], high[0],
16259 GEN_INT (single_width - 1)));
16260 if (count > single_width)
16261 emit_insn ((mode == DImode
16263 : gen_ashrdi3) (low[0], low[0],
16264 GEN_INT (count - single_width)));
/* Sub-word shift: shrd pulls high bits into the low half.  */
16268 if (!rtx_equal_p (operands[0], operands[1]))
16269 emit_move_insn (operands[0], operands[1]);
16270 emit_insn ((mode == DImode
16272 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16273 emit_insn ((mode == DImode
16275 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd/sar, then adjust for counts >= single_width.  */
16280 if (!rtx_equal_p (operands[0], operands[1]))
16281 emit_move_insn (operands[0], operands[1]);
16283 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16285 emit_insn ((mode == DImode
16287 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16288 emit_insn ((mode == DImode
16290 : gen_ashrdi3) (high[0], high[0], operands[2]));
16292 if (TARGET_CMOVE && scratch)
/* SCRATCH holds the all-sign-bits word used by the cmove fixup.  */
16294 emit_move_insn (scratch, high[0]);
16295 emit_insn ((mode == DImode
16297 : gen_ashrdi3) (scratch, scratch,
16298 GEN_INT (single_width - 1)));
16299 emit_insn ((mode == DImode
16300 ? gen_x86_shift_adj_1
16301 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16305 emit_insn ((mode == DImode
16306 ? gen_x86_shift_adj_3
16307 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into operations on the two
   single-word halves; zeros are shifted in from the top.  */
16312 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16314 rtx low[2], high[2];
16316 const int single_width = mode == DImode ? 32 : 64;
16318 if (CONST_INT_P (operands[2]))
16320 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16321 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift by a word or more: low half takes the old high half, high
   half becomes zero.  */
16323 if (count >= single_width)
16325 emit_move_insn (low[0], high[1]);
16326 ix86_expand_clear (high[0]);
16328 if (count > single_width)
16329 emit_insn ((mode == DImode
16331 : gen_lshrdi3) (low[0], low[0],
16332 GEN_INT (count - single_width)));
/* Sub-word shift: shrd pulls high bits into the low half.  */
16336 if (!rtx_equal_p (operands[0], operands[1]))
16337 emit_move_insn (operands[0], operands[1]);
16338 emit_insn ((mode == DImode
16340 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16341 emit_insn ((mode == DImode
16343 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd/shr, then fix up counts >= single_width.  */
16348 if (!rtx_equal_p (operands[0], operands[1]))
16349 emit_move_insn (operands[0], operands[1]);
16351 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16353 emit_insn ((mode == DImode
16355 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16356 emit_insn ((mode == DImode
16358 : gen_lshrdi3) (high[0], high[0], operands[2]));
16360 /* Heh.  By reversing the arguments, we can reuse this pattern.  */
16361 if (TARGET_CMOVE && scratch)
16363 ix86_expand_clear (scratch);
16364 emit_insn ((mode == DImode
16365 ? gen_x86_shift_adj_1
16366 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16370 emit_insn ((mode == DImode
16371 ? gen_x86_shift_adj_2
16372 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16376 /* Predict just emitted jump instruction to be taken with probability PROB.
   PROB is on the REG_BR_PROB_BASE scale; the note is attached to the
   last emitted insn, which must be a jump.  */
16378 predict_jump (int prob)
16380 rtx insn = get_last_insn ();
16381 gcc_assert (JUMP_P (insn));
16383 = gen_rtx_EXPR_LIST (REG_BR_PROB,
16388 /* Helper function for the string operations below.  Test VARIABLE whether
16389 it is aligned to VALUE bytes.  If true, jump to the label.  */
16391 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16393 rtx label = gen_label_rtx ();
16394 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* tmpcount = variable & value; the jump is taken when the masked
   bits are zero, i.e. the address is suitably aligned.  */
16395 if (GET_MODE (variable) == DImode)
16396 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16398 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16399 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
/* Epilogue alignment tests are 50/50; prologue ones are expected to
   pass most of the time.  */
16402 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16404 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16408 /* Adjust COUNTER by the VALUE, i.e. decrement COUNTREG by VALUE
   (the emitted add uses -VALUE).  */
16410 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16412 if (GET_MODE (countreg) == DImode)
16413 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16415 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16418 /* Zero extend possibly SImode EXP to Pmode register.  Constants
   (VOIDmode) are simply forced into a Pmode register; values already
   in Pmode are copied; otherwise a zero-extension insn is emitted.  */
16420 ix86_zero_extend_to_Pmode (rtx exp)
16423 if (GET_MODE (exp) == VOIDmode)
16424 return force_reg (Pmode, exp);
16425 if (GET_MODE (exp) == Pmode)
16426 return copy_to_mode_reg (Pmode, exp);
16427 r = gen_reg_rtx (Pmode);
16428 emit_insn (gen_zero_extendsidi2 (r, exp));
16432 /* Divide COUNTREG by SCALE.  For a register count, SCALE must be a
   power of two (the division is done with a logical right shift by
   exact_log2 (SCALE)); constant counts are divided directly.  */
16434 scale_counter (rtx countreg, int scale)
16437 rtx piece_size_mask;
16441 if (CONST_INT_P (countreg))
16442 return GEN_INT (INTVAL (countreg) / scale);
16443 gcc_assert (REG_P (countreg));
16445 piece_size_mask = GEN_INT (scale - 1);
16446 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16447 GEN_INT (exact_log2 (scale)),
16448 NULL, 1, OPTAB_DIRECT);
16452 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over
16453 DImode for constant loop counts; DImode is only needed on 64-bit
   targets when the constant does not fit in 32 bits.  */
16455 static enum machine_mode
16456 counter_mode (rtx count_exp)
16458 if (GET_MODE (count_exp) != VOIDmode)
16459 return GET_MODE (count_exp);
16460 if (GET_CODE (count_exp) != CONST_INT)
16462 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16467 /* When SRCPTR is non-NULL, output simple loop to move memory
16468 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
16469 overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
16470 equivalent loop to set memory by VALUE (supposed to be in MODE).
16472 The size is rounded down to whole number of chunk size moved at once.
16473 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */
16477 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16478 rtx destptr, rtx srcptr, rtx value,
16479 rtx count, enum machine_mode mode, int unroll,
16482 rtx out_label, top_label, iter, tmp;
16483 enum machine_mode iter_mode = counter_mode (count);
16484 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16485 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16491 top_label = gen_label_rtx ();
16492 out_label = gen_label_rtx ();
16493 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the per-iteration
   chunk (mode size * unroll).  */
16495 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16496 NULL, 1, OPTAB_DIRECT);
16497 /* Those two should combine.  */
16498 if (piece_size == const1_rtx)
16500 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
16502 predict_jump (REG_BR_PROB_BASE * 10 / 100);
16504 emit_move_insn (iter, const0_rtx);
16506 emit_label (top_label);
/* Address each chunk as base pointer + current iteration offset.  */
16508 tmp = convert_modes (Pmode, iter_mode, iter, true);
16509 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
16510 destmem = change_address (destmem, mode, x_addr);
16514 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
16515 srcmem = change_address (srcmem, mode, y_addr);
16517 /* When unrolling for chips that reorder memory reads and writes,
16518 we can save registers by using single temporary.
16519 Also using 4 temporaries is overkill in 32bit mode.  */
16520 if (!TARGET_64BIT && 0)
16522 for (i = 0; i < unroll; i++)
16527 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16529 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16531 emit_move_insn (destmem, srcmem);
/* Default copy path: load all unrolled chunks into temporaries
   first, then store them, to decouple loads from stores.  */
16537 gcc_assert (unroll <= 4);
16538 for (i = 0; i < unroll; i++)
16540 tmpreg[i] = gen_reg_rtx (mode);
16544 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16546 emit_move_insn (tmpreg[i], srcmem);
16548 for (i = 0; i < unroll; i++)
16553 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16555 emit_move_insn (destmem, tmpreg[i]);
/* Memset path (SRCPTR is NULL): store VALUE into each chunk.  */
16560 for (i = 0; i < unroll; i++)
16564 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16565 emit_move_insn (destmem, value);
16568 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
16569 true, OPTAB_LIB_WIDEN);
16571 emit_move_insn (iter, tmp);
16573 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the back-edge probability from the caller's expected size
   when it is known; otherwise assume the usual 80% loop-back.  */
16575 if (expected_size != -1)
16577 expected_size /= GET_MODE_SIZE (mode) * unroll;
16578 if (expected_size == 0)
16580 else if (expected_size > REG_BR_PROB_BASE)
16581 predict_jump (REG_BR_PROB_BASE - 1);
16583 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
16586 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the caller's pointers past the region processed by the loop.  */
16587 iter = ix86_zero_extend_to_Pmode (iter);
16588 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
16589 true, OPTAB_LIB_WIDEN);
16590 if (tmp != destptr)
16591 emit_move_insn (destptr, tmp);
16594 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
16595 true, OPTAB_LIB_WIDEN);
16597 emit_move_insn (srcptr, tmp);
16599 emit_label (out_label);
16602 /* Output "rep; mov" instruction.
16603 Arguments have same meaning as for previous function */
16605 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
16606 rtx destptr, rtx srcptr,
16608 enum machine_mode mode)
16614 /* If the size is known, it is shorter to use rep movs. */
16615 if (mode == QImode && CONST_INT_P (count)
16616 && !(INTVAL (count) & 3))
/* The rep_mov pattern wants BLKmode MEMs whose address is exactly the
   pointer register; rebuild the MEMs if that is not already the case.  */
16619 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16620 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16621 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
16622 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
/* Scale the byte count down to the number of MODE-sized chunks.  */
16623 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Build the final pointer values PTR + (COUNTREG << log2 (chunk size)),
   describing the extent of memory the rep instruction touches.  */
16624 if (mode != QImode)
16626 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16627 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16628 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16629 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
16630 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16631 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
/* QImode: chunk size is 1, no shift needed.  */
16635 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16636 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
16638 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
16642 /* Output "rep; stos" instruction.
16643 Arguments have same meaning as for previous function */
16645 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
16647 enum machine_mode mode)
/* The rep_stos pattern wants a BLKmode MEM addressed by the pointer reg.  */
16652 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16653 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0)
/* The fill value must live in a register of the store chunk's mode.  */
16654 value = force_reg (mode, gen_lowpart (mode, value));
/* Scale the byte count down to the number of MODE-sized chunks.  */
16655 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Final destination pointer: DESTPTR + (COUNTREG << log2 (chunk size)).  */
16656 if (mode != QImode)
16658 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16659 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16660 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16663 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16664 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single MODE-sized move from SRCMEM+OFFSET to DESTMEM+OFFSET via
   the strmov pattern.  NOTE(review): gen_strmov appears to also update
   DESTPTR/SRCPTR — confirm against the strmov expander in i386.md.  */
16668 emit_strmov (rtx destmem, rtx srcmem,
16669 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
16671 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
16672 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
16673 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16676 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
16678 expand_movmem_epilogue (rtx destmem, rtx srcmem,
16679 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Compile-time known count: emit straight-line moves, one per set bit of
   the residual count, using the widest chunks available.  */
16682 if (CONST_INT_P (count))
16684 HOST_WIDE_INT countval = INTVAL (count);
16687 if ((countval & 0x10) && max_size > 16)
16691 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16692 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
16695 gcc_unreachable ();
16698 if ((countval & 0x08) && max_size > 8)
16701 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit fallback: two SImode moves instead of one DImode move.  */
16704 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16705 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
16709 if ((countval & 0x04) && max_size > 4)
16711 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16714 if ((countval & 0x02) && max_size > 2)
16716 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
16719 if ((countval & 0x01) && max_size > 1)
16721 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Runtime count with a large residual bound: mask COUNT down and fall
   back to a byte-at-a-time copy loop.  */
16728 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
16729 count, 1, OPTAB_DIRECT);
16730 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
16731 count, QImode, 1, 4);
16735 /* When there are stringops, we can cheaply increase dest and src pointers.
16736 Otherwise we save code size by maintaining offset (zero is readily
16737 available from preceding rep operation) and using x86 addressing modes.
/* Jump tree over the residual bits of COUNT: each ix86_expand_aligntest
   emits a test of one bit and a label to skip the corresponding move.  */
16739 if (TARGET_SINGLE_STRINGOP)
16743 rtx label = ix86_expand_aligntest (count, 4, true);
16744 src = change_address (srcmem, SImode, srcptr);
16745 dest = change_address (destmem, SImode, destptr);
16746 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16747 emit_label (label);
16748 LABEL_NUSES (label) = 1;
16752 rtx label = ix86_expand_aligntest (count, 2, true);
16753 src = change_address (srcmem, HImode, srcptr);
16754 dest = change_address (destmem, HImode, destptr);
16755 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16756 emit_label (label);
16757 LABEL_NUSES (label) = 1;
16761 rtx label = ix86_expand_aligntest (count, 1, true);
16762 src = change_address (srcmem, QImode, srcptr);
16763 dest = change_address (destmem, QImode, destptr);
16764 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16765 emit_label (label);
16766 LABEL_NUSES (label) = 1;
/* No single-insn stringops: keep a running OFFSET register and use
   base+offset addressing instead of bumping the pointers.  */
16771 rtx offset = force_reg (Pmode, const0_rtx);
16776 rtx label = ix86_expand_aligntest (count, 4, true);
16777 src = change_address (srcmem, SImode, srcptr);
16778 dest = change_address (destmem, SImode, destptr);
16779 emit_move_insn (dest, src);
16780 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
16781 true, OPTAB_LIB_WIDEN);
16783 emit_move_insn (offset, tmp);
16784 emit_label (label);
16785 LABEL_NUSES (label) = 1;
16789 rtx label = ix86_expand_aligntest (count, 2, true);
16790 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16791 src = change_address (srcmem, HImode, tmp);
16792 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16793 dest = change_address (destmem, HImode, tmp);
16794 emit_move_insn (dest, src);
16795 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
16796 true, OPTAB_LIB_WIDEN);
16798 emit_move_insn (offset, tmp);
16799 emit_label (label);
16800 LABEL_NUSES (label) = 1;
/* Final possible odd byte; OFFSET need not be updated after it.  */
16804 rtx label = ix86_expand_aligntest (count, 1, true);
16805 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16806 src = change_address (srcmem, QImode, tmp);
16807 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16808 dest = change_address (destmem, QImode, tmp);
16809 emit_move_insn (dest, src);
16810 emit_label (label);
16811 LABEL_NUSES (label) = 1;
16816 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
16818 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
16819 rtx count, int max_size)
/* Mask COUNT down to the residual byte count, then emit a byte-at-a-time
   set loop storing the low byte of VALUE.  */
16822 expand_simple_binop (counter_mode (count), AND, count,
16823 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
16824 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
16825 gen_lowpart (QImode, value), count, QImode,
16829 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
16831 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Compile-time known count: straight-line stores, one group per set bit
   of the residual count, widest chunks first.  VALUE is assumed already
   promoted (byte replicated) to the widest mode used here.  */
16835 if (CONST_INT_P (count))
16837 HOST_WIDE_INT countval = INTVAL (count);
16840 if ((countval & 0x10) && max_size > 16)
16844 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16845 emit_insn (gen_strset (destptr, dest, value));
16846 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
16847 emit_insn (gen_strset (destptr, dest, value));
16850 gcc_unreachable ();
16853 if ((countval & 0x08) && max_size > 8)
16857 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16858 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit fallback: two SImode stores instead of one DImode store.  */
16862 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16863 emit_insn (gen_strset (destptr, dest, value));
16864 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
16865 emit_insn (gen_strset (destptr, dest, value));
16869 if ((countval & 0x04) && max_size > 4)
16871 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16872 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16875 if ((countval & 0x02) && max_size > 2)
16877 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
16878 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16881 if ((countval & 0x01) && max_size > 1)
16883 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
16884 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Runtime count with large residual bound: fall back to a byte loop.  */
16891 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Otherwise emit a jump tree over the residual bits of COUNT, one
   aligntest + strset group per power-of-two chunk.  */
16896 rtx label = ix86_expand_aligntest (count, 16, true);
16899 dest = change_address (destmem, DImode, destptr);
16900 emit_insn (gen_strset (destptr, dest, value));
16901 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit: four SImode stores cover the 16-byte chunk.  */
16905 dest = change_address (destmem, SImode, destptr);
16906 emit_insn (gen_strset (destptr, dest, value));
16907 emit_insn (gen_strset (destptr, dest, value));
16908 emit_insn (gen_strset (destptr, dest, value));
16909 emit_insn (gen_strset (destptr, dest, value));
16911 emit_label (label);
16912 LABEL_NUSES (label) = 1;
16916 rtx label = ix86_expand_aligntest (count, 8, true);
16919 dest = change_address (destmem, DImode, destptr);
16920 emit_insn (gen_strset (destptr, dest, value));
16924 dest = change_address (destmem, SImode, destptr);
16925 emit_insn (gen_strset (destptr, dest, value));
16926 emit_insn (gen_strset (destptr, dest, value));
16928 emit_label (label);
16929 LABEL_NUSES (label) = 1;
16933 rtx label = ix86_expand_aligntest (count, 4, true);
16934 dest = change_address (destmem, SImode, destptr);
16935 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16936 emit_label (label);
16937 LABEL_NUSES (label) = 1;
16941 rtx label = ix86_expand_aligntest (count, 2, true);
16942 dest = change_address (destmem, HImode, destptr);
16943 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16944 emit_label (label);
16945 LABEL_NUSES (label) = 1;
16949 rtx label = ix86_expand_aligntest (count, 1, true);
16950 dest = change_address (destmem, QImode, destptr);
16951 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
16952 emit_label (label);
16953 LABEL_NUSES (label) = 1;
16957 /* Copy enough bytes from SRC to DEST to align DEST (known to be aligned by ALIGN)
16958 to DESIRED_ALIGNMENT.  Each step copies one power-of-two-sized piece when the
destination is misaligned at that level, adjusting COUNT accordingly. */
16960 expand_movmem_prologue (rtx destmem, rtx srcmem,
16961 rtx destptr, rtx srcptr, rtx count,
16962 int align, int desired_alignment)
16964 if (align <= 1 && desired_alignment > 1)
16966 rtx label = ix86_expand_aligntest (destptr, 1, false);
16967 srcmem = change_address (srcmem, QImode, srcptr);
16968 destmem = change_address (destmem, QImode, destptr);
16969 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16970 ix86_adjust_counter (count, 1);
16971 emit_label (label);
16972 LABEL_NUSES (label) = 1;
16974 if (align <= 2 && desired_alignment > 2)
16976 rtx label = ix86_expand_aligntest (destptr, 2, false);
16977 srcmem = change_address (srcmem, HImode, srcptr);
16978 destmem = change_address (destmem, HImode, destptr);
16979 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16980 ix86_adjust_counter (count, 2);
16981 emit_label (label);
16982 LABEL_NUSES (label) = 1;
16984 if (align <= 4 && desired_alignment > 4)
16986 rtx label = ix86_expand_aligntest (destptr, 4, false);
16987 srcmem = change_address (srcmem, SImode, srcptr);
16988 destmem = change_address (destmem, SImode, destptr);
16989 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16990 ix86_adjust_counter (count, 4);
16991 emit_label (label);
16992 LABEL_NUSES (label) = 1;
/* Alignments above 8 would need further steps; not supported here.  */
16994 gcc_assert (desired_alignment <= 8);
16997 /* Store enough leading bytes of VALUE into DEST to align DEST (known to be
16998 aligned by ALIGN) to DESIRED_ALIGNMENT, adjusting COUNT accordingly. */
17000 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17001 int align, int desired_alignment)
17003 if (align <= 1 && desired_alignment > 1)
17005 rtx label = ix86_expand_aligntest (destptr, 1, false);
17006 destmem = change_address (destmem, QImode, destptr);
17007 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17008 ix86_adjust_counter (count, 1);
17009 emit_label (label);
17010 LABEL_NUSES (label) = 1;
17012 if (align <= 2 && desired_alignment > 2)
17014 rtx label = ix86_expand_aligntest (destptr, 2, false);
17015 destmem = change_address (destmem, HImode, destptr);
17016 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17017 ix86_adjust_counter (count, 2);
17018 emit_label (label);
17019 LABEL_NUSES (label) = 1;
17021 if (align <= 4 && desired_alignment > 4)
17023 rtx label = ix86_expand_aligntest (destptr, 4, false);
17024 destmem = change_address (destmem, SImode, destptr);
17025 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17026 ix86_adjust_counter (count, 4);
17027 emit_label (label);
17028 LABEL_NUSES (label) = 1;
/* Alignments above 8 would need further steps; not supported here.  */
17030 gcc_assert (desired_alignment <= 8);
17033 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.
   COUNT is the compile-time byte count (0 when unknown); EXPECTED_SIZE is
   the (possibly profile-based) size estimate, -1 when unknown.  MEMSET
   selects the memset cost table instead of the memcpy one.  On return
   *DYNAMIC_CHECK is -1, or a size threshold above which a runtime
   libcall test should be emitted (-minline-stringops-dynamically).  */
17034 static enum stringop_alg
17035 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17036 int *dynamic_check)
17038 const struct stringop_algs * algs;
17039 bool optimize_for_speed;
17040 /* Algorithms using the rep prefix want at least edi and ecx;
17041 additionally, memset wants eax and memcpy wants esi. Don't
17042 consider such algorithms if the user has appropriated those
17043 registers for their own purposes. */
17044 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17046 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17048 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17049 || (alg != rep_prefix_1_byte \
17050 && alg != rep_prefix_4_byte \
17051 && alg != rep_prefix_8_byte))
17052 const struct processor_costs *cost;
17054 /* Even if the string operation call is cold, we still might spend a lot
17055 of time processing large blocks. */
17056 if (optimize_function_for_size_p (cfun)
17057 || (optimize_insn_for_size_p ()
17058 && expected_size != -1 && expected_size < 256))
17059 optimize_for_speed = false;
17061 optimize_for_speed = true;
17063 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17065 *dynamic_check = -1;
17067 algs = &cost->memset[TARGET_64BIT != 0];
17069 algs = &cost->memcpy[TARGET_64BIT != 0];
/* An explicit -mstringop-strategy choice wins when it is usable.  */
17070 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17071 return stringop_alg;
17072 /* rep; movq or rep; movl is the smallest variant. */
17073 else if (!optimize_for_speed)
17075 if (!count || (count & 3))
17076 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17078 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17080 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17082 else if (expected_size != -1 && expected_size < 4)
17083 return loop_1_byte;
/* Known (estimated) size: walk the per-size cost table and pick the
   first usable algorithm whose size bucket covers EXPECTED_SIZE.  */
17084 else if (expected_size != -1)
17087 enum stringop_alg alg = libcall;
17088 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17090 /* We get here if the algorithms that were not libcall-based
17091 were rep-prefix based and we are unable to use rep prefixes
17092 based on global register usage. Break out of the loop and
17093 use the heuristic below. */
17094 if (algs->size[i].max == 0)
17096 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17098 enum stringop_alg candidate = algs->size[i].alg;
17100 if (candidate != libcall && ALG_USABLE_P (candidate))
17102 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17103 last non-libcall inline algorithm. */
17104 if (TARGET_INLINE_ALL_STRINGOPS)
17106 /* When the current size is best to be copied by a libcall,
17107 but we are still forced to inline, run the heuristic below
17108 that will pick code for medium sized blocks. */
17109 if (alg != libcall)
17113 else if (ALG_USABLE_P (candidate))
17117 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17119 /* When asked to inline the call anyway, try to pick meaningful choice.
17120 We look for maximal size of block that is faster to copy by hand and
17121 take blocks of at most of that size guessing that average size will
17122 be roughly half of the block.
17124 If this turns out to be bad, we might simply specify the preferred
17125 choice in ix86_costs. */
17126 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17127 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17130 enum stringop_alg alg;
17132 bool any_alg_usable_p = true;
17134 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17136 enum stringop_alg candidate = algs->size[i].alg;
17137 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17139 if (candidate != libcall && candidate
17140 && ALG_USABLE_P (candidate))
17141 max = algs->size[i].max;
17143 /* If there aren't any usable algorithms, then recursing on
17144 smaller sizes isn't going to find anything. Just return the
17145 simple byte-at-a-time copy loop. */
17146 if (!any_alg_usable_p)
17148 /* Pick something reasonable. */
17149 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17150 *dynamic_check = 128;
17151 return loop_1_byte;
/* Recurse with half the largest hand-copyable size as the estimate.  */
17155 alg = decide_alg (count, max / 2, memset, dynamic_check);
17156 gcc_assert (*dynamic_check == -1);
17157 gcc_assert (alg != libcall);
17158 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17159 *dynamic_check = max;
17162 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17163 #undef ALG_USABLE_P
17166 /* Decide on alignment. We know that the operand is already aligned to ALIGN
17167 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
17169 decide_alignment (int align,
17170 enum stringop_alg alg,
17173 int desired_align = 0;
/* Unhandled algorithm enumerator: internal error.  */
17177 gcc_unreachable ();
17179 case unrolled_loop:
17180 desired_align = GET_MODE_SIZE (Pmode);
17182 case rep_prefix_8_byte:
17185 case rep_prefix_4_byte:
17186 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
17187 copying whole cacheline at once. */
17188 if (TARGET_PENTIUMPRO)
17193 case rep_prefix_1_byte:
17194 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
17195 copying whole cacheline at once. */
17196 if (TARGET_PENTIUMPRO)
/* Never request less than the alignment we already have.  */
17210 if (desired_align < align)
17211 desired_align = align;
/* Tiny expected blocks are not worth an alignment prologue.  */
17212 if (expected_size != -1 && expected_size < 4)
17213 desired_align = align;
17214 return desired_align;
17217 /* Return the smallest power of 2 greater than VAL. */
17219 smallest_pow2_greater_than (int val)
17227 /* Expand string move (memcpy) operation. Use i386 string operations when
17228 profitable. expand_setmem contains similar code. The code depends upon
17229 architecture, block size and alignment, but always has the same
17232 1) Prologue guard: Conditional that jumps up to epilogues for small
17233 blocks that can be handled by epilogue alone. This is faster but
17234 also needed for correctness, since prologue assume the block is larger
17235 than the desired alignment.
17237 Optional dynamic check for size and libcall for large
17238 blocks is emitted here too, with -minline-stringops-dynamically.
17240 2) Prologue: copy first few bytes in order to get destination aligned
17241 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
17242 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
17243 We emit either a jump tree on power of two sized blocks, or a byte loop.
17245 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17246 with specified algorithm.
17248 4) Epilogue: code copying tail of the block that is too small to be
17249 handled by main body (or up to size guarded by prologue guard). */
/* Expand a block copy DST <- SRC of COUNT_EXP bytes inline, following the
   four-step scheme described in the comment above (prologue guard,
   alignment prologue, main body, epilogue).  ALIGN_EXP is the known
   alignment; EXPECTED_ALIGN_EXP/EXPECTED_SIZE_EXP are estimates.  */
17252 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17253 rtx expected_align_exp, rtx expected_size_exp)
17259 rtx jump_around_label = NULL;
17260 HOST_WIDE_INT align = 1;
17261 unsigned HOST_WIDE_INT count = 0;
17262 HOST_WIDE_INT expected_size = -1;
17263 int size_needed = 0, epilogue_size_needed;
17264 int desired_align = 0;
17265 enum stringop_alg alg;
17267 bool need_zero_guard = false;
17269 if (CONST_INT_P (align_exp))
17270 align = INTVAL (align_exp);
17271 /* i386 can do misaligned access on reasonably increased cost. */
17272 if (CONST_INT_P (expected_align_exp)
17273 && INTVAL (expected_align_exp) > align)
17274 align = INTVAL (expected_align_exp);
/* A constant count serves as its own best size estimate.  */
17275 if (CONST_INT_P (count_exp))
17276 count = expected_size = INTVAL (count_exp);
17277 if (CONST_INT_P (expected_size_exp) && count == 0)
17278 expected_size = INTVAL (expected_size_exp);
17280 /* Make sure we don't need to care about overflow later on. */
17281 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17284 /* Step 0: Decide on preferred algorithm, desired alignment and
17285 size of chunks to be copied by main loop. */
17287 alg = decide_alg (count, expected_size, false, &dynamic_check);
17288 desired_align = decide_alignment (align, alg, expected_size);
17290 if (!TARGET_ALIGN_STRINGOPS)
17291 align = desired_align;
17293 if (alg == libcall)
17295 gcc_assert (alg != no_stringop);
17297 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17298 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17299 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* SIZE_NEEDED = bytes consumed per main-loop iteration; loops also need
   a guard in case the prologue already copied everything.  */
17304 gcc_unreachable ();
17306 need_zero_guard = true;
17307 size_needed = GET_MODE_SIZE (Pmode);
17309 case unrolled_loop:
17310 need_zero_guard = true;
17311 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17313 case rep_prefix_8_byte:
17316 case rep_prefix_4_byte:
17319 case rep_prefix_1_byte:
17323 need_zero_guard = true;
17328 epilogue_size_needed = size_needed;
17330 /* Step 1: Prologue guard. */
17332 /* Alignment code needs count to be in register. */
17333 if (CONST_INT_P (count_exp) && desired_align > align)
17334 count_exp = force_reg (counter_mode (count_exp), count_exp);
17335 gcc_assert (desired_align >= 1 && align >= 1);
17337 /* Ensure that alignment prologue won't copy past end of block. */
17338 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17340 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17341 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17342 Make sure it is power of 2. */
17343 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17345 if (CONST_INT_P (count_exp))
17347 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* Runtime count: branch to the epilogue for blocks smaller than the
   prologue/main body can safely handle.  */
17352 label = gen_label_rtx ();
17353 emit_cmp_and_jump_insns (count_exp,
17354 GEN_INT (epilogue_size_needed),
17355 LTU, 0, counter_mode (count_exp), 1, label);
17356 if (expected_size == -1 || expected_size < epilogue_size_needed)
17357 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17359 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17363 /* Emit code to decide on runtime whether library call or inline should be
17365 if (dynamic_check != -1)
17367 if (CONST_INT_P (count_exp))
17369 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
17371 emit_block_move_via_libcall (dst, src, count_exp, false);
17372 count_exp = const0_rtx;
/* Runtime size test: small blocks take the inline path (hot_label),
   large blocks call the library and jump around the inline code.  */
17378 rtx hot_label = gen_label_rtx ();
17379 jump_around_label = gen_label_rtx ();
17380 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17381 LEU, 0, GET_MODE (count_exp), 1, hot_label);
17382 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17383 emit_block_move_via_libcall (dst, src, count_exp, false);
17384 emit_jump (jump_around_label);
17385 emit_label (hot_label);
17389 /* Step 2: Alignment prologue. */
17391 if (desired_align > align)
17393 /* Except for the first move in epilogue, we no longer know
17394 constant offset in aliasing info. It doesn't seem worth
17395 the pain to maintain it for the first move, so throw away
17397 src = change_address (src, BLKmode, srcreg);
17398 dst = change_address (dst, BLKmode, destreg);
17399 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
17401 if (need_zero_guard && !count)
17403 /* It is possible that we copied enough so the main loop will not
17405 emit_cmp_and_jump_insns (count_exp,
17406 GEN_INT (size_needed),
17407 LTU, 0, counter_mode (count_exp), 1, label)
17408 if (expected_size == -1
17409 || expected_size < (desired_align - align) / 2 + size_needed)
17410 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17412 predict_jump (REG_BR_PROB_BASE * 60 / 100);
/* With a one-byte epilogue the guard label can be emitted right away.  */
17415 if (label && size_needed == 1)
17417 emit_label (label);
17418 LABEL_NUSES (label) = 1;
17422 /* Step 3: Main loop. */
17428 gcc_unreachable ();
17430 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17431 count_exp, QImode, 1, expected_size);
17434 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17435 count_exp, Pmode, 1, expected_size);
17437 case unrolled_loop:
17438 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
17439 registers for 4 temporaries anyway. */
17440 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17441 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
17444 case rep_prefix_8_byte:
17445 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17448 case rep_prefix_4_byte:
17449 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17452 case rep_prefix_1_byte:
17453 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17457 /* Adjust properly the offset of src and dest memory for aliasing. */
17458 if (CONST_INT_P (count_exp))
17460 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
17461 (count / size_needed) * size_needed);
17462 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17463 (count / size_needed) * size_needed);
17467 src = change_address (src, BLKmode, srcreg);
17468 dst = change_address (dst, BLKmode, destreg);
17471 /* Step 4: Epilogue to copy the remaining bytes. */
17475 /* When the main loop is done, COUNT_EXP might hold original count,
17476 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17477 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17478 bytes. Compensate if needed. */
17480 if (size_needed < epilogue_size_needed)
17483 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17484 GEN_INT (size_needed - 1), count_exp, 1,
17486 if (tmp != count_exp)
17487 emit_move_insn (count_exp, tmp);
17489 emit_label (label);
17490 LABEL_NUSES (label) = 1;
17493 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17494 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
17495 epilogue_size_needed);
17496 if (jump_around_label)
17497 emit_label (jump_around_label);
17501 /* Helper function for memset. For QImode value 0xXY produce
17502 0xXYXYXYXY of width specified by MODE. This is essentially
17503 a * 0x10101010, but we can do slightly better than
17504 synth_mult by unwinding the sequence by hand on CPUs with
/* Return a MODE-sized register holding VAL's low byte replicated into
   every byte position (e.g. 0xXY -> 0xXYXYXYXY for SImode).  */
17507 promote_duplicated_reg (enum machine_mode mode, rtx val)
17509 enum machine_mode valmode = GET_MODE (val);
/* Number of shift+or steps needed to fill MODE from one byte.  */
17511 int nops = mode == DImode ? 3 : 2;
17513 gcc_assert (mode == SImode || mode == DImode);
17514 if (val == const0_rtx)
17515 return copy_to_mode_reg (mode, const0_rtx);
/* Constant fill byte: compute the replicated constant at compile time.  */
17516 if (CONST_INT_P (val))
17518 HOST_WIDE_INT v = INTVAL (val) & 255;
17522 if (mode == DImode)
17523 v |= (v << 16) << 16;
17524 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
17527 if (valmode == VOIDmode)
17529 if (valmode != QImode)
17530 val = gen_lowpart (QImode, val);
17531 if (mode == QImode)
17533 if (!TARGET_PARTIAL_REG_STALL)
/* Use a multiply by the replicated-ones constant when the cost tables
   say it beats the shift/or sequence below.  */
17535 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
17536 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
17537 <= (ix86_cost->shift_const + ix86_cost->add) * nops
17538 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
17540 rtx reg = convert_modes (mode, QImode, val, true);
17541 tmp = promote_duplicated_reg (mode, const1_rtx);
17542 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Otherwise build the value by doubling its width with shift+or steps.  */
17547 rtx reg = convert_modes (mode, QImode, val, true);
/* Insv patterns copy the low byte into bits 8-15 in one instruction.  */
17549 if (!TARGET_PARTIAL_REG_STALL)
17550 if (mode == SImode)
17551 emit_insn (gen_movsi_insv_1 (reg, reg));
17553 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
17556 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
17557 NULL, 1, OPTAB_DIRECT);
17559 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* reg |= reg << 16: bytes 0-1 now replicated into bytes 2-3.  */
17561 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
17562 NULL, 1, OPTAB_DIRECT);
17563 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17564 if (mode == SImode)
/* DImode needs one more doubling: reg |= reg << 32.  */
17566 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
17567 NULL, 1, OPTAB_DIRECT);
17568 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17573 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
17574 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
17575 alignment from ALIGN to DESIRED_ALIGN. */
17577 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
/* Pick the widest mode any emitted store will use and promote once,
   so the replicated value can be CSEd across prologue/body/epilogue.  */
17582 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
17583 promoted_val = promote_duplicated_reg (DImode, val);
17584 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
17585 promoted_val = promote_duplicated_reg (SImode, val);
17586 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
17587 promoted_val = promote_duplicated_reg (HImode, val);
/* Byte stores only: no promotion necessary.  */
17589 promoted_val = val;
17591 return promoted_val;
17594 /* Expand string clear operation (bzero). Use i386 string operations when
17595 profitable. See expand_movmem comment for explanation of individual
17596 steps performed. */
/* NOTE(review): elided listing — many lines (switch labels, braces,
   'else' arms) are missing between the numbered lines below.  */
17598 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
17599 rtx expected_align_exp, rtx expected_size_exp)
17604 rtx jump_around_label = NULL;
17605 HOST_WIDE_INT align = 1;
17606 unsigned HOST_WIDE_INT count = 0;
17607 HOST_WIDE_INT expected_size = -1;
17608 int size_needed = 0, epilogue_size_needed;
17609 int desired_align = 0;
17610 enum stringop_alg alg;
17611 rtx promoted_val = NULL;
17612 bool force_loopy_epilogue = false;
17614 bool need_zero_guard = false;
/* Harvest compile-time knowledge: alignment, expected alignment and
   (when constant) the byte count / expected size.  */
17616 if (CONST_INT_P (align_exp))
17617 align = INTVAL (align_exp);
17618 /* i386 can do misaligned access on reasonably increased cost. */
17619 if (CONST_INT_P (expected_align_exp)
17620 && INTVAL (expected_align_exp) > align)
17621 align = INTVAL (expected_align_exp);
17622 if (CONST_INT_P (count_exp))
17623 count = expected_size = INTVAL (count_exp);
17624 if (CONST_INT_P (expected_size_exp) && count == 0)
17625 expected_size = INTVAL (expected_size_exp);
17627 /* Make sure we don't need to care about overflow later on. */
17628 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17631 /* Step 0: Decide on preferred algorithm, desired alignment and
17632 size of chunks to be copied by main loop. */
17634 alg = decide_alg (count, expected_size, true, &dynamic_check);
17635 desired_align = decide_alignment (align, alg, expected_size);
17637 if (!TARGET_ALIGN_STRINGOPS)
17638 align = desired_align;
17640 if (alg == libcall)
17642 gcc_assert (alg != no_stringop);
17644 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
17645 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* Per-algorithm chunk size; loop algorithms additionally need a guard
   against a zero remaining count (need_zero_guard).  */
17650 gcc_unreachable ();
17652 need_zero_guard = true;
17653 size_needed = GET_MODE_SIZE (Pmode);
17655 case unrolled_loop:
17656 need_zero_guard = true;
17657 size_needed = GET_MODE_SIZE (Pmode) * 4;
17659 case rep_prefix_8_byte:
17662 case rep_prefix_4_byte:
17665 case rep_prefix_1_byte:
17669 need_zero_guard = true;
17673 epilogue_size_needed = size_needed;
17675 /* Step 1: Prologue guard. */
17677 /* Alignment code needs count to be in register. */
17678 if (CONST_INT_P (count_exp) && desired_align > align)
17680 enum machine_mode mode = SImode;
17681 if (TARGET_64BIT && (count & ~0xffffffff))
17683 count_exp = force_reg (mode, count_exp);
17685 /* Do the cheap promotion to allow better CSE across the
17686 main loop and epilogue (ie one load of the big constant in the
17687 front of all code. */
17688 if (CONST_INT_P (val_exp))
17689 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17690 desired_align, align);
17691 /* Ensure that alignment prologue won't copy past end of block. */
17692 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17694 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17695 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17696 Make sure it is power of 2. */
17697 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17699 /* To improve performance of small blocks, we jump around the VAL
17700 promoting mode. This mean that if the promoted VAL is not constant,
17701 we might not use it in the epilogue and have to use byte
17703 if (epilogue_size_needed > 2 && !promoted_val)
17704 force_loopy_epilogue = true;
17705 label = gen_label_rtx ();
17706 emit_cmp_and_jump_insns (count_exp,
17707 GEN_INT (epilogue_size_needed),
17708 LTU, 0, counter_mode (count_exp), 1, label);
17709 if (GET_CODE (count_exp) == CONST_INT)
17711 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
17712 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17714 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Runtime size check: very large blocks go straight to the library
   call (cold path); hot path continues with the inline expansion.  */
17716 if (dynamic_check != -1)
17718 rtx hot_label = gen_label_rtx ();
17719 jump_around_label = gen_label_rtx ();
17720 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17721 LEU, 0, counter_mode (count_exp), 1, hot_label);
17722 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17723 set_storage_via_libcall (dst, count_exp, val_exp, false);
17724 emit_jump (jump_around_label);
17725 emit_label (hot_label);
17728 /* Step 2: Alignment prologue. */
17730 /* Do the expensive promotion once we branched off the small blocks. */
17732 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17733 desired_align, align);
17734 gcc_assert (desired_align >= 1 && align >= 1);
17736 if (desired_align > align)
17738 /* Except for the first move in epilogue, we no longer know
17739 constant offset in aliasing info. It don't seems to worth
17740 the pain to maintain it for the first move, so throw away
17742 dst = change_address (dst, BLKmode, destreg);
17743 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
17745 if (need_zero_guard && !count)
17747 /* It is possible that we copied enough so the main loop will not
17749 emit_cmp_and_jump_insns (count_exp,
17750 GEN_INT (size_needed),
17751 LTU, 0, counter_mode (count_exp), 1, label);
17752 if (expected_size == -1
17753 || expected_size < (desired_align - align) / 2 + size_needed)
17754 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17756 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17759 if (label && size_needed == 1)
17761 emit_label (label);
17762 LABEL_NUSES (label) = 1;
17766 /* Step 3: Main loop. */
17772 gcc_unreachable ();
17774 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17775 count_exp, QImode, 1, expected_size);
17778 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17779 count_exp, Pmode, 1, expected_size);
17781 case unrolled_loop:
17782 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17783 count_exp, Pmode, 4, expected_size);
17785 case rep_prefix_8_byte:
17786 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17789 case rep_prefix_4_byte:
17790 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17793 case rep_prefix_1_byte:
17794 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17798 /* Adjust properly the offset of src and dest memory for aliasing. */
17799 if (CONST_INT_P (count_exp))
17800 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17801 (count / size_needed) * size_needed);
17803 dst = change_address (dst, BLKmode, destreg);
17805 /* Step 4: Epilogue to copy the remaining bytes. */
17809 /* When the main loop is done, COUNT_EXP might hold original count,
17810 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17811 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17812 bytes. Compensate if needed. */
17814 if (size_needed < desired_align - align)
17817 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17818 GEN_INT (size_needed - 1), count_exp, 1,
17820 size_needed = desired_align - align + 1;
17821 if (tmp != count_exp)
17822 emit_move_insn (count_exp, tmp);
17824 emit_label (label);
17825 LABEL_NUSES (label) = 1;
17827 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17829 if (force_loopy_epilogue)
17830 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
17833 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
17836 if (jump_around_label)
17837 emit_label (jump_around_label);
17841 /* Expand the appropriate insns for doing strlen if not just doing
17844 out = result, initialized with the start address
17845 align_rtx = alignment of the address.
17846 scratch = scratch register, initialized with the startaddress when
17847 not aligned, otherwise undefined
17849 This is just the body. It needs the initializations mentioned above and
17850 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): elided listing — braces, declarations of MEM/TMP/CMP and
   several else-arms are not visible between the numbered lines.  */
17853 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
17857 rtx align_2_label = NULL_RTX;
17858 rtx align_3_label = NULL_RTX;
17859 rtx align_4_label = gen_label_rtx ();
17860 rtx end_0_label = gen_label_rtx ();
17862 rtx tmpreg = gen_reg_rtx (SImode);
17863 rtx scratch = gen_reg_rtx (SImode);
17867 if (CONST_INT_P (align_rtx))
17868 align = INTVAL (align_rtx);
17870 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
17872 /* Is there a known alignment and is it less than 4? */
17875 rtx scratch1 = gen_reg_rtx (Pmode);
17876 emit_move_insn (scratch1, out);
17877 /* Is there a known alignment and is it not 2? */
17880 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
17881 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
17883 /* Leave just the 3 lower bits. */
17884 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
17885 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (address & 3): 0 -> already aligned, 2 -> two bytes to
   check, >2 -> three-byte case, otherwise fall through to one byte.  */
17887 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17888 Pmode, 1, align_4_label);
17889 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
17890 Pmode, 1, align_2_label);
17891 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
17892 Pmode, 1, align_3_label);
17896 /* Since the alignment is 2, we have to check 2 or 0 bytes;
17897 check if is aligned to 4 - byte. */
17899 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
17900 NULL_RTX, 0, OPTAB_WIDEN);
17902 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17903 Pmode, 1, align_4_label);
17906 mem = change_address (src, QImode, out);
17908 /* Now compare the bytes. */
17910 /* Compare the first n unaligned byte on a byte per byte basis. */
17911 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
17912 QImode, 1, end_0_label);
17914 /* Increment the address. */
17915 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17917 /* Not needed with an alignment of 2 */
17920 emit_label (align_2_label);
17922 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17925 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17927 emit_label (align_3_label);
17930 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17933 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17936 /* Generate loop to check 4 bytes at a time. It is not a good idea to
17937 align this loop. It gives only huge programs, but does not help to
17939 emit_label (align_4_label);
17941 mem = change_address (src, SImode, out);
17942 emit_move_insn (scratch, mem);
17943 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
17945 /* This formula yields a nonzero result iff one of the bytes is zero.
17946 This saves three branches inside loop and many cycles. */
/* Classic "has zero byte" trick: (x - 0x01010101) & ~x & 0x80808080.  */
17948 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
17949 emit_insn (gen_one_cmplsi2 (scratch, scratch));
17950 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
17951 emit_insn (gen_andsi3 (tmpreg, tmpreg,
17952 gen_int_mode (0x80808080, SImode)));
17953 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Branch-free locating of the zero byte using conditional moves.  */
17958 rtx reg = gen_reg_rtx (SImode);
17959 rtx reg2 = gen_reg_rtx (Pmode);
17960 emit_move_insn (reg, tmpreg);
17961 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
17963 /* If zero is not in the first two bytes, move two bytes forward. */
17964 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17965 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17966 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17967 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
17968 gen_rtx_IF_THEN_ELSE (SImode, tmp,
17971 /* Emit lea manually to avoid clobbering of flags. */
17972 emit_insn (gen_rtx_SET (SImode, reg2,
17973 gen_rtx_PLUS (Pmode, out, const2_rtx)));
17975 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17976 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17977 emit_insn (gen_rtx_SET (VOIDmode, out,
17978 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Fallback path without cmov: explicit jump around the adjustment.  */
17985 rtx end_2_label = gen_label_rtx ();
17986 /* Is zero in the first two bytes? */
17988 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17989 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17990 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
17991 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17992 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
17994 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17995 JUMP_LABEL (tmp) = end_2_label;
17997 /* Not in the first two. Move two bytes forward. */
17998 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
17999 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18001 emit_label (end_2_label);
18005 /* Avoid branch in fixing the byte. */
18006 tmpreg = gen_lowpart (QImode, tmpreg);
18007 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18008 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18009 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18011 emit_label (end_0_label);
18014 /* Expand strlen. */
/* Returns nonzero on success.  Two strategies: the unrolled SImode scan
   (ix86_expand_strlensi_unroll_1) when profitable, otherwise the
   repnz-scasb pattern via strlenqi_1.  NOTE(review): elided listing —
   early-return bodies and some braces are not visible.  */
18017 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18019 rtx addr, scratch1, scratch2, scratch3, scratch4;
18021 /* The generic case of strlen expander is long. Avoid it's
18022 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
18024 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18025 && !TARGET_INLINE_ALL_STRINGOPS
18026 && !optimize_insn_for_size_p ()
18027 && (!CONST_INT_P (align) || INTVAL (align) < 4))
18030 addr = force_reg (Pmode, XEXP (src, 0));
18031 scratch1 = gen_reg_rtx (Pmode);
18033 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18034 && !optimize_insn_for_size_p ())
18036 /* Well it seems that some optimizer does not combine a call like
18037 foo(strlen(bar), strlen(bar));
18038 when the move and the subtraction is done here. It does calculate
18039 the length just once when these instructions are done inside of
18040 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
18041 often used and I use one fewer register for the lifetime of
18042 output_strlen_unroll() this is better. */
18044 emit_move_insn (out, addr);
18046 ix86_expand_strlensi_unroll_1 (out, src, align);
18048 /* strlensi_unroll_1 returns the address of the zero at the end of
18049 the string, like memchr(), so compute the length by subtracting
18050 the start address. */
18051 emit_insn ((*ix86_gen_sub3) (out, out, addr));
18057 /* Can't use this if the user has appropriated eax, ecx, or edi. */
18058 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
/* SCAS-based path: scan for EOSCHAR with count -1, then the length is
   ~(remaining count) - 1.  */
18061 scratch2 = gen_reg_rtx (Pmode);
18062 scratch3 = gen_reg_rtx (Pmode);
18063 scratch4 = force_reg (Pmode, constm1_rtx);
18065 emit_move_insn (scratch3, addr);
18066 eoschar = force_reg (QImode, eoschar);
18068 src = replace_equiv_address_nv (src, scratch3);
18070 /* If .md starts supporting :P, this can be done in .md. */
18071 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
18072 scratch4), UNSPEC_SCAS);
18073 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
18074 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
18075 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
18080 /* For given symbol (function) construct code to compute address of it's PLT
18081 entry in large x86-64 PIC model. */
/* Emits TMP = const(UNSPEC_PLTOFF(SYMBOL)) + PIC register and (per the
   elided tail) presumably returns TMP.  Only valid in CM_LARGE_PIC.  */
18083 construct_plt_address (rtx symbol)
18085 rtx tmp = gen_reg_rtx (Pmode);
18086 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
18088 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
18089 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
18091 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
18092 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Emit a call to FNADDR (a MEM), optionally capturing the result in
   RETVAL, popping POP bytes after return, as a sibcall when SIBCALL.
   CALLARG2 carries the x86-64 vararg SSE-register count loaded into AL.
   NOTE(review): elided listing — return-type line and some braces are
   not visible.  */
18097 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
18098 rtx callarg2 ATTRIBUTE_UNUSED,
18099 rtx pop, int sibcall)
18101 rtx use = NULL, call;
18103 if (pop == const0_rtx)
18105 gcc_assert (!TARGET_64BIT || !pop);
18107 if (TARGET_MACHO && !TARGET_64BIT)
18110 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
18111 fnaddr = machopic_indirect_call_target (fnaddr);
18116 /* Static functions and indirect calls don't need the pic register. */
18117 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
18118 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18119 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18120 use_reg (&use, pic_offset_table_rtx)
18123 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18125 rtx al = gen_rtx_REG (QImode, AX_REG);
18126 emit_move_insn (al, callarg2);
18127 use_reg (&use, al);
18130 if (ix86_cmodel == CM_LARGE_PIC
18131 && GET_CODE (fnaddr) == MEM
18132 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18133 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18134 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18135 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
18137 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18138 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls must route a non-constant target through R11, a
   call-clobbered register not used for argument passing.  */
18140 if (sibcall && TARGET_64BIT
18141 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
18144 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18145 fnaddr = gen_rtx_REG (Pmode, R11_REG);
18146 emit_move_insn (fnaddr, addr);
18147 fnaddr = gen_rtx_MEM (QImode, fnaddr);
18150 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18152 call = gen_rtx_SET (VOIDmode, retval, call);
18155 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18156 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18157 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
18160 call = emit_call_insn (call);
18162 CALL_INSN_FUNCTION_USAGE (call) = use;
18166 /* Clear stack slot assignments remembered from previous functions.
18167 This is called from INIT_EXPANDERS once before RTL is emitted for each
18170 static struct machine_function *
18171 ix86_init_machine_status (void)
18173 struct machine_function *f;
/* GGC_CNEW zero-allocates in GC space, so unset fields start at 0.  */
18175 f = GGC_CNEW (struct machine_function);
18176 f->use_fast_prologue_epilogue_nregs = -1;
18177 f->tls_descriptor_call_expanded_p = 0;
18178 f->call_abi = DEFAULT_ABI;
18183 /* Return a MEM corresponding to a stack slot with mode MODE.
18184 Allocate a new slot if necessary.
18186 The RTL for a function can have several slots available: N is
18187 which slot to use. */
18190 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18192 struct stack_local_entry *s;
18194 gcc_assert (n < MAX_386_STACK_LOCALS);
18196 /* Virtual slot is valid only before vregs are instantiated. */
18197 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse a previously allocated slot of the same mode/index; return a
   copy so callers may modify the rtx freely.  */
18199 for (s = ix86_stack_locals; s; s = s->next)
18200 if (s->mode == mode && s->n == n)
18201 return copy_rtx (s->rtl);
18203 s = (struct stack_local_entry *)
18204 ggc_alloc (sizeof (struct stack_local_entry));
18207 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
/* Prepend the new entry to the per-function cache list.  */
18209 s->next = ix86_stack_locals;
18210 ix86_stack_locals = s;
18214 /* Construct the SYMBOL_REF for the tls_get_addr function. */
18216 static GTY(()) rtx ix86_tls_symbol;
/* Lazily build and cache the SYMBOL_REF; GNU TLS uses the
   three-underscore variant.  */
18218 ix86_tls_get_addr (void)
18221 if (!ix86_tls_symbol)
18223 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18224 (TARGET_ANY_GNU_TLS
18226 ? "___tls_get_addr"
18227 : "__tls_get_addr");
18230 return ix86_tls_symbol;
18233 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
18235 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily build and cache the symbol, marking it with the
   global-dynamic TLS model flag.  */
18237 ix86_tls_module_base (void)
18240 if (!ix86_tls_module_base_symbol)
18242 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
18243 "_TLS_MODULE_BASE_");
18244 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
18245 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
18248 return ix86_tls_module_base_symbol;
18251 /* Calculate the length of the memory address in the instruction
18252 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* NOTE(review): elided listing — the `len` accumulator declaration,
   several return/len statements and the final return are not visible.  */
18255 memory_address_length (rtx addr)
18257 struct ix86_address parts;
18258 rtx base, index, disp;
/* Auto-modified addresses have a fixed, known encoding cost handled in
   the (elided) branch body.  */
18262 if (GET_CODE (addr) == PRE_DEC
18263 || GET_CODE (addr) == POST_INC
18264 || GET_CODE (addr) == PRE_MODIFY
18265 || GET_CODE (addr) == POST_MODIFY)
18268 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the base/index comparisons below see hard regs.  */
18271 if (parts.base && GET_CODE (parts.base) == SUBREG)
18272 parts.base = SUBREG_REG (parts.base);
18273 if (parts.index && GET_CODE (parts.index) == SUBREG)
18274 parts.index = SUBREG_REG (parts.index);
18277 index = parts.index;
18282 - esp as the base always wants an index,
18283 - ebp as the base always wants a displacement. */
18285 /* Register Indirect. */
18286 if (base && !index && !disp)
18288 /* esp (for its index) and ebp (for its displacement) need
18289 the two-byte modrm form. */
18290 if (addr == stack_pointer_rtx
18291 || addr == arg_pointer_rtx
18292 || addr == frame_pointer_rtx
18293 || addr == hard_frame_pointer_rtx)
18297 /* Direct Addressing. */
18298 else if (disp && !base && !index)
18303 /* Find the length of the displacement constant. */
/* satisfies_constraint_K => displacement fits in a signed 8-bit byte. */
18306 if (base && satisfies_constraint_K (disp))
18311 /* ebp always wants a displacement. */
18312 else if (base == hard_frame_pointer_rtx)
18315 /* An index requires the two-byte modrm form.... */
18317 /* ...like esp, which always wants an index. */
18318 || base == stack_pointer_rtx
18319 || base == arg_pointer_rtx
18320 || base == frame_pointer_rtx)
18327 /* Compute default value for "length_immediate" attribute. When SHORTFORM
18328 is set, expect that insn have 8bit immediate alternative. */
18330 ix86_attr_length_immediate_default (rtx insn, int shortform)
/* Scan operands for the (single) constant; length depends on whether a
   short (8-bit) form is usable and otherwise on the insn's mode.  */
18334 extract_insn_cached (insn);
18335 for (i = recog_data.n_operands - 1; i >= 0; --i)
18336 if (CONSTANT_P (recog_data.operand[i]))
18339 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
18343 switch (get_attr_mode (insn))
18354 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
18359 fatal_insn ("unknown insn mode", insn);
18365 /* Compute default value for "length_address" attribute. */
18367 ix86_attr_length_address_default (rtx insn)
/* LEA encodes its "address" in SET_SRC rather than in a MEM operand.  */
18371 if (get_attr_type (insn) == TYPE_LEA)
18373 rtx set = PATTERN (insn);
18375 if (GET_CODE (set) == PARALLEL)
18376 set = XVECEXP (set, 0, 0);
18378 gcc_assert (GET_CODE (set) == SET);
18380 return memory_address_length (SET_SRC (set));
/* Otherwise measure the first MEM operand's address, if any.  */
18383 extract_insn_cached (insn);
18384 for (i = recog_data.n_operands - 1; i >= 0; --i)
18385 if (MEM_P (recog_data.operand[i]))
18387 return memory_address_length (XEXP (recog_data.operand[i], 0));
18393 /* Compute default value for "length_vex" attribute. It includes
18394 2 or 3 byte VEX prefix and 1 opcode byte. */
18397 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
18402 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
18403 byte VEX prefix. */
18404 if (!has_0f_opcode || has_vex_w)
18407 /* We can always use 2 byte VEX prefix in 32bit. */
/* 64-bit path: any operand forcing REX.W/REX.X/REX.B needs the
   3-byte prefix form.  */
18411 extract_insn_cached (insn);
18413 for (i = recog_data.n_operands - 1; i >= 0; --i)
18414 if (REG_P (recog_data.operand[i]))
18416 /* REX.W bit uses 3 byte VEX prefix. */
18417 if (GET_MODE (recog_data.operand[i]) == DImode)
18422 /* REX.X or REX.B bits use 3 byte VEX prefix. */
18423 if (MEM_P (recog_data.operand[i])
18424 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
18431 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): elided listing — the switch head and per-case return
   values are not visible between the case labels.  */
18434 ix86_issue_rate (void)
18438 case PROCESSOR_PENTIUM:
18442 case PROCESSOR_PENTIUMPRO:
18443 case PROCESSOR_PENTIUM4:
18444 case PROCESSOR_ATHLON:
18446 case PROCESSOR_AMDFAM10:
18447 case PROCESSOR_NOCONA:
18448 case PROCESSOR_GENERIC32:
18449 case PROCESSOR_GENERIC64:
18452 case PROCESSOR_CORE2:
18460 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
18461 by DEP_INSN and nothing set by DEP_INSN. */
18464 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* Only flag-consuming insn types can have the dependency we model here. */
18468 /* Simplify the test for uninteresting insns. */
18469 if (insn_type != TYPE_SETCC
18470 && insn_type != TYPE_ICMOV
18471 && insn_type != TYPE_FCMOV
18472 && insn_type != TYPE_IBR)
18475 if ((set = single_set (dep_insn)) != 0)
18477 set = SET_DEST (set);
/* DEP_INSN may also be a two-SET PARALLEL (e.g. an arithmetic insn that
   sets both a register and the flags); collect both destinations.  */
18480 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
18481 && XVECLEN (PATTERN (dep_insn), 0) == 2
18482 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
18483 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
18485 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* FIX: read SET2 from element 1 of the PARALLEL (the second SET).  The
   original read element 0 again, so SET2 duplicated SET and the second
   destination was never checked by the overlap test below.  */
18486 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
18491 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
18494 /* This test is true if the dependent insn reads the flags but
18495 not any other potentially set register. */
18496 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
18499 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
18505 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
18506 address with operands set by DEP_INSN. */
18509 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* For LEA the "address" is the SET_SRC of the pattern; otherwise look
   for a MEM operand.  NOTE(review): the second condition of the LEA test
   (line 18514 area) is elided.  */
18513 if (insn_type == TYPE_LEA
18516 addr = PATTERN (insn);
18518 if (GET_CODE (addr) == PARALLEL)
18519 addr = XVECEXP (addr, 0, 0);
18521 gcc_assert (GET_CODE (addr) == SET);
18523 addr = SET_SRC (addr);
18528 extract_insn_cached (insn);
18529 for (i = recog_data.n_operands - 1; i >= 0; --i)
18530 if (MEM_P (recog_data.operand[i]))
18532 addr = XEXP (recog_data.operand[i], 0);
/* AGI exists iff DEP_INSN writes something the address reads.  */
18539 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependency LINK between
   DEP_INSN (producer) and INSN (consumer) for the current CPU model.
   NOTE(review): elided listing — switch head, several cost adjustments
   and the final return are not visible.  */
18543 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
18545 enum attr_type insn_type, dep_insn_type;
18546 enum attr_memory memory;
18548 int dep_insn_code_number;
18550 /* Anti and output dependencies have zero cost on all CPUs. */
18551 if (REG_NOTE_KIND (link) != 0)
18554 dep_insn_code_number = recog_memoized (dep_insn);
18556 /* If we can't recognize the insns, we can't really do anything. */
18557 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
18560 insn_type = get_attr_type (insn);
18561 dep_insn_type = get_attr_type (dep_insn);
18565 case PROCESSOR_PENTIUM:
18566 /* Address Generation Interlock adds a cycle of latency. */
18567 if (ix86_agi_dependent (insn, dep_insn, insn_type))
18570 /* ??? Compares pair with jump/setcc. */
18571 if (ix86_flags_dependent (insn, dep_insn, insn_type))
18574 /* Floating point stores require value to be ready one cycle earlier. */
18575 if (insn_type == TYPE_FMOV
18576 && get_attr_memory (insn) == MEMORY_STORE
18577 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18581 case PROCESSOR_PENTIUMPRO:
18582 memory = get_attr_memory (insn);
18584 /* INT->FP conversion is expensive. */
18585 if (get_attr_fp_int_src (dep_insn))
18588 /* There is one cycle extra latency between an FP op and a store. */
18589 if (insn_type == TYPE_FMOV
18590 && (set = single_set (dep_insn)) != NULL_RTX
18591 && (set2 = single_set (insn)) != NULL_RTX
18592 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
18593 && MEM_P (SET_DEST (set2)))
18596 /* Show ability of reorder buffer to hide latency of load by executing
18597 in parallel with previous instruction in case
18598 previous instruction is not needed to compute the address. */
18599 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18600 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18602 /* Claim moves to take one cycle, as core can issue one load
18603 at time and the next load can start cycle later. */
18604 if (dep_insn_type == TYPE_IMOV
18605 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): this second group (18613+) belongs to a different,
   elided CPU case label.  */
18613 memory = get_attr_memory (insn);
18615 /* The esp dependency is resolved before the instruction is really
18617 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
18618 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
18621 /* INT->FP conversion is expensive. */
18622 if (get_attr_fp_int_src (dep_insn))
18625 /* Show ability of reorder buffer to hide latency of load by executing
18626 in parallel with previous instruction in case
18627 previous instruction is not needed to compute the address. */
18628 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18629 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18631 /* Claim moves to take one cycle, as core can issue one load
18632 at time and the next load can start cycle later. */
18633 if (dep_insn_type == TYPE_IMOV
18634 || dep_insn_type == TYPE_FMOV)
18643 case PROCESSOR_ATHLON:
18645 case PROCESSOR_AMDFAM10:
18646 case PROCESSOR_GENERIC32:
18647 case PROCESSOR_GENERIC64:
18648 memory = get_attr_memory (insn);
18650 /* Show ability of reorder buffer to hide latency of load by executing
18651 in parallel with previous instruction in case
18652 previous instruction is not needed to compute the address. */
18653 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18654 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18656 enum attr_unit unit = get_attr_unit (insn);
18659 /* Because of the difference between the length of integer and
18660 floating unit pipeline preparation stages, the memory operands
18661 for floating point are cheaper.
18663 ??? For Athlon it the difference is most probably 2. */
18664 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
18667 loadcost = TARGET_ATHLON ? 2 : 0;
18669 if (cost >= loadcost)
18682 /* How many alternative schedules to try. This should be as wide as the
18683 scheduling freedom in the DFA, but no wider. Making this value too
18684 large results extra work for the scheduler. */
/* NOTE(review): elided listing — switch head, per-case return values and
   the default are not visible.  */
18687 ia32_multipass_dfa_lookahead (void)
18691 case PROCESSOR_PENTIUM:
18694 case PROCESSOR_PENTIUMPRO:
18704 /* Compute the alignment given to a constant that is being placed in memory.
18705 EXP is the constant and ALIGN is the alignment that the object would
18707 The value of this function is used instead of that alignment to align
18711 ix86_constant_alignment (tree exp, int align)
/* Boost DFmode constants to 64 bits and 128-bit-mode constants to 128;
   long string literals get word alignment unless optimizing for size.  */
18713 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
18714 || TREE_CODE (exp) == INTEGER_CST)
18716 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
18718 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
18721 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18722 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18723 return BITS_PER_WORD;
18728 /* Compute the alignment for a static variable.
18729 TYPE is the data type, and ALIGN is the alignment that
18730 the object would ordinarily have. The value of this function is used
18731 instead of that alignment to align the object. */
18734 ix86_data_alignment (tree type, int align)
18736 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Large aggregates (>= max_align bits) are raised to max_align.  */
18738 if (AGGREGATE_TYPE_P (type)
18739 && TYPE_SIZE (type)
18740 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18741 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
18742 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
18743 && align < max_align)
18746 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18747 to 16byte boundary. */
18750 if (AGGREGATE_TYPE_P (type)
18751 && TYPE_SIZE (type)
18752 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18753 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
18754 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-kind boosts: DFmode elements to 64, 128-bit modes to 128.  */
18758 if (TREE_CODE (type) == ARRAY_TYPE)
18760 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18762 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18765 else if (TREE_CODE (type) == COMPLEX_TYPE)
18768 if (TYPE_MODE (type) == DCmode && align < 64)
18770 if ((TYPE_MODE (type) == XCmode
18771 || TYPE_MODE (type) == TCmode) && align < 128)
18774 else if ((TREE_CODE (type) == RECORD_TYPE
18775 || TREE_CODE (type) == UNION_TYPE
18776 || TREE_CODE (type) == QUAL_UNION_TYPE)
18777 && TYPE_FIELDS (type))
18779 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18781 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18784 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18785 || TREE_CODE (type) == INTEGER_TYPE)
18787 if (TYPE_MODE (type) == DFmode && align < 64)
18789 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18796 /* Compute the alignment for a local variable or a stack slot. TYPE is
18797 the data type, MODE is the widest mode available and ALIGN is the
18798 alignment that the object would ordinarily have. The value of this
18799 macro is used instead of that alignment to align the object. */
18802 ix86_local_alignment (tree type, enum machine_mode mode,
18803 unsigned int align)
18805 /* If TYPE is NULL, we are allocating a stack slot for caller-save
18806 register in MODE. We will return the largest alignment of XF
18810 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18811 align = GET_MODE_ALIGNMENT (DFmode);
18815 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18816 to 16byte boundary. */
18819 if (AGGREGATE_TYPE_P (type)
18820 && TYPE_SIZE (type)
18821 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18822 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
18823 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Same per-kind boosts as ix86_data_alignment, applied to locals.  */
18826 if (TREE_CODE (type) == ARRAY_TYPE)
18828 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18830 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18833 else if (TREE_CODE (type) == COMPLEX_TYPE)
18835 if (TYPE_MODE (type) == DCmode && align < 64)
18837 if ((TYPE_MODE (type) == XCmode
18838 || TYPE_MODE (type) == TCmode) && align < 128)
18841 else if ((TREE_CODE (type) == RECORD_TYPE
18842 || TREE_CODE (type) == UNION_TYPE
18843 || TREE_CODE (type) == QUAL_UNION_TYPE)
18844 && TYPE_FIELDS (type))
18846 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18848 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18851 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18852 || TREE_CODE (type) == INTEGER_TYPE)
18855 if (TYPE_MODE (type) == DFmode && align < 64)
18857 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18863 /* Emit RTL insns to initialize the variable parts of a trampoline.
18864 FNADDR is an RTX for the address of the function's pure code.
18865 CXT is an RTX for the static chain value for the function. */
/* NOTE(review): incomplete extraction — the 32-bit/64-bit branch, the
   `offset` bookkeeping, and several emit_move_insn continuation lines
   were dropped.  The opcode bytes stored below are x86 machine-code
   prefixes: the 32-bit path writes "mov $cxt, %ecx; jmp rel32", the
   64-bit path materializes fnaddr in r11 and cxt in r10 and does
   "jmp *%r11". */
18867 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
18871 /* Compute offset from the end of the jmp to the target function. */
/* The jmp instruction ends 10 bytes into the trampoline (1-byte 0xb9 +
   4-byte imm32 + 1-byte 0xe9 + 4-byte rel32), hence plus_constant (…, 10). */
18872 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
18873 plus_constant (tramp, 10),
18874 NULL_RTX, 1, OPTAB_DIRECT);
/* 0xb9 = mov $imm32, %ecx; the static chain value follows at offset 1. */
18875 emit_move_insn (gen_rtx_MEM (QImode, tramp),
18876 gen_int_mode (0xb9, QImode));
18877 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
/* 0xe9 = jmp rel32; the displacement computed above goes at offset 6. */
18878 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
18879 gen_int_mode (0xe9, QImode));
18880 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
18885 /* Try to load address using shorter movl instead of movabs.
18886 We may want to support movq for kernel mode, but kernel does not use
18887 trampolines at the moment. */
/* If fnaddr zero-extends from 32 bits, a 6-byte "movl $imm32, %r11d"
   (REX.B 0x41 + 0xbb) suffices; otherwise the 10-byte movabs form
   (REX.WB 0x49 + 0xbb) with a DImode immediate is used below. */
18888 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18890 fnaddr = copy_to_mode_reg (DImode, fnaddr);
18891 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18892 gen_int_mode (0xbb41, HImode))
18893 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
18894 gen_lowpart (SImode, fnaddr));
18899 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18900 gen_int_mode (0xbb49, HImode));
18901 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18905 /* Load static chain using movabs to r10. */
18906 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18907 gen_int_mode (0xba49, HImode));
18908 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18911 /* Jump to the r11 */
/* 0x49 0xff /4 with modrm 0xe3 encodes "jmp *%r11". */
18912 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18913 gen_int_mode (0xff49, HImode));
18914 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
18915 gen_int_mode (0xe3, QImode));
/* Sanity check: the bytes emitted must fit the target-defined buffer. */
18917 gcc_assert (offset <= TRAMPOLINE_SIZE);
18920 #ifdef ENABLE_EXECUTE_STACK
/* On targets with non-executable stacks, ask libgcc to mark the
   trampoline's page executable. */
18921 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18922 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
18926 /* Codes for all the SSE/MMX builtins. */
/* NOTE(review): the `enum ix86_builtins` opening line itself was dropped
   from this extraction; the enumerators below are its body.  This first
   section covers the SSE1/MMX builtins (arithmetic, compares, COMI/UCOMI
   scalar compares, conversions, shuffles and the MXCSR/fence intrinsics). */
18929 IX86_BUILTIN_ADDPS,
18930 IX86_BUILTIN_ADDSS,
18931 IX86_BUILTIN_DIVPS,
18932 IX86_BUILTIN_DIVSS,
18933 IX86_BUILTIN_MULPS,
18934 IX86_BUILTIN_MULSS,
18935 IX86_BUILTIN_SUBPS,
18936 IX86_BUILTIN_SUBSS,
18938 IX86_BUILTIN_CMPEQPS,
18939 IX86_BUILTIN_CMPLTPS,
18940 IX86_BUILTIN_CMPLEPS,
18941 IX86_BUILTIN_CMPGTPS,
18942 IX86_BUILTIN_CMPGEPS,
18943 IX86_BUILTIN_CMPNEQPS,
18944 IX86_BUILTIN_CMPNLTPS,
18945 IX86_BUILTIN_CMPNLEPS,
18946 IX86_BUILTIN_CMPNGTPS,
18947 IX86_BUILTIN_CMPNGEPS,
18948 IX86_BUILTIN_CMPORDPS,
18949 IX86_BUILTIN_CMPUNORDPS,
18950 IX86_BUILTIN_CMPEQSS,
18951 IX86_BUILTIN_CMPLTSS,
18952 IX86_BUILTIN_CMPLESS,
18953 IX86_BUILTIN_CMPNEQSS,
18954 IX86_BUILTIN_CMPNLTSS,
18955 IX86_BUILTIN_CMPNLESS,
18956 IX86_BUILTIN_CMPNGTSS,
18957 IX86_BUILTIN_CMPNGESS,
18958 IX86_BUILTIN_CMPORDSS,
18959 IX86_BUILTIN_CMPUNORDSS,
/* Scalar ordered (COMI) and unordered (UCOMI) compares — see bdesc_comi. */
18961 IX86_BUILTIN_COMIEQSS,
18962 IX86_BUILTIN_COMILTSS,
18963 IX86_BUILTIN_COMILESS,
18964 IX86_BUILTIN_COMIGTSS,
18965 IX86_BUILTIN_COMIGESS,
18966 IX86_BUILTIN_COMINEQSS,
18967 IX86_BUILTIN_UCOMIEQSS,
18968 IX86_BUILTIN_UCOMILTSS,
18969 IX86_BUILTIN_UCOMILESS,
18970 IX86_BUILTIN_UCOMIGTSS,
18971 IX86_BUILTIN_UCOMIGESS,
18972 IX86_BUILTIN_UCOMINEQSS,
18974 IX86_BUILTIN_CVTPI2PS,
18975 IX86_BUILTIN_CVTPS2PI,
18976 IX86_BUILTIN_CVTSI2SS,
18977 IX86_BUILTIN_CVTSI642SS,
18978 IX86_BUILTIN_CVTSS2SI,
18979 IX86_BUILTIN_CVTSS2SI64,
18980 IX86_BUILTIN_CVTTPS2PI,
18981 IX86_BUILTIN_CVTTSS2SI,
18982 IX86_BUILTIN_CVTTSS2SI64,
18984 IX86_BUILTIN_MAXPS,
18985 IX86_BUILTIN_MAXSS,
18986 IX86_BUILTIN_MINPS,
18987 IX86_BUILTIN_MINSS,
18989 IX86_BUILTIN_LOADUPS,
18990 IX86_BUILTIN_STOREUPS,
18991 IX86_BUILTIN_MOVSS,
18993 IX86_BUILTIN_MOVHLPS,
18994 IX86_BUILTIN_MOVLHPS,
18995 IX86_BUILTIN_LOADHPS,
18996 IX86_BUILTIN_LOADLPS,
18997 IX86_BUILTIN_STOREHPS,
18998 IX86_BUILTIN_STORELPS,
19000 IX86_BUILTIN_MASKMOVQ,
19001 IX86_BUILTIN_MOVMSKPS,
19002 IX86_BUILTIN_PMOVMSKB,
19004 IX86_BUILTIN_MOVNTPS,
19005 IX86_BUILTIN_MOVNTQ,
19007 IX86_BUILTIN_LOADDQU,
19008 IX86_BUILTIN_STOREDQU,
/* MMX integer builtins (operate on 64-bit MMX registers). */
19010 IX86_BUILTIN_PACKSSWB,
19011 IX86_BUILTIN_PACKSSDW,
19012 IX86_BUILTIN_PACKUSWB,
19014 IX86_BUILTIN_PADDB,
19015 IX86_BUILTIN_PADDW,
19016 IX86_BUILTIN_PADDD,
19017 IX86_BUILTIN_PADDQ,
19018 IX86_BUILTIN_PADDSB,
19019 IX86_BUILTIN_PADDSW,
19020 IX86_BUILTIN_PADDUSB,
19021 IX86_BUILTIN_PADDUSW,
19022 IX86_BUILTIN_PSUBB,
19023 IX86_BUILTIN_PSUBW,
19024 IX86_BUILTIN_PSUBD,
19025 IX86_BUILTIN_PSUBQ,
19026 IX86_BUILTIN_PSUBSB,
19027 IX86_BUILTIN_PSUBSW,
19028 IX86_BUILTIN_PSUBUSB,
19029 IX86_BUILTIN_PSUBUSW,
19032 IX86_BUILTIN_PANDN,
19036 IX86_BUILTIN_PAVGB,
19037 IX86_BUILTIN_PAVGW,
19039 IX86_BUILTIN_PCMPEQB,
19040 IX86_BUILTIN_PCMPEQW,
19041 IX86_BUILTIN_PCMPEQD,
19042 IX86_BUILTIN_PCMPGTB,
19043 IX86_BUILTIN_PCMPGTW,
19044 IX86_BUILTIN_PCMPGTD,
19046 IX86_BUILTIN_PMADDWD,
19048 IX86_BUILTIN_PMAXSW,
19049 IX86_BUILTIN_PMAXUB,
19050 IX86_BUILTIN_PMINSW,
19051 IX86_BUILTIN_PMINUB,
19053 IX86_BUILTIN_PMULHUW,
19054 IX86_BUILTIN_PMULHW,
19055 IX86_BUILTIN_PMULLW,
19057 IX86_BUILTIN_PSADBW,
19058 IX86_BUILTIN_PSHUFW,
19060 IX86_BUILTIN_PSLLW,
19061 IX86_BUILTIN_PSLLD,
19062 IX86_BUILTIN_PSLLQ,
19063 IX86_BUILTIN_PSRAW,
19064 IX86_BUILTIN_PSRAD,
19065 IX86_BUILTIN_PSRLW,
19066 IX86_BUILTIN_PSRLD,
19067 IX86_BUILTIN_PSRLQ,
19068 IX86_BUILTIN_PSLLWI,
19069 IX86_BUILTIN_PSLLDI,
19070 IX86_BUILTIN_PSLLQI,
19071 IX86_BUILTIN_PSRAWI,
19072 IX86_BUILTIN_PSRADI,
19073 IX86_BUILTIN_PSRLWI,
19074 IX86_BUILTIN_PSRLDI,
19075 IX86_BUILTIN_PSRLQI,
19077 IX86_BUILTIN_PUNPCKHBW,
19078 IX86_BUILTIN_PUNPCKHWD,
19079 IX86_BUILTIN_PUNPCKHDQ,
19080 IX86_BUILTIN_PUNPCKLBW,
19081 IX86_BUILTIN_PUNPCKLWD,
19082 IX86_BUILTIN_PUNPCKLDQ,
19084 IX86_BUILTIN_SHUFPS,
/* Reciprocal/square-root approximations; _NR variants are the
   Newton-Raphson-refined forms used by the vectorizer. */
19086 IX86_BUILTIN_RCPPS,
19087 IX86_BUILTIN_RCPSS,
19088 IX86_BUILTIN_RSQRTPS,
19089 IX86_BUILTIN_RSQRTPS_NR,
19090 IX86_BUILTIN_RSQRTSS,
19091 IX86_BUILTIN_RSQRTF,
19092 IX86_BUILTIN_SQRTPS,
19093 IX86_BUILTIN_SQRTPS_NR,
19094 IX86_BUILTIN_SQRTSS,
19096 IX86_BUILTIN_UNPCKHPS,
19097 IX86_BUILTIN_UNPCKLPS,
19099 IX86_BUILTIN_ANDPS,
19100 IX86_BUILTIN_ANDNPS,
19102 IX86_BUILTIN_XORPS,
19105 IX86_BUILTIN_LDMXCSR,
19106 IX86_BUILTIN_STMXCSR,
19107 IX86_BUILTIN_SFENCE,
/* AMD 3DNow! builtins: base set plus the Athlon extensions. */
19109 /* 3DNow! Original */
19110 IX86_BUILTIN_FEMMS,
19111 IX86_BUILTIN_PAVGUSB,
19112 IX86_BUILTIN_PF2ID,
19113 IX86_BUILTIN_PFACC,
19114 IX86_BUILTIN_PFADD,
19115 IX86_BUILTIN_PFCMPEQ,
19116 IX86_BUILTIN_PFCMPGE,
19117 IX86_BUILTIN_PFCMPGT,
19118 IX86_BUILTIN_PFMAX,
19119 IX86_BUILTIN_PFMIN,
19120 IX86_BUILTIN_PFMUL,
19121 IX86_BUILTIN_PFRCP,
19122 IX86_BUILTIN_PFRCPIT1,
19123 IX86_BUILTIN_PFRCPIT2,
19124 IX86_BUILTIN_PFRSQIT1,
19125 IX86_BUILTIN_PFRSQRT,
19126 IX86_BUILTIN_PFSUB,
19127 IX86_BUILTIN_PFSUBR,
19128 IX86_BUILTIN_PI2FD,
19129 IX86_BUILTIN_PMULHRW,
19131 /* 3DNow! Athlon Extensions */
19132 IX86_BUILTIN_PF2IW,
19133 IX86_BUILTIN_PFNACC,
19134 IX86_BUILTIN_PFPNACC,
19135 IX86_BUILTIN_PI2FW,
19136 IX86_BUILTIN_PSWAPDSI,
19137 IX86_BUILTIN_PSWAPDSF,
/* SSE2 builtins: double-precision arithmetic/compares, conversions,
   128-bit integer (XMM) operations, fences — followed by SSE3. */
19140 IX86_BUILTIN_ADDPD,
19141 IX86_BUILTIN_ADDSD,
19142 IX86_BUILTIN_DIVPD,
19143 IX86_BUILTIN_DIVSD,
19144 IX86_BUILTIN_MULPD,
19145 IX86_BUILTIN_MULSD,
19146 IX86_BUILTIN_SUBPD,
19147 IX86_BUILTIN_SUBSD,
19149 IX86_BUILTIN_CMPEQPD,
19150 IX86_BUILTIN_CMPLTPD,
19151 IX86_BUILTIN_CMPLEPD,
19152 IX86_BUILTIN_CMPGTPD,
19153 IX86_BUILTIN_CMPGEPD,
19154 IX86_BUILTIN_CMPNEQPD,
19155 IX86_BUILTIN_CMPNLTPD,
19156 IX86_BUILTIN_CMPNLEPD,
19157 IX86_BUILTIN_CMPNGTPD,
19158 IX86_BUILTIN_CMPNGEPD,
19159 IX86_BUILTIN_CMPORDPD,
19160 IX86_BUILTIN_CMPUNORDPD,
19161 IX86_BUILTIN_CMPEQSD,
19162 IX86_BUILTIN_CMPLTSD,
19163 IX86_BUILTIN_CMPLESD,
19164 IX86_BUILTIN_CMPNEQSD,
19165 IX86_BUILTIN_CMPNLTSD,
19166 IX86_BUILTIN_CMPNLESD,
19167 IX86_BUILTIN_CMPORDSD,
19168 IX86_BUILTIN_CMPUNORDSD,
19170 IX86_BUILTIN_COMIEQSD,
19171 IX86_BUILTIN_COMILTSD,
19172 IX86_BUILTIN_COMILESD,
19173 IX86_BUILTIN_COMIGTSD,
19174 IX86_BUILTIN_COMIGESD,
19175 IX86_BUILTIN_COMINEQSD,
19176 IX86_BUILTIN_UCOMIEQSD,
19177 IX86_BUILTIN_UCOMILTSD,
19178 IX86_BUILTIN_UCOMILESD,
19179 IX86_BUILTIN_UCOMIGTSD,
19180 IX86_BUILTIN_UCOMIGESD,
19181 IX86_BUILTIN_UCOMINEQSD,
19183 IX86_BUILTIN_MAXPD,
19184 IX86_BUILTIN_MAXSD,
19185 IX86_BUILTIN_MINPD,
19186 IX86_BUILTIN_MINSD,
19188 IX86_BUILTIN_ANDPD,
19189 IX86_BUILTIN_ANDNPD,
19191 IX86_BUILTIN_XORPD,
19193 IX86_BUILTIN_SQRTPD,
19194 IX86_BUILTIN_SQRTSD,
19196 IX86_BUILTIN_UNPCKHPD,
19197 IX86_BUILTIN_UNPCKLPD,
19199 IX86_BUILTIN_SHUFPD,
19201 IX86_BUILTIN_LOADUPD,
19202 IX86_BUILTIN_STOREUPD,
19203 IX86_BUILTIN_MOVSD,
19205 IX86_BUILTIN_LOADHPD,
19206 IX86_BUILTIN_LOADLPD,
19208 IX86_BUILTIN_CVTDQ2PD,
19209 IX86_BUILTIN_CVTDQ2PS,
19211 IX86_BUILTIN_CVTPD2DQ,
19212 IX86_BUILTIN_CVTPD2PI,
19213 IX86_BUILTIN_CVTPD2PS,
19214 IX86_BUILTIN_CVTTPD2DQ,
19215 IX86_BUILTIN_CVTTPD2PI,
19217 IX86_BUILTIN_CVTPI2PD,
19218 IX86_BUILTIN_CVTSI2SD,
19219 IX86_BUILTIN_CVTSI642SD,
19221 IX86_BUILTIN_CVTSD2SI,
19222 IX86_BUILTIN_CVTSD2SI64,
19223 IX86_BUILTIN_CVTSD2SS,
19224 IX86_BUILTIN_CVTSS2SD,
19225 IX86_BUILTIN_CVTTSD2SI,
19226 IX86_BUILTIN_CVTTSD2SI64,
19228 IX86_BUILTIN_CVTPS2DQ,
19229 IX86_BUILTIN_CVTPS2PD,
19230 IX86_BUILTIN_CVTTPS2DQ,
19232 IX86_BUILTIN_MOVNTI,
19233 IX86_BUILTIN_MOVNTPD,
19234 IX86_BUILTIN_MOVNTDQ,
19236 IX86_BUILTIN_MOVQ128,
/* SSE2 128-bit (XMM) integer builtins. */
19239 IX86_BUILTIN_MASKMOVDQU,
19240 IX86_BUILTIN_MOVMSKPD,
19241 IX86_BUILTIN_PMOVMSKB128,
19243 IX86_BUILTIN_PACKSSWB128,
19244 IX86_BUILTIN_PACKSSDW128,
19245 IX86_BUILTIN_PACKUSWB128,
19247 IX86_BUILTIN_PADDB128,
19248 IX86_BUILTIN_PADDW128,
19249 IX86_BUILTIN_PADDD128,
19250 IX86_BUILTIN_PADDQ128,
19251 IX86_BUILTIN_PADDSB128,
19252 IX86_BUILTIN_PADDSW128,
19253 IX86_BUILTIN_PADDUSB128,
19254 IX86_BUILTIN_PADDUSW128,
19255 IX86_BUILTIN_PSUBB128,
19256 IX86_BUILTIN_PSUBW128,
19257 IX86_BUILTIN_PSUBD128,
19258 IX86_BUILTIN_PSUBQ128,
19259 IX86_BUILTIN_PSUBSB128,
19260 IX86_BUILTIN_PSUBSW128,
19261 IX86_BUILTIN_PSUBUSB128,
19262 IX86_BUILTIN_PSUBUSW128,
19264 IX86_BUILTIN_PAND128,
19265 IX86_BUILTIN_PANDN128,
19266 IX86_BUILTIN_POR128,
19267 IX86_BUILTIN_PXOR128,
19269 IX86_BUILTIN_PAVGB128,
19270 IX86_BUILTIN_PAVGW128,
19272 IX86_BUILTIN_PCMPEQB128,
19273 IX86_BUILTIN_PCMPEQW128,
19274 IX86_BUILTIN_PCMPEQD128,
19275 IX86_BUILTIN_PCMPGTB128,
19276 IX86_BUILTIN_PCMPGTW128,
19277 IX86_BUILTIN_PCMPGTD128,
19279 IX86_BUILTIN_PMADDWD128,
19281 IX86_BUILTIN_PMAXSW128,
19282 IX86_BUILTIN_PMAXUB128,
19283 IX86_BUILTIN_PMINSW128,
19284 IX86_BUILTIN_PMINUB128,
19286 IX86_BUILTIN_PMULUDQ,
19287 IX86_BUILTIN_PMULUDQ128,
19288 IX86_BUILTIN_PMULHUW128,
19289 IX86_BUILTIN_PMULHW128,
19290 IX86_BUILTIN_PMULLW128,
19292 IX86_BUILTIN_PSADBW128,
19293 IX86_BUILTIN_PSHUFHW,
19294 IX86_BUILTIN_PSHUFLW,
19295 IX86_BUILTIN_PSHUFD,
19297 IX86_BUILTIN_PSLLDQI128,
19298 IX86_BUILTIN_PSLLWI128,
19299 IX86_BUILTIN_PSLLDI128,
19300 IX86_BUILTIN_PSLLQI128,
19301 IX86_BUILTIN_PSRAWI128,
19302 IX86_BUILTIN_PSRADI128,
19303 IX86_BUILTIN_PSRLDQI128,
19304 IX86_BUILTIN_PSRLWI128,
19305 IX86_BUILTIN_PSRLDI128,
19306 IX86_BUILTIN_PSRLQI128,
19308 IX86_BUILTIN_PSLLDQ128,
19309 IX86_BUILTIN_PSLLW128,
19310 IX86_BUILTIN_PSLLD128,
19311 IX86_BUILTIN_PSLLQ128,
19312 IX86_BUILTIN_PSRAW128,
19313 IX86_BUILTIN_PSRAD128,
19314 IX86_BUILTIN_PSRLW128,
19315 IX86_BUILTIN_PSRLD128,
19316 IX86_BUILTIN_PSRLQ128,
19318 IX86_BUILTIN_PUNPCKHBW128,
19319 IX86_BUILTIN_PUNPCKHWD128,
19320 IX86_BUILTIN_PUNPCKHDQ128,
19321 IX86_BUILTIN_PUNPCKHQDQ128,
19322 IX86_BUILTIN_PUNPCKLBW128,
19323 IX86_BUILTIN_PUNPCKLWD128,
19324 IX86_BUILTIN_PUNPCKLDQ128,
19325 IX86_BUILTIN_PUNPCKLQDQ128,
19327 IX86_BUILTIN_CLFLUSH,
19328 IX86_BUILTIN_MFENCE,
19329 IX86_BUILTIN_LFENCE,
/* SSE3 builtins (horizontal ops, lddqu, monitor/mwait). */
19332 IX86_BUILTIN_ADDSUBPS,
19333 IX86_BUILTIN_HADDPS,
19334 IX86_BUILTIN_HSUBPS,
19335 IX86_BUILTIN_MOVSHDUP,
19336 IX86_BUILTIN_MOVSLDUP,
19337 IX86_BUILTIN_ADDSUBPD,
19338 IX86_BUILTIN_HADDPD,
19339 IX86_BUILTIN_HSUBPD,
19340 IX86_BUILTIN_LDDQU,
19342 IX86_BUILTIN_MONITOR,
19343 IX86_BUILTIN_MWAIT,
/* SSSE3 builtins: MMX variants first, then the _128 (XMM) forms. */
19346 IX86_BUILTIN_PHADDW,
19347 IX86_BUILTIN_PHADDD,
19348 IX86_BUILTIN_PHADDSW,
19349 IX86_BUILTIN_PHSUBW,
19350 IX86_BUILTIN_PHSUBD,
19351 IX86_BUILTIN_PHSUBSW,
19352 IX86_BUILTIN_PMADDUBSW,
19353 IX86_BUILTIN_PMULHRSW,
19354 IX86_BUILTIN_PSHUFB,
19355 IX86_BUILTIN_PSIGNB,
19356 IX86_BUILTIN_PSIGNW,
19357 IX86_BUILTIN_PSIGND,
19358 IX86_BUILTIN_PALIGNR,
19359 IX86_BUILTIN_PABSB,
19360 IX86_BUILTIN_PABSW,
19361 IX86_BUILTIN_PABSD,
19363 IX86_BUILTIN_PHADDW128,
19364 IX86_BUILTIN_PHADDD128,
19365 IX86_BUILTIN_PHADDSW128,
19366 IX86_BUILTIN_PHSUBW128,
19367 IX86_BUILTIN_PHSUBD128,
19368 IX86_BUILTIN_PHSUBSW128,
19369 IX86_BUILTIN_PMADDUBSW128,
19370 IX86_BUILTIN_PMULHRSW128,
19371 IX86_BUILTIN_PSHUFB128,
19372 IX86_BUILTIN_PSIGNB128,
19373 IX86_BUILTIN_PSIGNW128,
19374 IX86_BUILTIN_PSIGND128,
19375 IX86_BUILTIN_PALIGNR128,
19376 IX86_BUILTIN_PABSB128,
19377 IX86_BUILTIN_PABSW128,
19378 IX86_BUILTIN_PABSD128,
/* SSE4A (AMD), SSE4.1, SSE4.2, AES and PCLMUL builtin codes. */
19380 /* AMDFAM10 - SSE4A New Instructions. */
19381 IX86_BUILTIN_MOVNTSD,
19382 IX86_BUILTIN_MOVNTSS,
19383 IX86_BUILTIN_EXTRQI,
19384 IX86_BUILTIN_EXTRQ,
19385 IX86_BUILTIN_INSERTQI,
19386 IX86_BUILTIN_INSERTQ,
/* SSE4.1. */
19389 IX86_BUILTIN_BLENDPD,
19390 IX86_BUILTIN_BLENDPS,
19391 IX86_BUILTIN_BLENDVPD,
19392 IX86_BUILTIN_BLENDVPS,
19393 IX86_BUILTIN_PBLENDVB128,
19394 IX86_BUILTIN_PBLENDW128,
19399 IX86_BUILTIN_INSERTPS128,
19401 IX86_BUILTIN_MOVNTDQA,
19402 IX86_BUILTIN_MPSADBW128,
19403 IX86_BUILTIN_PACKUSDW128,
19404 IX86_BUILTIN_PCMPEQQ,
19405 IX86_BUILTIN_PHMINPOSUW128,
19407 IX86_BUILTIN_PMAXSB128,
19408 IX86_BUILTIN_PMAXSD128,
19409 IX86_BUILTIN_PMAXUD128,
19410 IX86_BUILTIN_PMAXUW128,
19412 IX86_BUILTIN_PMINSB128,
19413 IX86_BUILTIN_PMINSD128,
19414 IX86_BUILTIN_PMINUD128,
19415 IX86_BUILTIN_PMINUW128,
19417 IX86_BUILTIN_PMOVSXBW128,
19418 IX86_BUILTIN_PMOVSXBD128,
19419 IX86_BUILTIN_PMOVSXBQ128,
19420 IX86_BUILTIN_PMOVSXWD128,
19421 IX86_BUILTIN_PMOVSXWQ128,
19422 IX86_BUILTIN_PMOVSXDQ128,
19424 IX86_BUILTIN_PMOVZXBW128,
19425 IX86_BUILTIN_PMOVZXBD128,
19426 IX86_BUILTIN_PMOVZXBQ128,
19427 IX86_BUILTIN_PMOVZXWD128,
19428 IX86_BUILTIN_PMOVZXWQ128,
19429 IX86_BUILTIN_PMOVZXDQ128,
19431 IX86_BUILTIN_PMULDQ128,
19432 IX86_BUILTIN_PMULLD128,
19434 IX86_BUILTIN_ROUNDPD,
19435 IX86_BUILTIN_ROUNDPS,
19436 IX86_BUILTIN_ROUNDSD,
19437 IX86_BUILTIN_ROUNDSS,
19439 IX86_BUILTIN_PTESTZ,
19440 IX86_BUILTIN_PTESTC,
19441 IX86_BUILTIN_PTESTNZC,
/* Vector init/extract/set helper builtins used by the intrinsics headers. */
19443 IX86_BUILTIN_VEC_INIT_V2SI,
19444 IX86_BUILTIN_VEC_INIT_V4HI,
19445 IX86_BUILTIN_VEC_INIT_V8QI,
19446 IX86_BUILTIN_VEC_EXT_V2DF,
19447 IX86_BUILTIN_VEC_EXT_V2DI,
19448 IX86_BUILTIN_VEC_EXT_V4SF,
19449 IX86_BUILTIN_VEC_EXT_V4SI,
19450 IX86_BUILTIN_VEC_EXT_V8HI,
19451 IX86_BUILTIN_VEC_EXT_V2SI,
19452 IX86_BUILTIN_VEC_EXT_V4HI,
19453 IX86_BUILTIN_VEC_EXT_V16QI,
19454 IX86_BUILTIN_VEC_SET_V2DI,
19455 IX86_BUILTIN_VEC_SET_V4SF,
19456 IX86_BUILTIN_VEC_SET_V4SI,
19457 IX86_BUILTIN_VEC_SET_V8HI,
19458 IX86_BUILTIN_VEC_SET_V4HI,
19459 IX86_BUILTIN_VEC_SET_V16QI,
19461 IX86_BUILTIN_VEC_PACK_SFIX,
/* SSE4.2: CRC32 and the string-compare (PCMPESTR*/PCMPISTR*) builtins. */
19464 IX86_BUILTIN_CRC32QI,
19465 IX86_BUILTIN_CRC32HI,
19466 IX86_BUILTIN_CRC32SI,
19467 IX86_BUILTIN_CRC32DI,
19469 IX86_BUILTIN_PCMPESTRI128,
19470 IX86_BUILTIN_PCMPESTRM128,
19471 IX86_BUILTIN_PCMPESTRA128,
19472 IX86_BUILTIN_PCMPESTRC128,
19473 IX86_BUILTIN_PCMPESTRO128,
19474 IX86_BUILTIN_PCMPESTRS128,
19475 IX86_BUILTIN_PCMPESTRZ128,
19476 IX86_BUILTIN_PCMPISTRI128,
19477 IX86_BUILTIN_PCMPISTRM128,
19478 IX86_BUILTIN_PCMPISTRA128,
19479 IX86_BUILTIN_PCMPISTRC128,
19480 IX86_BUILTIN_PCMPISTRO128,
19481 IX86_BUILTIN_PCMPISTRS128,
19482 IX86_BUILTIN_PCMPISTRZ128,
19484 IX86_BUILTIN_PCMPGTQ,
19486 /* AES instructions */
19487 IX86_BUILTIN_AESENC128,
19488 IX86_BUILTIN_AESENCLAST128,
19489 IX86_BUILTIN_AESDEC128,
19490 IX86_BUILTIN_AESDECLAST128,
19491 IX86_BUILTIN_AESIMC128,
19492 IX86_BUILTIN_AESKEYGENASSIST128,
19494 /* PCLMUL instruction */
19495 IX86_BUILTIN_PCLMULQDQ128,
/* AVX builtins: 256-bit arithmetic, compares, conversions, permutes,
   broadcasts, masked loads/stores, and the VTEST/PTEST predicates. */
19498 IX86_BUILTIN_ADDPD256,
19499 IX86_BUILTIN_ADDPS256,
19500 IX86_BUILTIN_ADDSUBPD256,
19501 IX86_BUILTIN_ADDSUBPS256,
19502 IX86_BUILTIN_ANDPD256,
19503 IX86_BUILTIN_ANDPS256,
19504 IX86_BUILTIN_ANDNPD256,
19505 IX86_BUILTIN_ANDNPS256,
19506 IX86_BUILTIN_BLENDPD256,
19507 IX86_BUILTIN_BLENDPS256,
19508 IX86_BUILTIN_BLENDVPD256,
19509 IX86_BUILTIN_BLENDVPS256,
19510 IX86_BUILTIN_DIVPD256,
19511 IX86_BUILTIN_DIVPS256,
19512 IX86_BUILTIN_DPPS256,
19513 IX86_BUILTIN_HADDPD256,
19514 IX86_BUILTIN_HADDPS256,
19515 IX86_BUILTIN_HSUBPD256,
19516 IX86_BUILTIN_HSUBPS256,
19517 IX86_BUILTIN_MAXPD256,
19518 IX86_BUILTIN_MAXPS256,
19519 IX86_BUILTIN_MINPD256,
19520 IX86_BUILTIN_MINPS256,
19521 IX86_BUILTIN_MULPD256,
19522 IX86_BUILTIN_MULPS256,
19523 IX86_BUILTIN_ORPD256,
19524 IX86_BUILTIN_ORPS256,
19525 IX86_BUILTIN_SHUFPD256,
19526 IX86_BUILTIN_SHUFPS256,
19527 IX86_BUILTIN_SUBPD256,
19528 IX86_BUILTIN_SUBPS256,
19529 IX86_BUILTIN_XORPD256,
19530 IX86_BUILTIN_XORPS256,
19531 IX86_BUILTIN_CMPSD,
19532 IX86_BUILTIN_CMPSS,
19533 IX86_BUILTIN_CMPPD,
19534 IX86_BUILTIN_CMPPS,
19535 IX86_BUILTIN_CMPPD256,
19536 IX86_BUILTIN_CMPPS256,
19537 IX86_BUILTIN_CVTDQ2PD256,
19538 IX86_BUILTIN_CVTDQ2PS256,
19539 IX86_BUILTIN_CVTPD2PS256,
19540 IX86_BUILTIN_CVTPS2DQ256,
19541 IX86_BUILTIN_CVTPS2PD256,
19542 IX86_BUILTIN_CVTTPD2DQ256,
19543 IX86_BUILTIN_CVTPD2DQ256,
19544 IX86_BUILTIN_CVTTPS2DQ256,
19545 IX86_BUILTIN_EXTRACTF128PD256,
19546 IX86_BUILTIN_EXTRACTF128PS256,
19547 IX86_BUILTIN_EXTRACTF128SI256,
19548 IX86_BUILTIN_VZEROALL,
19549 IX86_BUILTIN_VZEROUPPER,
19550 IX86_BUILTIN_VZEROUPPER_REX64,
19551 IX86_BUILTIN_VPERMILVARPD,
19552 IX86_BUILTIN_VPERMILVARPS,
19553 IX86_BUILTIN_VPERMILVARPD256,
19554 IX86_BUILTIN_VPERMILVARPS256,
19555 IX86_BUILTIN_VPERMILPD,
19556 IX86_BUILTIN_VPERMILPS,
19557 IX86_BUILTIN_VPERMILPD256,
19558 IX86_BUILTIN_VPERMILPS256,
19559 IX86_BUILTIN_VPERMIL2PD,
19560 IX86_BUILTIN_VPERMIL2PS,
19561 IX86_BUILTIN_VPERMIL2PD256,
19562 IX86_BUILTIN_VPERMIL2PS256,
19563 IX86_BUILTIN_VPERM2F128PD256,
19564 IX86_BUILTIN_VPERM2F128PS256,
19565 IX86_BUILTIN_VPERM2F128SI256,
19566 IX86_BUILTIN_VBROADCASTSS,
19567 IX86_BUILTIN_VBROADCASTSD256,
19568 IX86_BUILTIN_VBROADCASTSS256,
19569 IX86_BUILTIN_VBROADCASTPD256,
19570 IX86_BUILTIN_VBROADCASTPS256,
19571 IX86_BUILTIN_VINSERTF128PD256,
19572 IX86_BUILTIN_VINSERTF128PS256,
19573 IX86_BUILTIN_VINSERTF128SI256,
19574 IX86_BUILTIN_LOADUPD256,
19575 IX86_BUILTIN_LOADUPS256,
19576 IX86_BUILTIN_STOREUPD256,
19577 IX86_BUILTIN_STOREUPS256,
19578 IX86_BUILTIN_LDDQU256,
19579 IX86_BUILTIN_LOADDQU256,
19580 IX86_BUILTIN_STOREDQU256,
19581 IX86_BUILTIN_MASKLOADPD,
19582 IX86_BUILTIN_MASKLOADPS,
19583 IX86_BUILTIN_MASKSTOREPD,
19584 IX86_BUILTIN_MASKSTOREPS,
19585 IX86_BUILTIN_MASKLOADPD256,
19586 IX86_BUILTIN_MASKLOADPS256,
19587 IX86_BUILTIN_MASKSTOREPD256,
19588 IX86_BUILTIN_MASKSTOREPS256,
19589 IX86_BUILTIN_MOVSHDUP256,
19590 IX86_BUILTIN_MOVSLDUP256,
19591 IX86_BUILTIN_MOVDDUP256,
19593 IX86_BUILTIN_SQRTPD256,
19594 IX86_BUILTIN_SQRTPS256,
19595 IX86_BUILTIN_SQRTPS_NR256,
19596 IX86_BUILTIN_RSQRTPS256,
19597 IX86_BUILTIN_RSQRTPS_NR256,
19599 IX86_BUILTIN_RCPPS256,
19601 IX86_BUILTIN_ROUNDPD256,
19602 IX86_BUILTIN_ROUNDPS256,
19604 IX86_BUILTIN_UNPCKHPD256,
19605 IX86_BUILTIN_UNPCKLPD256,
19606 IX86_BUILTIN_UNPCKHPS256,
19607 IX86_BUILTIN_UNPCKLPS256,
/* 128 <-> 256-bit register reinterpret-cast builtins. */
19609 IX86_BUILTIN_SI256_SI,
19610 IX86_BUILTIN_PS256_PS,
19611 IX86_BUILTIN_PD256_PD,
19612 IX86_BUILTIN_SI_SI256,
19613 IX86_BUILTIN_PS_PS256,
19614 IX86_BUILTIN_PD_PD256,
19616 IX86_BUILTIN_VTESTZPD,
19617 IX86_BUILTIN_VTESTCPD,
19618 IX86_BUILTIN_VTESTNZCPD,
19619 IX86_BUILTIN_VTESTZPS,
19620 IX86_BUILTIN_VTESTCPS,
19621 IX86_BUILTIN_VTESTNZCPS,
19622 IX86_BUILTIN_VTESTZPD256,
19623 IX86_BUILTIN_VTESTCPD256,
19624 IX86_BUILTIN_VTESTNZCPD256,
19625 IX86_BUILTIN_VTESTZPS256,
19626 IX86_BUILTIN_VTESTCPS256,
19627 IX86_BUILTIN_VTESTNZCPS256,
19628 IX86_BUILTIN_PTESTZ256,
19629 IX86_BUILTIN_PTESTC256,
19630 IX86_BUILTIN_PTESTNZC256,
19632 IX86_BUILTIN_MOVMSKPD256,
19633 IX86_BUILTIN_MOVMSKPS256,
/* __float128 helpers, then the (later-removed) AMD SSE5 builtin codes:
   fused multiply-add, PCMOV/PPERM, integer horizontal adds, rotates,
   shifts, FRCZ, float16 conversion and the COM/PCOM compare families.
   NOTE(review): the closing `IX86_BUILTIN_MAX };` of this enum is not
   visible in this extraction. */
19635 /* TFmode support builtins. */
19637 IX86_BUILTIN_FABSQ,
19638 IX86_BUILTIN_COPYSIGNQ,
19640 /* SSE5 instructions */
19641 IX86_BUILTIN_FMADDSS,
19642 IX86_BUILTIN_FMADDSD,
19643 IX86_BUILTIN_FMADDPS,
19644 IX86_BUILTIN_FMADDPD,
19645 IX86_BUILTIN_FMSUBSS,
19646 IX86_BUILTIN_FMSUBSD,
19647 IX86_BUILTIN_FMSUBPS,
19648 IX86_BUILTIN_FMSUBPD,
19649 IX86_BUILTIN_FNMADDSS,
19650 IX86_BUILTIN_FNMADDSD,
19651 IX86_BUILTIN_FNMADDPS,
19652 IX86_BUILTIN_FNMADDPD,
19653 IX86_BUILTIN_FNMSUBSS,
19654 IX86_BUILTIN_FNMSUBSD,
19655 IX86_BUILTIN_FNMSUBPS,
19656 IX86_BUILTIN_FNMSUBPD,
19657 IX86_BUILTIN_PCMOV,
19658 IX86_BUILTIN_PCMOV_V2DI,
19659 IX86_BUILTIN_PCMOV_V4SI,
19660 IX86_BUILTIN_PCMOV_V8HI,
19661 IX86_BUILTIN_PCMOV_V16QI,
19662 IX86_BUILTIN_PCMOV_V4SF,
19663 IX86_BUILTIN_PCMOV_V2DF,
19664 IX86_BUILTIN_PPERM,
19665 IX86_BUILTIN_PERMPS,
19666 IX86_BUILTIN_PERMPD,
19667 IX86_BUILTIN_PMACSSWW,
19668 IX86_BUILTIN_PMACSWW,
19669 IX86_BUILTIN_PMACSSWD,
19670 IX86_BUILTIN_PMACSWD,
19671 IX86_BUILTIN_PMACSSDD,
19672 IX86_BUILTIN_PMACSDD,
19673 IX86_BUILTIN_PMACSSDQL,
19674 IX86_BUILTIN_PMACSSDQH,
19675 IX86_BUILTIN_PMACSDQL,
19676 IX86_BUILTIN_PMACSDQH,
19677 IX86_BUILTIN_PMADCSSWD,
19678 IX86_BUILTIN_PMADCSWD,
19679 IX86_BUILTIN_PHADDBW,
19680 IX86_BUILTIN_PHADDBD,
19681 IX86_BUILTIN_PHADDBQ,
19682 IX86_BUILTIN_PHADDWD,
19683 IX86_BUILTIN_PHADDWQ,
19684 IX86_BUILTIN_PHADDDQ,
19685 IX86_BUILTIN_PHADDUBW,
19686 IX86_BUILTIN_PHADDUBD,
19687 IX86_BUILTIN_PHADDUBQ,
19688 IX86_BUILTIN_PHADDUWD,
19689 IX86_BUILTIN_PHADDUWQ,
19690 IX86_BUILTIN_PHADDUDQ,
19691 IX86_BUILTIN_PHSUBBW,
19692 IX86_BUILTIN_PHSUBWD,
19693 IX86_BUILTIN_PHSUBDQ,
19694 IX86_BUILTIN_PROTB,
19695 IX86_BUILTIN_PROTW,
19696 IX86_BUILTIN_PROTD,
19697 IX86_BUILTIN_PROTQ,
19698 IX86_BUILTIN_PROTB_IMM,
19699 IX86_BUILTIN_PROTW_IMM,
19700 IX86_BUILTIN_PROTD_IMM,
19701 IX86_BUILTIN_PROTQ_IMM,
19702 IX86_BUILTIN_PSHLB,
19703 IX86_BUILTIN_PSHLW,
19704 IX86_BUILTIN_PSHLD,
19705 IX86_BUILTIN_PSHLQ,
19706 IX86_BUILTIN_PSHAB,
19707 IX86_BUILTIN_PSHAW,
19708 IX86_BUILTIN_PSHAD,
19709 IX86_BUILTIN_PSHAQ,
19710 IX86_BUILTIN_FRCZSS,
19711 IX86_BUILTIN_FRCZSD,
19712 IX86_BUILTIN_FRCZPS,
19713 IX86_BUILTIN_FRCZPD,
19714 IX86_BUILTIN_CVTPH2PS,
19715 IX86_BUILTIN_CVTPS2PH,
19717 IX86_BUILTIN_COMEQSS,
19718 IX86_BUILTIN_COMNESS,
19719 IX86_BUILTIN_COMLTSS,
19720 IX86_BUILTIN_COMLESS,
19721 IX86_BUILTIN_COMGTSS,
19722 IX86_BUILTIN_COMGESS,
19723 IX86_BUILTIN_COMUEQSS,
19724 IX86_BUILTIN_COMUNESS,
19725 IX86_BUILTIN_COMULTSS,
19726 IX86_BUILTIN_COMULESS,
19727 IX86_BUILTIN_COMUGTSS,
19728 IX86_BUILTIN_COMUGESS,
19729 IX86_BUILTIN_COMORDSS,
19730 IX86_BUILTIN_COMUNORDSS,
19731 IX86_BUILTIN_COMFALSESS,
19732 IX86_BUILTIN_COMTRUESS,
19734 IX86_BUILTIN_COMEQSD,
19735 IX86_BUILTIN_COMNESD,
19736 IX86_BUILTIN_COMLTSD,
19737 IX86_BUILTIN_COMLESD,
19738 IX86_BUILTIN_COMGTSD,
19739 IX86_BUILTIN_COMGESD,
19740 IX86_BUILTIN_COMUEQSD,
19741 IX86_BUILTIN_COMUNESD,
19742 IX86_BUILTIN_COMULTSD,
19743 IX86_BUILTIN_COMULESD,
19744 IX86_BUILTIN_COMUGTSD,
19745 IX86_BUILTIN_COMUGESD,
19746 IX86_BUILTIN_COMORDSD,
19747 IX86_BUILTIN_COMUNORDSD,
19748 IX86_BUILTIN_COMFALSESD,
19749 IX86_BUILTIN_COMTRUESD,
19751 IX86_BUILTIN_COMEQPS,
19752 IX86_BUILTIN_COMNEPS,
19753 IX86_BUILTIN_COMLTPS,
19754 IX86_BUILTIN_COMLEPS,
19755 IX86_BUILTIN_COMGTPS,
19756 IX86_BUILTIN_COMGEPS,
19757 IX86_BUILTIN_COMUEQPS,
19758 IX86_BUILTIN_COMUNEPS,
19759 IX86_BUILTIN_COMULTPS,
19760 IX86_BUILTIN_COMULEPS,
19761 IX86_BUILTIN_COMUGTPS,
19762 IX86_BUILTIN_COMUGEPS,
19763 IX86_BUILTIN_COMORDPS,
19764 IX86_BUILTIN_COMUNORDPS,
19765 IX86_BUILTIN_COMFALSEPS,
19766 IX86_BUILTIN_COMTRUEPS,
19768 IX86_BUILTIN_COMEQPD,
19769 IX86_BUILTIN_COMNEPD,
19770 IX86_BUILTIN_COMLTPD,
19771 IX86_BUILTIN_COMLEPD,
19772 IX86_BUILTIN_COMGTPD,
19773 IX86_BUILTIN_COMGEPD,
19774 IX86_BUILTIN_COMUEQPD,
19775 IX86_BUILTIN_COMUNEPD,
19776 IX86_BUILTIN_COMULTPD,
19777 IX86_BUILTIN_COMULEPD,
19778 IX86_BUILTIN_COMUGTPD,
19779 IX86_BUILTIN_COMUGEPD,
19780 IX86_BUILTIN_COMORDPD,
19781 IX86_BUILTIN_COMUNORDPD,
19782 IX86_BUILTIN_COMFALSEPD,
19783 IX86_BUILTIN_COMTRUEPD,
19785 IX86_BUILTIN_PCOMEQUB,
19786 IX86_BUILTIN_PCOMNEUB,
19787 IX86_BUILTIN_PCOMLTUB,
19788 IX86_BUILTIN_PCOMLEUB,
19789 IX86_BUILTIN_PCOMGTUB,
19790 IX86_BUILTIN_PCOMGEUB,
19791 IX86_BUILTIN_PCOMFALSEUB,
19792 IX86_BUILTIN_PCOMTRUEUB,
19793 IX86_BUILTIN_PCOMEQUW,
19794 IX86_BUILTIN_PCOMNEUW,
19795 IX86_BUILTIN_PCOMLTUW,
19796 IX86_BUILTIN_PCOMLEUW,
19797 IX86_BUILTIN_PCOMGTUW,
19798 IX86_BUILTIN_PCOMGEUW,
19799 IX86_BUILTIN_PCOMFALSEUW,
19800 IX86_BUILTIN_PCOMTRUEUW,
19801 IX86_BUILTIN_PCOMEQUD,
19802 IX86_BUILTIN_PCOMNEUD,
19803 IX86_BUILTIN_PCOMLTUD,
19804 IX86_BUILTIN_PCOMLEUD,
19805 IX86_BUILTIN_PCOMGTUD,
19806 IX86_BUILTIN_PCOMGEUD,
19807 IX86_BUILTIN_PCOMFALSEUD,
19808 IX86_BUILTIN_PCOMTRUEUD,
19809 IX86_BUILTIN_PCOMEQUQ,
19810 IX86_BUILTIN_PCOMNEUQ,
19811 IX86_BUILTIN_PCOMLTUQ,
19812 IX86_BUILTIN_PCOMLEUQ,
19813 IX86_BUILTIN_PCOMGTUQ,
19814 IX86_BUILTIN_PCOMGEUQ,
19815 IX86_BUILTIN_PCOMFALSEUQ,
19816 IX86_BUILTIN_PCOMTRUEUQ,
19818 IX86_BUILTIN_PCOMEQB,
19819 IX86_BUILTIN_PCOMNEB,
19820 IX86_BUILTIN_PCOMLTB,
19821 IX86_BUILTIN_PCOMLEB,
19822 IX86_BUILTIN_PCOMGTB,
19823 IX86_BUILTIN_PCOMGEB,
19824 IX86_BUILTIN_PCOMFALSEB,
19825 IX86_BUILTIN_PCOMTRUEB,
19826 IX86_BUILTIN_PCOMEQW,
19827 IX86_BUILTIN_PCOMNEW,
19828 IX86_BUILTIN_PCOMLTW,
19829 IX86_BUILTIN_PCOMLEW,
19830 IX86_BUILTIN_PCOMGTW,
19831 IX86_BUILTIN_PCOMGEW,
19832 IX86_BUILTIN_PCOMFALSEW,
19833 IX86_BUILTIN_PCOMTRUEW,
19834 IX86_BUILTIN_PCOMEQD,
19835 IX86_BUILTIN_PCOMNED,
19836 IX86_BUILTIN_PCOMLTD,
19837 IX86_BUILTIN_PCOMLED,
19838 IX86_BUILTIN_PCOMGTD,
19839 IX86_BUILTIN_PCOMGED,
19840 IX86_BUILTIN_PCOMFALSED,
19841 IX86_BUILTIN_PCOMTRUED,
19842 IX86_BUILTIN_PCOMEQQ,
19843 IX86_BUILTIN_PCOMNEQ,
19844 IX86_BUILTIN_PCOMLTQ,
19845 IX86_BUILTIN_PCOMLEQ,
19846 IX86_BUILTIN_PCOMGTQ,
19847 IX86_BUILTIN_PCOMGEQ,
19848 IX86_BUILTIN_PCOMFALSEQ,
19849 IX86_BUILTIN_PCOMTRUEQ,
19854 /* Table for the ix86 builtin decls. */
/* Indexed by enum ix86_builtins; entries are the FUNCTION_DECLs created
   by def_builtin, or NULL_TREE for builtins that were deferred. */
19855 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
19857 /* Table of all of the builtin functions that are possible with different ISA's
19858 but are waiting to be built until a function is declared to use that
/* Per-builtin record of a deferred declaration; consumed by
   ix86_add_new_builtins when the ISA becomes available. */
19860 struct builtin_isa GTY(())
19862 tree type; /* builtin type to use in the declaration */
19863 const char *name; /* function name */
19864 int isa; /* isa_flags this builtin is defined for */
19865 bool const_p; /* true if the declaration is constant */
19868 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
19871 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
19872 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
19873 * function decl in the ix86_builtins array. Returns the function decl or
19874 * NULL_TREE, if the builtin was not added.
19876 * If the front end has a special hook for builtin functions, delay adding
19877 * builtin functions that aren't in the current ISA until the ISA is changed
19878 * with function specific optimization. Doing so, can save about 300K for the
19879 * default compiler. When the builtin is expanded, check at that time whether
19882 * If the front end doesn't have a special hook, record all builtins, even if
19883 * it isn't an instruction set in the current ISA in case the user uses
19884 * function specific options for a different ISA, so that we don't get scope
19885 * errors if a builtin is added in the middle of a function scope. */
/* NOTE(review): incomplete extraction — the return type, braces and the
   if/else structure separating the immediate-declaration path from the
   deferred path were dropped. */
19888 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
19890 tree decl = NULL_TREE;
/* 64-bit-only builtins are skipped entirely on 32-bit targets. */
19892 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
19894 ix86_builtins_isa[(int) code].isa = mask;
/* Declare now if the builtin is in the current ISA, or if the front end
   cannot declare builtins later (no ext_scope hook). */
19896 if ((mask & ix86_isa_flags) != 0
19897 || (lang_hooks.builtin_function
19898 == lang_hooks.builtin_function_ext_scope))
19901 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
19903 ix86_builtins[(int) code] = decl;
19904 ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Deferred path: record everything needed so ix86_add_new_builtins can
   declare this builtin later when its ISA is enabled. */
19908 ix86_builtins[(int) code] = NULL_TREE;
19909 ix86_builtins_isa[(int) code].const_p = false;
19910 ix86_builtins_isa[(int) code].type = type;
19911 ix86_builtins_isa[(int) code].name = name;
19918 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): the branch choosing between the two statements below
   (immediate decl vs. deferred record) was dropped from this extraction. */
19921 def_builtin_const (int mask, const char *name, tree type,
19922 enum ix86_builtins code)
19924 tree decl = def_builtin (mask, name, type, code);
19926 TREE_READONLY (decl) = 1;
/* Deferred builtin: remember const-ness for ix86_add_new_builtins. */
19928 ix86_builtins_isa[(int) code].const_p = true;
19933 /* Add any new builtin functions for a given ISA that may not have been
19934 declared. This saves a bit of space compared to adding all of the
19935 declarations to the tree, even if we didn't use them. */
/* Walks the deferred-declaration table filled in by def_builtin and
   declares, at global (ext) scope, every builtin whose ISA bits are now
   enabled.  NOTE(review): the return type, braces and locals (i, decl)
   were dropped from this extraction. */
19938 ix86_add_new_builtins (int isa)
19943 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
/* type != NULL_TREE marks an entry still pending declaration. */
19945 if ((ix86_builtins_isa[i].isa & isa) != 0
19946 && ix86_builtins_isa[i].type != NULL_TREE)
19948 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
19949 ix86_builtins_isa[i].type,
19950 i, BUILT_IN_MD, NULL,
/* Clear .type so the entry is not declared a second time. */
19953 ix86_builtins[i] = decl;
19954 ix86_builtins_isa[i].type = NULL_TREE;
19955 if (ix86_builtins_isa[i].const_p)
19956 TREE_READONLY (decl) = 1;
19961 /* Bits for builtin_description.flag. */
19963 /* Set when we don't support the comparison natively, and should
19964 swap_comparison in order to support it. */
19965 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the bdesc_* expander tables: ISA mask gating the builtin,
   the insn pattern to emit, the user-visible name, the enum code it maps
   to, and the RTL comparison code (UNKNOWN when not a comparison). */
19967 struct builtin_description
19969 const unsigned int mask;
19970 const enum insn_code icode;
19971 const char *const name;
19972 const enum ix86_builtins code;
19973 const enum rtx_code comparison;
/* COMISS/UCOMISS (SSE) and COMISD/UCOMISD (SSE2) scalar compare builtins.
   The rtx_code column gives the FP comparison each one tests for. */
19977 static const struct builtin_description bdesc_comi[] =
19979 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
19980 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
19981 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
19982 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
19983 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
19984 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
19985 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
19986 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
19987 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
19988 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
19989 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
19990 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
19991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
19992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
19993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
19994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
19995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
19996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
19997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
19998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
19999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
20000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
20001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
20002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
20005 static const struct builtin_description bdesc_pcmpestr[] =
/* SSE4.2 string-compare builtins with Explicit length operands
   (PCMPESTRI/PCMPESTRM).  Every entry expands through the single
   CODE_FOR_sse4_2_pcmpestr pattern.  The index form (...estri128) and
   mask form (...estrm128) carry 0 in the final field; the flag-reading
   forms (...ia/ic/io/is/iz) instead carry a condition-code mode
   (CCAmode, CCCmode, ...), presumably selecting which EFLAGS result
   the builtin returns -- TODO(review): confirm against the expander
   that consumes this table.  */
20008 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
20009 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
20010 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
20011 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
20012 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
20013 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
20014 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
20017 static const struct builtin_description bdesc_pcmpistr[] =
/* SSE4.2 string-compare builtins with Implicit (NUL-terminated) length
   operands (PCMPISTRI/PCMPISTRM).  Mirrors bdesc_pcmpestr above: all
   entries share CODE_FOR_sse4_2_pcmpistr, with 0 in the last field for
   the index/mask forms and a condition-code mode for the flag-reading
   ...ia/ic/io/is/iz forms.  */
20020 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
20021 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
20022 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
20023 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
20024 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
20025 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
20026 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
20029 /* Special builtin types */
/* Signature codes for the "special" builtins in bdesc_special_args
   below (loads, stores, fences -- operations involving pointers).
   Names encode RETURN_FTYPE_ARG1[_ARG2...].  Judging from their use
   below, a leading P in an argument name means "pointer to" and PC
   "pointer to const": e.g. V4SF_FTYPE_PCFLOAT is used by loadups
   (reads through a const float *) and VOID_FTYPE_PFLOAT_V4SF by
   storeups (writes through a float *).  TODO(review): confirm the
   P/PC decoding against the type-building code that consumes this
   enum.  */
20030 enum ix86_special_builtin_type
20032 SPECIAL_FTYPE_UNKNOWN,
20034 V32QI_FTYPE_PCCHAR,
20035 V16QI_FTYPE_PCCHAR,
20037 V8SF_FTYPE_PCFLOAT,
20039 V4DF_FTYPE_PCDOUBLE,
20040 V4SF_FTYPE_PCFLOAT,
20041 V2DF_FTYPE_PCDOUBLE,
20042 V8SF_FTYPE_PCV8SF_V8SF,
20043 V4DF_FTYPE_PCV4DF_V4DF,
20044 V4SF_FTYPE_V4SF_PCV2SF,
20045 V4SF_FTYPE_PCV4SF_V4SF,
20046 V2DF_FTYPE_V2DF_PCDOUBLE,
20047 V2DF_FTYPE_PCV2DF_V2DF,
/* Store-style signatures: void result, destination pointer first.  */
20049 VOID_FTYPE_PV2SF_V4SF,
20050 VOID_FTYPE_PV2DI_V2DI,
20051 VOID_FTYPE_PCHAR_V32QI,
20052 VOID_FTYPE_PCHAR_V16QI,
20053 VOID_FTYPE_PFLOAT_V8SF,
20054 VOID_FTYPE_PFLOAT_V4SF,
20055 VOID_FTYPE_PDOUBLE_V4DF,
20056 VOID_FTYPE_PDOUBLE_V2DF,
20058 VOID_FTYPE_PINT_INT,
/* Masked-store signatures (pointer, mask, value) -- used by the AVX
   maskstore entries below.  */
20059 VOID_FTYPE_PV8SF_V8SF_V8SF,
20060 VOID_FTYPE_PV4DF_V4DF_V4DF,
20061 VOID_FTYPE_PV4SF_V4SF_V4SF,
20062 VOID_FTYPE_PV2DF_V2DF_V2DF
20065 /* Builtin types */
/* Signature codes for the general builtins in bdesc_args.  Names
   encode RETURN_FTYPE_ARG1[_ARG2...], using mode names (V4SF, V2DI,
   SI, DI, ...) for the operand types.  Trailing suffixes modify how
   the expander treats the entry rather than the C signature:
     _COUNT     -- last operand is a shift count (see the psll/psrl/psra
                   entries in bdesc_args);
     _SWAP      -- operands are swapped before expansion (see cmpgtps,
                   which pairs LT with this suffix);
     _PTEST     -- ptest-style flag-setting comparison;
     _VEC_MERGE -- scalar op merged into the destination vector (used
                   by sqrtss/rsqrtss/rcpss).
   TODO(review): confirm these suffix semantics against
   ix86_expand_args_builtin.  */
20066 enum ix86_builtin_type
20069 FLOAT128_FTYPE_FLOAT128,
20071 FLOAT128_FTYPE_FLOAT128_FLOAT128,
/* PTEST-style predicates: int flag result from two vector operands.  */
20072 INT_FTYPE_V8SF_V8SF_PTEST,
20073 INT_FTYPE_V4DI_V4DI_PTEST,
20074 INT_FTYPE_V4DF_V4DF_PTEST,
20075 INT_FTYPE_V4SF_V4SF_PTEST,
20076 INT_FTYPE_V2DI_V2DI_PTEST,
20077 INT_FTYPE_V2DF_V2DF_PTEST,
20109 V4SF_FTYPE_V4SF_VEC_MERGE,
20118 V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand (binary) vector signatures.  */
20129 V16QI_FTYPE_V16QI_V16QI,
20130 V16QI_FTYPE_V8HI_V8HI,
20131 V8QI_FTYPE_V8QI_V8QI,
20132 V8QI_FTYPE_V4HI_V4HI,
20133 V8HI_FTYPE_V8HI_V8HI,
20134 V8HI_FTYPE_V8HI_V8HI_COUNT,
20135 V8HI_FTYPE_V16QI_V16QI,
20136 V8HI_FTYPE_V4SI_V4SI,
20137 V8HI_FTYPE_V8HI_SI_COUNT,
20138 V8SF_FTYPE_V8SF_V8SF,
20139 V8SF_FTYPE_V8SF_V8SI,
20140 V4SI_FTYPE_V4SI_V4SI,
20141 V4SI_FTYPE_V4SI_V4SI_COUNT,
20142 V4SI_FTYPE_V8HI_V8HI,
20143 V4SI_FTYPE_V4SF_V4SF,
20144 V4SI_FTYPE_V2DF_V2DF,
20145 V4SI_FTYPE_V4SI_SI_COUNT,
20146 V4HI_FTYPE_V4HI_V4HI,
20147 V4HI_FTYPE_V4HI_V4HI_COUNT,
20148 V4HI_FTYPE_V8QI_V8QI,
20149 V4HI_FTYPE_V2SI_V2SI,
20150 V4HI_FTYPE_V4HI_SI_COUNT,
20151 V4DF_FTYPE_V4DF_V4DF,
20152 V4DF_FTYPE_V4DF_V4DI,
20153 V4SF_FTYPE_V4SF_V4SF,
20154 V4SF_FTYPE_V4SF_V4SF_SWAP,
20155 V4SF_FTYPE_V4SF_V4SI,
20156 V4SF_FTYPE_V4SF_V2SI,
20157 V4SF_FTYPE_V4SF_V2DF,
20158 V4SF_FTYPE_V4SF_DI,
20159 V4SF_FTYPE_V4SF_SI,
20160 V2DI_FTYPE_V2DI_V2DI,
20161 V2DI_FTYPE_V2DI_V2DI_COUNT,
20162 V2DI_FTYPE_V16QI_V16QI,
20163 V2DI_FTYPE_V4SI_V4SI,
20164 V2DI_FTYPE_V2DI_V16QI,
20165 V2DI_FTYPE_V2DF_V2DF,
20166 V2DI_FTYPE_V2DI_SI_COUNT,
20167 V2SI_FTYPE_V2SI_V2SI,
20168 V2SI_FTYPE_V2SI_V2SI_COUNT,
20169 V2SI_FTYPE_V4HI_V4HI,
20170 V2SI_FTYPE_V2SF_V2SF,
20171 V2SI_FTYPE_V2SI_SI_COUNT,
20172 V2DF_FTYPE_V2DF_V2DF,
20173 V2DF_FTYPE_V2DF_V2DF_SWAP,
20174 V2DF_FTYPE_V2DF_V4SF,
20175 V2DF_FTYPE_V2DF_V2DI,
20176 V2DF_FTYPE_V2DF_DI,
20177 V2DF_FTYPE_V2DF_SI,
20178 V2SF_FTYPE_V2SF_V2SF,
20179 V1DI_FTYPE_V1DI_V1DI,
20180 V1DI_FTYPE_V1DI_V1DI_COUNT,
20181 V1DI_FTYPE_V8QI_V8QI,
20182 V1DI_FTYPE_V2SI_V2SI,
20183 V1DI_FTYPE_V1DI_SI_COUNT,
/* Scalar integer signatures.  */
20184 UINT64_FTYPE_UINT64_UINT64,
20185 UINT_FTYPE_UINT_UINT,
20186 UINT_FTYPE_UINT_USHORT,
20187 UINT_FTYPE_UINT_UCHAR,
/* Vector-with-immediate-int signatures.  */
20188 V8HI_FTYPE_V8HI_INT,
20189 V4SI_FTYPE_V4SI_INT,
20190 V4HI_FTYPE_V4HI_INT,
20191 V8SF_FTYPE_V8SF_INT,
20192 V4SI_FTYPE_V8SI_INT,
20193 V4SF_FTYPE_V8SF_INT,
20194 V2DF_FTYPE_V4DF_INT,
20195 V4DF_FTYPE_V4DF_INT,
20196 V4SF_FTYPE_V4SF_INT,
20197 V2DI_FTYPE_V2DI_INT,
20198 V2DI2TI_FTYPE_V2DI_INT,
20199 V2DF_FTYPE_V2DF_INT,
/* Three-operand (ternary) vector signatures.  */
20200 V16QI_FTYPE_V16QI_V16QI_V16QI,
20201 V8SF_FTYPE_V8SF_V8SF_V8SF,
20202 V4DF_FTYPE_V4DF_V4DF_V4DF,
20203 V4SF_FTYPE_V4SF_V4SF_V4SF,
20204 V2DF_FTYPE_V2DF_V2DF_V2DF,
/* Two vectors plus an immediate int.  */
20205 V16QI_FTYPE_V16QI_V16QI_INT,
20206 V8SI_FTYPE_V8SI_V8SI_INT,
20207 V8SI_FTYPE_V8SI_V4SI_INT,
20208 V8HI_FTYPE_V8HI_V8HI_INT,
20209 V8SF_FTYPE_V8SF_V8SF_INT,
20210 V8SF_FTYPE_V8SF_V4SF_INT,
20211 V4SI_FTYPE_V4SI_V4SI_INT,
20212 V4DF_FTYPE_V4DF_V4DF_INT,
20213 V4DF_FTYPE_V4DF_V2DF_INT,
20214 V4SF_FTYPE_V4SF_V4SF_INT,
20215 V2DI_FTYPE_V2DI_V2DI_INT,
20216 V2DI2TI_FTYPE_V2DI_V2DI_INT,
20217 V1DI2DI_FTYPE_V1DI_V1DI_INT,
20218 V2DF_FTYPE_V2DF_V2DF_INT,
/* Three vectors plus an immediate int.  */
20219 V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
20220 V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
20221 V4SF_FTYPE_V4SF_V4SF_V4SI_INT,
20222 V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
20223 V2DI_FTYPE_V2DI_UINT_UINT,
20224 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
20227 /* Special builtins with variable number of arguments. */
/* Descriptor table for the "special" builtins -- loads, stores, fences
   and other memory-touching operations expanded by dedicated code.
   Entry layout: ISA option mask gating availability, insn code, builtin
   name (0 when no name is registered through this table -- see the
   mfence and vzeroupper entries; TODO(review): confirm where those are
   declared), IX86_BUILTIN enumerator, comparison code (always UNKNOWN
   here), and an ix86_special_builtin_type signature.  Grouped by ISA.  */
20228 static const struct builtin_description bdesc_special_args[] =
/* MMX */
20231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow! */
20234 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
20237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20246 /* SSE or 3DNow!A */
20247 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20248 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2 */
20251 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20253 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20254 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
20255 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20256 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
20257 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
20258 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
20259 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20261 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20262 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3 */
20265 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1 */
20268 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A */
20271 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20272 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX */
20275 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
20276 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
20277 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
20279 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20280 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20281 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20282 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
20283 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
20285 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20286 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20287 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20288 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20289 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20290 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
20291 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20293 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
20294 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
20295 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
20296 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
20297 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
20298 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
20299 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
20300 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
20303 /* Builtins with variable number of arguments. */
20304 static const struct builtin_description bdesc_args[] =
20307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20308 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20310 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20317 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20323 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20326 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20329 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20331 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
20343 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
20345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
20347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
20351 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20359 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20365 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20371 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20372 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20373 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20374 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20376 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20377 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20378 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20379 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20380 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20381 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20382 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20383 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20384 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20385 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20386 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20387 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20388 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20389 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20390 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20393 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20394 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20395 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
20396 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20397 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20398 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20401 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
20402 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20403 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20404 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20405 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20407 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20408 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20409 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20410 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20412 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20416 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20417 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20418 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20419 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20420 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20425 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20431 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20433 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20434 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20435 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
20436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20437 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20439 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20440 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20441 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20442 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20444 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20445 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20446 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20448 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20449 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20450 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20451 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20453 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20454 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20455 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20456 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20458 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20459 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20460 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
20465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
20466 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
20468 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
20470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20474 /* SSE MMX or 3Dnow!A */
20475 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20476 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20477 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20479 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20480 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20481 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20482 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20484 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
20485 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
20487 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
20490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
20493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
20494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
20495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
20496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
20498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
20501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
20506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20508 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20509 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
20513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20515 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20516 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20517 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20518 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20525 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
20529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20546 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20550 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20552 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20553 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20559 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
20561 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20562 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20563 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20564 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20565 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20566 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20567 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20568 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20579 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20580 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
20582 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20584 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20585 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20597 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20598 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20599 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
20612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
20613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
20615 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
20618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
20619 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
20621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
20623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
20624 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
20625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
20626 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
20628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
20629 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20630 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20631 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
20632 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20633 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20634 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
20636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
20637 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20638 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20639 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
20640 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20641 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20642 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
20644 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20645 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20646 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20647 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
20650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
20651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
20653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
20655 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
20656 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
20658 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
20661 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
20662 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
20665 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
20666 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20668 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20669 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20670 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20671 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20672 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20673 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20676 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
20677 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
20678 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
20679 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
20680 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
20681 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
20683 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20684 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20685 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20686 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20687 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20688 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20689 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20690 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20691 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20692 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20693 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20694 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20695 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
20696 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
20697 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20698 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20699 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20700 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20701 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20702 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20703 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20704 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20705 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20706 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20709 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
20710 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
20713 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20714 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20715 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
20716 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
20717 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20718 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20719 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20720 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
20721 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
20722 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
20724 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
20725 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
20726 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
20727 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
20728 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
20729 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
20730 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
20731 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
20732 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
20733 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
20734 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
20735 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
20736 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
20738 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
20739 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20740 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20741 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20742 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20743 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20744 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20745 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20746 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20747 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20748 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
20749 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20751 /* SSE4.1 and SSE5 */
20752 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
20753 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
20754 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20755 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20757 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20758 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20759 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20762 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20763 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
20764 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
20765 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
20766 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
20769 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
20770 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
20771 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
20772 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20775 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
20776 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
20778 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20779 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20780 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20781 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20784 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
20787 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20788 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20791 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20792 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20795 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20801 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20802 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20803 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20804 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20805 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20806 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20807 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20808 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20809 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20810 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20811 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20812 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
20815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
20816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
20817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
20819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
20822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
20823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
20833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
20834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
20835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
20836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
20837 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
20838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
20839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
20840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
20841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
20842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
20843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
20846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
20847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
20848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
20849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
20850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
20851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
20852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
20853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
20854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
20855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
20856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
20858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
20862 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
20863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20864 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20866 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
20871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
20873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
20879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
20880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
20881 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
20882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
20883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
20885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
20902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
20906 enum multi_arg_type {
20916 MULTI_ARG_3_PERMPS,
20917 MULTI_ARG_3_PERMPD,
20924 MULTI_ARG_2_DI_IMM,
20925 MULTI_ARG_2_SI_IMM,
20926 MULTI_ARG_2_HI_IMM,
20927 MULTI_ARG_2_QI_IMM,
20928 MULTI_ARG_2_SF_CMP,
20929 MULTI_ARG_2_DF_CMP,
20930 MULTI_ARG_2_DI_CMP,
20931 MULTI_ARG_2_SI_CMP,
20932 MULTI_ARG_2_HI_CMP,
20933 MULTI_ARG_2_QI_CMP,
20956 static const struct builtin_description bdesc_multi_arg[] =
20958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
20959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
20960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
20961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
20962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
20963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
20964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
20965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
20966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
20967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
20968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
20969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
20970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
20971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
20972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
20973 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
20974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
20975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
20976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
20977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
20978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
20979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
20980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
20981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
20982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
20983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
20984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
20985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
20986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
20987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
20988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
20989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
20990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
20991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
20992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
20993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
20994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
20995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
20996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
20997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
20998 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
20999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
21000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
21001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
21002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
21003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
21004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
21005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
21006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
21007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
21008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
21009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
21010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
21011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
21012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
21013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
21014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
21015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
21016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
21017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
21018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
21019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
21020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
21021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
21022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
21023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
21024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
21025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
21026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
21027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
21028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
21029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
21030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
21031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
21032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
21034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
21035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
21038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
21039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
21040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
21041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
21052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
21055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
21056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
21057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
21058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
21069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
21072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
21073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
21074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
21075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
21086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
21089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
21090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
21091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
21092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
21103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
21106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
21107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
21108 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
21110 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
21111 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21112 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21113 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
21114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
21115 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
21116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
21118 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
21119 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21120 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21121 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
21122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
21123 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
21124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
21126 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21127 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21128 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21129 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
21130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
21131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
21132 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
21134 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
21135 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21136 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21137 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
21138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
21139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
21140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
21142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
21143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21145 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
21146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
21147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
21148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
21150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
21151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21153 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
21154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
21155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
21156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
21158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
21162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
21163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
21164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
21166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
21167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
21168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
21169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
21170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
21171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
21172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
21173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
21175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21194 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
21195 in the current target ISA to allow the user to compile particular modules
21196 with different target specific options that differ from the command line
21199 ix86_init_mmx_sse_builtins (void)
21201 const struct builtin_description * d;
21204 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
21205 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
21206 tree V1DI_type_node
21207 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
21208 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
21209 tree V2DI_type_node
21210 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
21211 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
21212 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
21213 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
21214 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
21215 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
21216 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
21218 tree pchar_type_node = build_pointer_type (char_type_node);
21219 tree pcchar_type_node
21220 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
21221 tree pfloat_type_node = build_pointer_type (float_type_node);
21222 tree pcfloat_type_node
21223 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
21224 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
21225 tree pcv2sf_type_node
21226 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
21227 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
21228 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
21231 tree int_ftype_v4sf_v4sf
21232 = build_function_type_list (integer_type_node,
21233 V4SF_type_node, V4SF_type_node, NULL_TREE);
21234 tree v4si_ftype_v4sf_v4sf
21235 = build_function_type_list (V4SI_type_node,
21236 V4SF_type_node, V4SF_type_node, NULL_TREE);
21237 /* MMX/SSE/integer conversions. */
21238 tree int_ftype_v4sf
21239 = build_function_type_list (integer_type_node,
21240 V4SF_type_node, NULL_TREE);
21241 tree int64_ftype_v4sf
21242 = build_function_type_list (long_long_integer_type_node,
21243 V4SF_type_node, NULL_TREE);
21244 tree int_ftype_v8qi
21245 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
21246 tree v4sf_ftype_v4sf_int
21247 = build_function_type_list (V4SF_type_node,
21248 V4SF_type_node, integer_type_node, NULL_TREE);
21249 tree v4sf_ftype_v4sf_int64
21250 = build_function_type_list (V4SF_type_node,
21251 V4SF_type_node, long_long_integer_type_node,
21253 tree v4sf_ftype_v4sf_v2si
21254 = build_function_type_list (V4SF_type_node,
21255 V4SF_type_node, V2SI_type_node, NULL_TREE);
21257 /* Miscellaneous. */
21258 tree v8qi_ftype_v4hi_v4hi
21259 = build_function_type_list (V8QI_type_node,
21260 V4HI_type_node, V4HI_type_node, NULL_TREE);
21261 tree v4hi_ftype_v2si_v2si
21262 = build_function_type_list (V4HI_type_node,
21263 V2SI_type_node, V2SI_type_node, NULL_TREE);
21264 tree v4sf_ftype_v4sf_v4sf_int
21265 = build_function_type_list (V4SF_type_node,
21266 V4SF_type_node, V4SF_type_node,
21267 integer_type_node, NULL_TREE);
21268 tree v2si_ftype_v4hi_v4hi
21269 = build_function_type_list (V2SI_type_node,
21270 V4HI_type_node, V4HI_type_node, NULL_TREE);
21271 tree v4hi_ftype_v4hi_int
21272 = build_function_type_list (V4HI_type_node,
21273 V4HI_type_node, integer_type_node, NULL_TREE);
21274 tree v2si_ftype_v2si_int
21275 = build_function_type_list (V2SI_type_node,
21276 V2SI_type_node, integer_type_node, NULL_TREE);
21277 tree v1di_ftype_v1di_int
21278 = build_function_type_list (V1DI_type_node,
21279 V1DI_type_node, integer_type_node, NULL_TREE);
21281 tree void_ftype_void
21282 = build_function_type (void_type_node, void_list_node);
21283 tree void_ftype_unsigned
21284 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
21285 tree void_ftype_unsigned_unsigned
21286 = build_function_type_list (void_type_node, unsigned_type_node,
21287 unsigned_type_node, NULL_TREE);
21288 tree void_ftype_pcvoid_unsigned_unsigned
21289 = build_function_type_list (void_type_node, const_ptr_type_node,
21290 unsigned_type_node, unsigned_type_node,
21292 tree unsigned_ftype_void
21293 = build_function_type (unsigned_type_node, void_list_node);
21294 tree v2si_ftype_v4sf
21295 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
21296 /* Loads/stores. */
21297 tree void_ftype_v8qi_v8qi_pchar
21298 = build_function_type_list (void_type_node,
21299 V8QI_type_node, V8QI_type_node,
21300 pchar_type_node, NULL_TREE);
21301 tree v4sf_ftype_pcfloat
21302 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
21303 tree v4sf_ftype_v4sf_pcv2sf
21304 = build_function_type_list (V4SF_type_node,
21305 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
21306 tree void_ftype_pv2sf_v4sf
21307 = build_function_type_list (void_type_node,
21308 pv2sf_type_node, V4SF_type_node, NULL_TREE);
21309 tree void_ftype_pfloat_v4sf
21310 = build_function_type_list (void_type_node,
21311 pfloat_type_node, V4SF_type_node, NULL_TREE);
21312 tree void_ftype_pdi_di
21313 = build_function_type_list (void_type_node,
21314 pdi_type_node, long_long_unsigned_type_node,
21316 tree void_ftype_pv2di_v2di
21317 = build_function_type_list (void_type_node,
21318 pv2di_type_node, V2DI_type_node, NULL_TREE);
21319 /* Normal vector unops. */
21320 tree v4sf_ftype_v4sf
21321 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
21322 tree v16qi_ftype_v16qi
21323 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
21324 tree v8hi_ftype_v8hi
21325 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
21326 tree v4si_ftype_v4si
21327 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
21328 tree v8qi_ftype_v8qi
21329 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
21330 tree v4hi_ftype_v4hi
21331 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
21333 /* Normal vector binops. */
21334 tree v4sf_ftype_v4sf_v4sf
21335 = build_function_type_list (V4SF_type_node,
21336 V4SF_type_node, V4SF_type_node, NULL_TREE);
21337 tree v8qi_ftype_v8qi_v8qi
21338 = build_function_type_list (V8QI_type_node,
21339 V8QI_type_node, V8QI_type_node, NULL_TREE);
21340 tree v4hi_ftype_v4hi_v4hi
21341 = build_function_type_list (V4HI_type_node,
21342 V4HI_type_node, V4HI_type_node, NULL_TREE);
21343 tree v2si_ftype_v2si_v2si
21344 = build_function_type_list (V2SI_type_node,
21345 V2SI_type_node, V2SI_type_node, NULL_TREE);
21346 tree v1di_ftype_v1di_v1di
21347 = build_function_type_list (V1DI_type_node,
21348 V1DI_type_node, V1DI_type_node, NULL_TREE);
21349 tree v1di_ftype_v1di_v1di_int
21350 = build_function_type_list (V1DI_type_node,
21351 V1DI_type_node, V1DI_type_node,
21352 integer_type_node, NULL_TREE);
21353 tree v2si_ftype_v2sf
21354 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
21355 tree v2sf_ftype_v2si
21356 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
21357 tree v2si_ftype_v2si
21358 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
21359 tree v2sf_ftype_v2sf
21360 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
21361 tree v2sf_ftype_v2sf_v2sf
21362 = build_function_type_list (V2SF_type_node,
21363 V2SF_type_node, V2SF_type_node, NULL_TREE);
21364 tree v2si_ftype_v2sf_v2sf
21365 = build_function_type_list (V2SI_type_node,
21366 V2SF_type_node, V2SF_type_node, NULL_TREE);
21367 tree pint_type_node = build_pointer_type (integer_type_node);
21368 tree pdouble_type_node = build_pointer_type (double_type_node);
21369 tree pcdouble_type_node = build_pointer_type (
21370 build_type_variant (double_type_node, 1, 0));
21371 tree int_ftype_v2df_v2df
21372 = build_function_type_list (integer_type_node,
21373 V2DF_type_node, V2DF_type_node, NULL_TREE);
21375 tree void_ftype_pcvoid
21376 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
21377 tree v4sf_ftype_v4si
21378 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
21379 tree v4si_ftype_v4sf
21380 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
21381 tree v2df_ftype_v4si
21382 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
21383 tree v4si_ftype_v2df
21384 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
21385 tree v4si_ftype_v2df_v2df
21386 = build_function_type_list (V4SI_type_node,
21387 V2DF_type_node, V2DF_type_node, NULL_TREE);
21388 tree v2si_ftype_v2df
21389 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
21390 tree v4sf_ftype_v2df
21391 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
21392 tree v2df_ftype_v2si
21393 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
21394 tree v2df_ftype_v4sf
21395 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
21396 tree int_ftype_v2df
21397 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
21398 tree int64_ftype_v2df
21399 = build_function_type_list (long_long_integer_type_node,
21400 V2DF_type_node, NULL_TREE);
21401 tree v2df_ftype_v2df_int
21402 = build_function_type_list (V2DF_type_node,
21403 V2DF_type_node, integer_type_node, NULL_TREE);
21404 tree v2df_ftype_v2df_int64
21405 = build_function_type_list (V2DF_type_node,
21406 V2DF_type_node, long_long_integer_type_node,
21408 tree v4sf_ftype_v4sf_v2df
21409 = build_function_type_list (V4SF_type_node,
21410 V4SF_type_node, V2DF_type_node, NULL_TREE);
21411 tree v2df_ftype_v2df_v4sf
21412 = build_function_type_list (V2DF_type_node,
21413 V2DF_type_node, V4SF_type_node, NULL_TREE);
21414 tree v2df_ftype_v2df_v2df_int
21415 = build_function_type_list (V2DF_type_node,
21416 V2DF_type_node, V2DF_type_node,
21419 tree v2df_ftype_v2df_pcdouble
21420 = build_function_type_list (V2DF_type_node,
21421 V2DF_type_node, pcdouble_type_node, NULL_TREE);
21422 tree void_ftype_pdouble_v2df
21423 = build_function_type_list (void_type_node,
21424 pdouble_type_node, V2DF_type_node, NULL_TREE);
21425 tree void_ftype_pint_int
21426 = build_function_type_list (void_type_node,
21427 pint_type_node, integer_type_node, NULL_TREE);
21428 tree void_ftype_v16qi_v16qi_pchar
21429 = build_function_type_list (void_type_node,
21430 V16QI_type_node, V16QI_type_node,
21431 pchar_type_node, NULL_TREE);
21432 tree v2df_ftype_pcdouble
21433 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
21434 tree v2df_ftype_v2df_v2df
21435 = build_function_type_list (V2DF_type_node,
21436 V2DF_type_node, V2DF_type_node, NULL_TREE);
21437 tree v16qi_ftype_v16qi_v16qi
21438 = build_function_type_list (V16QI_type_node,
21439 V16QI_type_node, V16QI_type_node, NULL_TREE);
21440 tree v8hi_ftype_v8hi_v8hi
21441 = build_function_type_list (V8HI_type_node,
21442 V8HI_type_node, V8HI_type_node, NULL_TREE);
21443 tree v4si_ftype_v4si_v4si
21444 = build_function_type_list (V4SI_type_node,
21445 V4SI_type_node, V4SI_type_node, NULL_TREE);
21446 tree v2di_ftype_v2di_v2di
21447 = build_function_type_list (V2DI_type_node,
21448 V2DI_type_node, V2DI_type_node, NULL_TREE);
21449 tree v2di_ftype_v2df_v2df
21450 = build_function_type_list (V2DI_type_node,
21451 V2DF_type_node, V2DF_type_node, NULL_TREE);
21452 tree v2df_ftype_v2df
21453 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
21454 tree v2di_ftype_v2di_int
21455 = build_function_type_list (V2DI_type_node,
21456 V2DI_type_node, integer_type_node, NULL_TREE);
21457 tree v2di_ftype_v2di_v2di_int
21458 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21459 V2DI_type_node, integer_type_node, NULL_TREE);
21460 tree v4si_ftype_v4si_int
21461 = build_function_type_list (V4SI_type_node,
21462 V4SI_type_node, integer_type_node, NULL_TREE);
21463 tree v8hi_ftype_v8hi_int
21464 = build_function_type_list (V8HI_type_node,
21465 V8HI_type_node, integer_type_node, NULL_TREE);
21466 tree v4si_ftype_v8hi_v8hi
21467 = build_function_type_list (V4SI_type_node,
21468 V8HI_type_node, V8HI_type_node, NULL_TREE);
21469 tree v1di_ftype_v8qi_v8qi
21470 = build_function_type_list (V1DI_type_node,
21471 V8QI_type_node, V8QI_type_node, NULL_TREE);
21472 tree v1di_ftype_v2si_v2si
21473 = build_function_type_list (V1DI_type_node,
21474 V2SI_type_node, V2SI_type_node, NULL_TREE);
21475 tree v2di_ftype_v16qi_v16qi
21476 = build_function_type_list (V2DI_type_node,
21477 V16QI_type_node, V16QI_type_node, NULL_TREE);
21478 tree v2di_ftype_v4si_v4si
21479 = build_function_type_list (V2DI_type_node,
21480 V4SI_type_node, V4SI_type_node, NULL_TREE);
21481 tree int_ftype_v16qi
21482 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
21483 tree v16qi_ftype_pcchar
21484 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
21485 tree void_ftype_pchar_v16qi
21486 = build_function_type_list (void_type_node,
21487 pchar_type_node, V16QI_type_node, NULL_TREE);
21489 tree v2di_ftype_v2di_unsigned_unsigned
21490 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21491 unsigned_type_node, unsigned_type_node,
21493 tree v2di_ftype_v2di_v2di_unsigned_unsigned
21494 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
21495 unsigned_type_node, unsigned_type_node,
21497 tree v2di_ftype_v2di_v16qi
21498 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
21500 tree v2df_ftype_v2df_v2df_v2df
21501 = build_function_type_list (V2DF_type_node,
21502 V2DF_type_node, V2DF_type_node,
21503 V2DF_type_node, NULL_TREE);
21504 tree v4sf_ftype_v4sf_v4sf_v4sf
21505 = build_function_type_list (V4SF_type_node,
21506 V4SF_type_node, V4SF_type_node,
21507 V4SF_type_node, NULL_TREE);
21508 tree v8hi_ftype_v16qi
21509 = build_function_type_list (V8HI_type_node, V16QI_type_node,
21511 tree v4si_ftype_v16qi
21512 = build_function_type_list (V4SI_type_node, V16QI_type_node,
21514 tree v2di_ftype_v16qi
21515 = build_function_type_list (V2DI_type_node, V16QI_type_node,
21517 tree v4si_ftype_v8hi
21518 = build_function_type_list (V4SI_type_node, V8HI_type_node,
21520 tree v2di_ftype_v8hi
21521 = build_function_type_list (V2DI_type_node, V8HI_type_node,
21523 tree v2di_ftype_v4si
21524 = build_function_type_list (V2DI_type_node, V4SI_type_node,
21526 tree v2di_ftype_pv2di
21527 = build_function_type_list (V2DI_type_node, pv2di_type_node,
21529 tree v16qi_ftype_v16qi_v16qi_int
21530 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21531 V16QI_type_node, integer_type_node,
21533 tree v16qi_ftype_v16qi_v16qi_v16qi
21534 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21535 V16QI_type_node, V16QI_type_node,
21537 tree v8hi_ftype_v8hi_v8hi_int
21538 = build_function_type_list (V8HI_type_node, V8HI_type_node,
21539 V8HI_type_node, integer_type_node,
21541 tree v4si_ftype_v4si_v4si_int
21542 = build_function_type_list (V4SI_type_node, V4SI_type_node,
21543 V4SI_type_node, integer_type_node,
21545 tree int_ftype_v2di_v2di
21546 = build_function_type_list (integer_type_node,
21547 V2DI_type_node, V2DI_type_node,
21549 tree int_ftype_v16qi_int_v16qi_int_int
21550 = build_function_type_list (integer_type_node,
21557 tree v16qi_ftype_v16qi_int_v16qi_int_int
21558 = build_function_type_list (V16QI_type_node,
21565 tree int_ftype_v16qi_v16qi_int
21566 = build_function_type_list (integer_type_node,
21572 /* SSE5 instructions */
21573 tree v2di_ftype_v2di_v2di_v2di
21574 = build_function_type_list (V2DI_type_node,
21580 tree v4si_ftype_v4si_v4si_v4si
21581 = build_function_type_list (V4SI_type_node,
21587 tree v4si_ftype_v4si_v4si_v2di
21588 = build_function_type_list (V4SI_type_node,
21594 tree v8hi_ftype_v8hi_v8hi_v8hi
21595 = build_function_type_list (V8HI_type_node,
21601 tree v8hi_ftype_v8hi_v8hi_v4si
21602 = build_function_type_list (V8HI_type_node,
21608 tree v2df_ftype_v2df_v2df_v16qi
21609 = build_function_type_list (V2DF_type_node,
21615 tree v4sf_ftype_v4sf_v4sf_v16qi
21616 = build_function_type_list (V4SF_type_node,
21622 tree v2di_ftype_v2di_si
21623 = build_function_type_list (V2DI_type_node,
21628 tree v4si_ftype_v4si_si
21629 = build_function_type_list (V4SI_type_node,
21634 tree v8hi_ftype_v8hi_si
21635 = build_function_type_list (V8HI_type_node,
21640 tree v16qi_ftype_v16qi_si
21641 = build_function_type_list (V16QI_type_node,
21645 tree v4sf_ftype_v4hi
21646 = build_function_type_list (V4SF_type_node,
21650 tree v4hi_ftype_v4sf
21651 = build_function_type_list (V4HI_type_node,
21655 tree v2di_ftype_v2di
21656 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
21658 tree v16qi_ftype_v8hi_v8hi
21659 = build_function_type_list (V16QI_type_node,
21660 V8HI_type_node, V8HI_type_node,
21662 tree v8hi_ftype_v4si_v4si
21663 = build_function_type_list (V8HI_type_node,
21664 V4SI_type_node, V4SI_type_node,
21666 tree v8hi_ftype_v16qi_v16qi
21667 = build_function_type_list (V8HI_type_node,
21668 V16QI_type_node, V16QI_type_node,
21670 tree v4hi_ftype_v8qi_v8qi
21671 = build_function_type_list (V4HI_type_node,
21672 V8QI_type_node, V8QI_type_node,
21674 tree unsigned_ftype_unsigned_uchar
21675 = build_function_type_list (unsigned_type_node,
21676 unsigned_type_node,
21677 unsigned_char_type_node,
21679 tree unsigned_ftype_unsigned_ushort
21680 = build_function_type_list (unsigned_type_node,
21681 unsigned_type_node,
21682 short_unsigned_type_node,
21684 tree unsigned_ftype_unsigned_unsigned
21685 = build_function_type_list (unsigned_type_node,
21686 unsigned_type_node,
21687 unsigned_type_node,
21689 tree uint64_ftype_uint64_uint64
21690 = build_function_type_list (long_long_unsigned_type_node,
21691 long_long_unsigned_type_node,
21692 long_long_unsigned_type_node,
21694 tree float_ftype_float
21695 = build_function_type_list (float_type_node,
21700 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
21702 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
21704 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
21706 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
21708 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
21710 tree v8sf_ftype_v8sf
21711 = build_function_type_list (V8SF_type_node,
21714 tree v8si_ftype_v8sf
21715 = build_function_type_list (V8SI_type_node,
21718 tree v8sf_ftype_v8si
21719 = build_function_type_list (V8SF_type_node,
21722 tree v4si_ftype_v4df
21723 = build_function_type_list (V4SI_type_node,
21726 tree v4df_ftype_v4df
21727 = build_function_type_list (V4DF_type_node,
21730 tree v4df_ftype_v4si
21731 = build_function_type_list (V4DF_type_node,
21734 tree v4df_ftype_v4sf
21735 = build_function_type_list (V4DF_type_node,
21738 tree v4sf_ftype_v4df
21739 = build_function_type_list (V4SF_type_node,
21742 tree v8sf_ftype_v8sf_v8sf
21743 = build_function_type_list (V8SF_type_node,
21744 V8SF_type_node, V8SF_type_node,
21746 tree v4df_ftype_v4df_v4df
21747 = build_function_type_list (V4DF_type_node,
21748 V4DF_type_node, V4DF_type_node,
21750 tree v8sf_ftype_v8sf_int
21751 = build_function_type_list (V8SF_type_node,
21752 V8SF_type_node, integer_type_node,
21754 tree v4si_ftype_v8si_int
21755 = build_function_type_list (V4SI_type_node,
21756 V8SI_type_node, integer_type_node,
21758 tree v4df_ftype_v4df_int
21759 = build_function_type_list (V4DF_type_node,
21760 V4DF_type_node, integer_type_node,
21762 tree v4sf_ftype_v8sf_int
21763 = build_function_type_list (V4SF_type_node,
21764 V8SF_type_node, integer_type_node,
21766 tree v2df_ftype_v4df_int
21767 = build_function_type_list (V2DF_type_node,
21768 V4DF_type_node, integer_type_node,
21770 tree v8sf_ftype_v8sf_v8sf_int
21771 = build_function_type_list (V8SF_type_node,
21772 V8SF_type_node, V8SF_type_node,
21775 tree v8sf_ftype_v8sf_v8sf_v8sf
21776 = build_function_type_list (V8SF_type_node,
21777 V8SF_type_node, V8SF_type_node,
21780 tree v4df_ftype_v4df_v4df_v4df
21781 = build_function_type_list (V4DF_type_node,
21782 V4DF_type_node, V4DF_type_node,
21785 tree v8si_ftype_v8si_v8si_int
21786 = build_function_type_list (V8SI_type_node,
21787 V8SI_type_node, V8SI_type_node,
21790 tree v4df_ftype_v4df_v4df_int
21791 = build_function_type_list (V4DF_type_node,
21792 V4DF_type_node, V4DF_type_node,
21795 tree v8sf_ftype_v8sf_v8sf_v8si_int
21796 = build_function_type_list (V8SF_type_node,
21797 V8SF_type_node, V8SF_type_node,
21798 V8SI_type_node, integer_type_node,
21800 tree v4df_ftype_v4df_v4df_v4di_int
21801 = build_function_type_list (V4DF_type_node,
21802 V4DF_type_node, V4DF_type_node,
21803 V4DI_type_node, integer_type_node,
21805 tree v4sf_ftype_v4sf_v4sf_v4si_int
21806 = build_function_type_list (V4SF_type_node,
21807 V4SF_type_node, V4SF_type_node,
21808 V4SI_type_node, integer_type_node,
21810 tree v2df_ftype_v2df_v2df_v2di_int
21811 = build_function_type_list (V2DF_type_node,
21812 V2DF_type_node, V2DF_type_node,
21813 V2DI_type_node, integer_type_node,
21815 tree v8sf_ftype_pcfloat
21816 = build_function_type_list (V8SF_type_node,
21819 tree v4df_ftype_pcdouble
21820 = build_function_type_list (V4DF_type_node,
21821 pcdouble_type_node,
21823 tree pcv4sf_type_node
21824 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
21825 tree pcv2df_type_node
21826 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
21827 tree v8sf_ftype_pcv4sf
21828 = build_function_type_list (V8SF_type_node,
21831 tree v4df_ftype_pcv2df
21832 = build_function_type_list (V4DF_type_node,
21835 tree v32qi_ftype_pcchar
21836 = build_function_type_list (V32QI_type_node,
21839 tree void_ftype_pchar_v32qi
21840 = build_function_type_list (void_type_node,
21841 pchar_type_node, V32QI_type_node,
21843 tree v8si_ftype_v8si_v4si_int
21844 = build_function_type_list (V8SI_type_node,
21845 V8SI_type_node, V4SI_type_node,
21848 tree v8sf_ftype_v8sf_v4sf_int
21849 = build_function_type_list (V8SF_type_node,
21850 V8SF_type_node, V4SF_type_node,
21853 tree v4df_ftype_v4df_v2df_int
21854 = build_function_type_list (V4DF_type_node,
21855 V4DF_type_node, V2DF_type_node,
21858 tree void_ftype_pfloat_v8sf
21859 = build_function_type_list (void_type_node,
21860 pfloat_type_node, V8SF_type_node,
21862 tree void_ftype_pdouble_v4df
21863 = build_function_type_list (void_type_node,
21864 pdouble_type_node, V4DF_type_node,
21866 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
21867 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
21868 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
21869 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
21870 tree pcv8sf_type_node
21871 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
21872 tree pcv4df_type_node
21873 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
21874 tree v8sf_ftype_pcv8sf_v8sf
21875 = build_function_type_list (V8SF_type_node,
21876 pcv8sf_type_node, V8SF_type_node,
21878 tree v4df_ftype_pcv4df_v4df
21879 = build_function_type_list (V4DF_type_node,
21880 pcv4df_type_node, V4DF_type_node,
21882 tree v4sf_ftype_pcv4sf_v4sf
21883 = build_function_type_list (V4SF_type_node,
21884 pcv4sf_type_node, V4SF_type_node,
21886 tree v2df_ftype_pcv2df_v2df
21887 = build_function_type_list (V2DF_type_node,
21888 pcv2df_type_node, V2DF_type_node,
21890 tree void_ftype_pv8sf_v8sf_v8sf
21891 = build_function_type_list (void_type_node,
21892 pv8sf_type_node, V8SF_type_node,
21895 tree void_ftype_pv4df_v4df_v4df
21896 = build_function_type_list (void_type_node,
21897 pv4df_type_node, V4DF_type_node,
21900 tree void_ftype_pv4sf_v4sf_v4sf
21901 = build_function_type_list (void_type_node,
21902 pv4sf_type_node, V4SF_type_node,
21905 tree void_ftype_pv2df_v2df_v2df
21906 = build_function_type_list (void_type_node,
21907 pv2df_type_node, V2DF_type_node,
21910 tree v4df_ftype_v2df
21911 = build_function_type_list (V4DF_type_node,
21914 tree v8sf_ftype_v4sf
21915 = build_function_type_list (V8SF_type_node,
21918 tree v8si_ftype_v4si
21919 = build_function_type_list (V8SI_type_node,
21922 tree v2df_ftype_v4df
21923 = build_function_type_list (V2DF_type_node,
21926 tree v4sf_ftype_v8sf
21927 = build_function_type_list (V4SF_type_node,
21930 tree v4si_ftype_v8si
21931 = build_function_type_list (V4SI_type_node,
21934 tree int_ftype_v4df
21935 = build_function_type_list (integer_type_node,
21938 tree int_ftype_v8sf
21939 = build_function_type_list (integer_type_node,
21942 tree int_ftype_v8sf_v8sf
21943 = build_function_type_list (integer_type_node,
21944 V8SF_type_node, V8SF_type_node,
21946 tree int_ftype_v4di_v4di
21947 = build_function_type_list (integer_type_node,
21948 V4DI_type_node, V4DI_type_node,
21950 tree int_ftype_v4df_v4df
21951 = build_function_type_list (integer_type_node,
21952 V4DF_type_node, V4DF_type_node,
21954 tree v8sf_ftype_v8sf_v8si
21955 = build_function_type_list (V8SF_type_node,
21956 V8SF_type_node, V8SI_type_node,
21958 tree v4df_ftype_v4df_v4di
21959 = build_function_type_list (V4DF_type_node,
21960 V4DF_type_node, V4DI_type_node,
21962 tree v4sf_ftype_v4sf_v4si
21963 = build_function_type_list (V4SF_type_node,
21964 V4SF_type_node, V4SI_type_node, NULL_TREE);
21965 tree v2df_ftype_v2df_v2di
21966 = build_function_type_list (V2DF_type_node,
21967 V2DF_type_node, V2DI_type_node, NULL_TREE);
21971 /* Add all special builtins with variable number of operands. */
21972 for (i = 0, d = bdesc_special_args;
21973 i < ARRAY_SIZE (bdesc_special_args);
21981 switch ((enum ix86_special_builtin_type) d->flag)
21983 case VOID_FTYPE_VOID:
21984 type = void_ftype_void;
21986 case V32QI_FTYPE_PCCHAR:
21987 type = v32qi_ftype_pcchar;
21989 case V16QI_FTYPE_PCCHAR:
21990 type = v16qi_ftype_pcchar;
21992 case V8SF_FTYPE_PCV4SF:
21993 type = v8sf_ftype_pcv4sf;
21995 case V8SF_FTYPE_PCFLOAT:
21996 type = v8sf_ftype_pcfloat;
21998 case V4DF_FTYPE_PCV2DF:
21999 type = v4df_ftype_pcv2df;
22001 case V4DF_FTYPE_PCDOUBLE:
22002 type = v4df_ftype_pcdouble;
22004 case V4SF_FTYPE_PCFLOAT:
22005 type = v4sf_ftype_pcfloat;
22007 case V2DI_FTYPE_PV2DI:
22008 type = v2di_ftype_pv2di;
22010 case V2DF_FTYPE_PCDOUBLE:
22011 type = v2df_ftype_pcdouble;
22013 case V8SF_FTYPE_PCV8SF_V8SF:
22014 type = v8sf_ftype_pcv8sf_v8sf;
22016 case V4DF_FTYPE_PCV4DF_V4DF:
22017 type = v4df_ftype_pcv4df_v4df;
22019 case V4SF_FTYPE_V4SF_PCV2SF:
22020 type = v4sf_ftype_v4sf_pcv2sf;
22022 case V4SF_FTYPE_PCV4SF_V4SF:
22023 type = v4sf_ftype_pcv4sf_v4sf;
22025 case V2DF_FTYPE_V2DF_PCDOUBLE:
22026 type = v2df_ftype_v2df_pcdouble;
22028 case V2DF_FTYPE_PCV2DF_V2DF:
22029 type = v2df_ftype_pcv2df_v2df;
22031 case VOID_FTYPE_PV2SF_V4SF:
22032 type = void_ftype_pv2sf_v4sf;
22034 case VOID_FTYPE_PV2DI_V2DI:
22035 type = void_ftype_pv2di_v2di;
22037 case VOID_FTYPE_PCHAR_V32QI:
22038 type = void_ftype_pchar_v32qi;
22040 case VOID_FTYPE_PCHAR_V16QI:
22041 type = void_ftype_pchar_v16qi;
22043 case VOID_FTYPE_PFLOAT_V8SF:
22044 type = void_ftype_pfloat_v8sf;
22046 case VOID_FTYPE_PFLOAT_V4SF:
22047 type = void_ftype_pfloat_v4sf;
22049 case VOID_FTYPE_PDOUBLE_V4DF:
22050 type = void_ftype_pdouble_v4df;
22052 case VOID_FTYPE_PDOUBLE_V2DF:
22053 type = void_ftype_pdouble_v2df;
22055 case VOID_FTYPE_PDI_DI:
22056 type = void_ftype_pdi_di;
22058 case VOID_FTYPE_PINT_INT:
22059 type = void_ftype_pint_int;
22061 case VOID_FTYPE_PV8SF_V8SF_V8SF:
22062 type = void_ftype_pv8sf_v8sf_v8sf;
22064 case VOID_FTYPE_PV4DF_V4DF_V4DF:
22065 type = void_ftype_pv4df_v4df_v4df;
22067 case VOID_FTYPE_PV4SF_V4SF_V4SF:
22068 type = void_ftype_pv4sf_v4sf_v4sf;
22070 case VOID_FTYPE_PV2DF_V2DF_V2DF:
22071 type = void_ftype_pv2df_v2df_v2df;
22074 gcc_unreachable ();
22077 def_builtin (d->mask, d->name, type, d->code);
22080 /* Add all builtins with variable number of operands. */
22081 for (i = 0, d = bdesc_args;
22082 i < ARRAY_SIZE (bdesc_args);
22090 switch ((enum ix86_builtin_type) d->flag)
22092 case FLOAT_FTYPE_FLOAT:
22093 type = float_ftype_float;
22095 case INT_FTYPE_V8SF_V8SF_PTEST:
22096 type = int_ftype_v8sf_v8sf;
22098 case INT_FTYPE_V4DI_V4DI_PTEST:
22099 type = int_ftype_v4di_v4di;
22101 case INT_FTYPE_V4DF_V4DF_PTEST:
22102 type = int_ftype_v4df_v4df;
22104 case INT_FTYPE_V4SF_V4SF_PTEST:
22105 type = int_ftype_v4sf_v4sf;
22107 case INT_FTYPE_V2DI_V2DI_PTEST:
22108 type = int_ftype_v2di_v2di;
22110 case INT_FTYPE_V2DF_V2DF_PTEST:
22111 type = int_ftype_v2df_v2df;
22113 case INT64_FTYPE_V4SF:
22114 type = int64_ftype_v4sf;
22116 case INT64_FTYPE_V2DF:
22117 type = int64_ftype_v2df;
22119 case INT_FTYPE_V16QI:
22120 type = int_ftype_v16qi;
22122 case INT_FTYPE_V8QI:
22123 type = int_ftype_v8qi;
22125 case INT_FTYPE_V8SF:
22126 type = int_ftype_v8sf;
22128 case INT_FTYPE_V4DF:
22129 type = int_ftype_v4df;
22131 case INT_FTYPE_V4SF:
22132 type = int_ftype_v4sf;
22134 case INT_FTYPE_V2DF:
22135 type = int_ftype_v2df;
22137 case V16QI_FTYPE_V16QI:
22138 type = v16qi_ftype_v16qi;
22140 case V8SI_FTYPE_V8SF:
22141 type = v8si_ftype_v8sf;
22143 case V8SI_FTYPE_V4SI:
22144 type = v8si_ftype_v4si;
22146 case V8HI_FTYPE_V8HI:
22147 type = v8hi_ftype_v8hi;
22149 case V8HI_FTYPE_V16QI:
22150 type = v8hi_ftype_v16qi;
22152 case V8QI_FTYPE_V8QI:
22153 type = v8qi_ftype_v8qi;
22155 case V8SF_FTYPE_V8SF:
22156 type = v8sf_ftype_v8sf;
22158 case V8SF_FTYPE_V8SI:
22159 type = v8sf_ftype_v8si;
22161 case V8SF_FTYPE_V4SF:
22162 type = v8sf_ftype_v4sf;
22164 case V4SI_FTYPE_V4DF:
22165 type = v4si_ftype_v4df;
22167 case V4SI_FTYPE_V4SI:
22168 type = v4si_ftype_v4si;
22170 case V4SI_FTYPE_V16QI:
22171 type = v4si_ftype_v16qi;
22173 case V4SI_FTYPE_V8SI:
22174 type = v4si_ftype_v8si;
22176 case V4SI_FTYPE_V8HI:
22177 type = v4si_ftype_v8hi;
22179 case V4SI_FTYPE_V4SF:
22180 type = v4si_ftype_v4sf;
22182 case V4SI_FTYPE_V2DF:
22183 type = v4si_ftype_v2df;
22185 case V4HI_FTYPE_V4HI:
22186 type = v4hi_ftype_v4hi;
22188 case V4DF_FTYPE_V4DF:
22189 type = v4df_ftype_v4df;
22191 case V4DF_FTYPE_V4SI:
22192 type = v4df_ftype_v4si;
22194 case V4DF_FTYPE_V4SF:
22195 type = v4df_ftype_v4sf;
22197 case V4DF_FTYPE_V2DF:
22198 type = v4df_ftype_v2df;
22200 case V4SF_FTYPE_V4SF:
22201 case V4SF_FTYPE_V4SF_VEC_MERGE:
22202 type = v4sf_ftype_v4sf;
22204 case V4SF_FTYPE_V8SF:
22205 type = v4sf_ftype_v8sf;
22207 case V4SF_FTYPE_V4SI:
22208 type = v4sf_ftype_v4si;
22210 case V4SF_FTYPE_V4DF:
22211 type = v4sf_ftype_v4df;
22213 case V4SF_FTYPE_V2DF:
22214 type = v4sf_ftype_v2df;
22216 case V2DI_FTYPE_V2DI:
22217 type = v2di_ftype_v2di;
22219 case V2DI_FTYPE_V16QI:
22220 type = v2di_ftype_v16qi;
22222 case V2DI_FTYPE_V8HI:
22223 type = v2di_ftype_v8hi;
22225 case V2DI_FTYPE_V4SI:
22226 type = v2di_ftype_v4si;
22228 case V2SI_FTYPE_V2SI:
22229 type = v2si_ftype_v2si;
22231 case V2SI_FTYPE_V4SF:
22232 type = v2si_ftype_v4sf;
22234 case V2SI_FTYPE_V2DF:
22235 type = v2si_ftype_v2df;
22237 case V2SI_FTYPE_V2SF:
22238 type = v2si_ftype_v2sf;
22240 case V2DF_FTYPE_V4DF:
22241 type = v2df_ftype_v4df;
22243 case V2DF_FTYPE_V4SF:
22244 type = v2df_ftype_v4sf;
22246 case V2DF_FTYPE_V2DF:
22247 case V2DF_FTYPE_V2DF_VEC_MERGE:
22248 type = v2df_ftype_v2df;
22250 case V2DF_FTYPE_V2SI:
22251 type = v2df_ftype_v2si;
22253 case V2DF_FTYPE_V4SI:
22254 type = v2df_ftype_v4si;
22256 case V2SF_FTYPE_V2SF:
22257 type = v2sf_ftype_v2sf;
22259 case V2SF_FTYPE_V2SI:
22260 type = v2sf_ftype_v2si;
22262 case V16QI_FTYPE_V16QI_V16QI:
22263 type = v16qi_ftype_v16qi_v16qi;
22265 case V16QI_FTYPE_V8HI_V8HI:
22266 type = v16qi_ftype_v8hi_v8hi;
22268 case V8QI_FTYPE_V8QI_V8QI:
22269 type = v8qi_ftype_v8qi_v8qi;
22271 case V8QI_FTYPE_V4HI_V4HI:
22272 type = v8qi_ftype_v4hi_v4hi;
22274 case V8HI_FTYPE_V8HI_V8HI:
22275 case V8HI_FTYPE_V8HI_V8HI_COUNT:
22276 type = v8hi_ftype_v8hi_v8hi;
22278 case V8HI_FTYPE_V16QI_V16QI:
22279 type = v8hi_ftype_v16qi_v16qi;
22281 case V8HI_FTYPE_V4SI_V4SI:
22282 type = v8hi_ftype_v4si_v4si;
22284 case V8HI_FTYPE_V8HI_SI_COUNT:
22285 type = v8hi_ftype_v8hi_int;
22287 case V8SF_FTYPE_V8SF_V8SF:
22288 type = v8sf_ftype_v8sf_v8sf;
22290 case V8SF_FTYPE_V8SF_V8SI:
22291 type = v8sf_ftype_v8sf_v8si;
22293 case V4SI_FTYPE_V4SI_V4SI:
22294 case V4SI_FTYPE_V4SI_V4SI_COUNT:
22295 type = v4si_ftype_v4si_v4si;
22297 case V4SI_FTYPE_V8HI_V8HI:
22298 type = v4si_ftype_v8hi_v8hi;
22300 case V4SI_FTYPE_V4SF_V4SF:
22301 type = v4si_ftype_v4sf_v4sf;
22303 case V4SI_FTYPE_V2DF_V2DF:
22304 type = v4si_ftype_v2df_v2df;
22306 case V4SI_FTYPE_V4SI_SI_COUNT:
22307 type = v4si_ftype_v4si_int;
22309 case V4HI_FTYPE_V4HI_V4HI:
22310 case V4HI_FTYPE_V4HI_V4HI_COUNT:
22311 type = v4hi_ftype_v4hi_v4hi;
22313 case V4HI_FTYPE_V8QI_V8QI:
22314 type = v4hi_ftype_v8qi_v8qi;
22316 case V4HI_FTYPE_V2SI_V2SI:
22317 type = v4hi_ftype_v2si_v2si;
22319 case V4HI_FTYPE_V4HI_SI_COUNT:
22320 type = v4hi_ftype_v4hi_int;
22322 case V4DF_FTYPE_V4DF_V4DF:
22323 type = v4df_ftype_v4df_v4df;
22325 case V4DF_FTYPE_V4DF_V4DI:
22326 type = v4df_ftype_v4df_v4di;
22328 case V4SF_FTYPE_V4SF_V4SF:
22329 case V4SF_FTYPE_V4SF_V4SF_SWAP:
22330 type = v4sf_ftype_v4sf_v4sf;
22332 case V4SF_FTYPE_V4SF_V4SI:
22333 type = v4sf_ftype_v4sf_v4si;
22335 case V4SF_FTYPE_V4SF_V2SI:
22336 type = v4sf_ftype_v4sf_v2si;
22338 case V4SF_FTYPE_V4SF_V2DF:
22339 type = v4sf_ftype_v4sf_v2df;
22341 case V4SF_FTYPE_V4SF_DI:
22342 type = v4sf_ftype_v4sf_int64;
22344 case V4SF_FTYPE_V4SF_SI:
22345 type = v4sf_ftype_v4sf_int;
22347 case V2DI_FTYPE_V2DI_V2DI:
22348 case V2DI_FTYPE_V2DI_V2DI_COUNT:
22349 type = v2di_ftype_v2di_v2di;
22351 case V2DI_FTYPE_V16QI_V16QI:
22352 type = v2di_ftype_v16qi_v16qi;
22354 case V2DI_FTYPE_V4SI_V4SI:
22355 type = v2di_ftype_v4si_v4si;
22357 case V2DI_FTYPE_V2DI_V16QI:
22358 type = v2di_ftype_v2di_v16qi;
22360 case V2DI_FTYPE_V2DF_V2DF:
22361 type = v2di_ftype_v2df_v2df;
22363 case V2DI_FTYPE_V2DI_SI_COUNT:
22364 type = v2di_ftype_v2di_int;
22366 case V2SI_FTYPE_V2SI_V2SI:
22367 case V2SI_FTYPE_V2SI_V2SI_COUNT:
22368 type = v2si_ftype_v2si_v2si;
22370 case V2SI_FTYPE_V4HI_V4HI:
22371 type = v2si_ftype_v4hi_v4hi;
22373 case V2SI_FTYPE_V2SF_V2SF:
22374 type = v2si_ftype_v2sf_v2sf;
22376 case V2SI_FTYPE_V2SI_SI_COUNT:
22377 type = v2si_ftype_v2si_int;
22379 case V2DF_FTYPE_V2DF_V2DF:
22380 case V2DF_FTYPE_V2DF_V2DF_SWAP:
22381 type = v2df_ftype_v2df_v2df;
22383 case V2DF_FTYPE_V2DF_V4SF:
22384 type = v2df_ftype_v2df_v4sf;
22386 case V2DF_FTYPE_V2DF_V2DI:
22387 type = v2df_ftype_v2df_v2di;
22389 case V2DF_FTYPE_V2DF_DI:
22390 type = v2df_ftype_v2df_int64;
22392 case V2DF_FTYPE_V2DF_SI:
22393 type = v2df_ftype_v2df_int;
22395 case V2SF_FTYPE_V2SF_V2SF:
22396 type = v2sf_ftype_v2sf_v2sf;
22398 case V1DI_FTYPE_V1DI_V1DI:
22399 case V1DI_FTYPE_V1DI_V1DI_COUNT:
22400 type = v1di_ftype_v1di_v1di;
22402 case V1DI_FTYPE_V8QI_V8QI:
22403 type = v1di_ftype_v8qi_v8qi;
22405 case V1DI_FTYPE_V2SI_V2SI:
22406 type = v1di_ftype_v2si_v2si;
22408 case V1DI_FTYPE_V1DI_SI_COUNT:
22409 type = v1di_ftype_v1di_int;
22411 case UINT64_FTYPE_UINT64_UINT64:
22412 type = uint64_ftype_uint64_uint64;
22414 case UINT_FTYPE_UINT_UINT:
22415 type = unsigned_ftype_unsigned_unsigned;
22417 case UINT_FTYPE_UINT_USHORT:
22418 type = unsigned_ftype_unsigned_ushort;
22420 case UINT_FTYPE_UINT_UCHAR:
22421 type = unsigned_ftype_unsigned_uchar;
22423 case V8HI_FTYPE_V8HI_INT:
22424 type = v8hi_ftype_v8hi_int;
22426 case V8SF_FTYPE_V8SF_INT:
22427 type = v8sf_ftype_v8sf_int;
22429 case V4SI_FTYPE_V4SI_INT:
22430 type = v4si_ftype_v4si_int;
22432 case V4SI_FTYPE_V8SI_INT:
22433 type = v4si_ftype_v8si_int;
22435 case V4HI_FTYPE_V4HI_INT:
22436 type = v4hi_ftype_v4hi_int;
22438 case V4DF_FTYPE_V4DF_INT:
22439 type = v4df_ftype_v4df_int;
22441 case V4SF_FTYPE_V4SF_INT:
22442 type = v4sf_ftype_v4sf_int;
22444 case V4SF_FTYPE_V8SF_INT:
22445 type = v4sf_ftype_v8sf_int;
22447 case V2DI_FTYPE_V2DI_INT:
22448 case V2DI2TI_FTYPE_V2DI_INT:
22449 type = v2di_ftype_v2di_int;
22451 case V2DF_FTYPE_V2DF_INT:
22452 type = v2df_ftype_v2df_int;
22454 case V2DF_FTYPE_V4DF_INT:
22455 type = v2df_ftype_v4df_int;
22457 case V16QI_FTYPE_V16QI_V16QI_V16QI:
22458 type = v16qi_ftype_v16qi_v16qi_v16qi;
22460 case V8SF_FTYPE_V8SF_V8SF_V8SF:
22461 type = v8sf_ftype_v8sf_v8sf_v8sf;
22463 case V4DF_FTYPE_V4DF_V4DF_V4DF:
22464 type = v4df_ftype_v4df_v4df_v4df;
22466 case V4SF_FTYPE_V4SF_V4SF_V4SF:
22467 type = v4sf_ftype_v4sf_v4sf_v4sf;
22469 case V2DF_FTYPE_V2DF_V2DF_V2DF:
22470 type = v2df_ftype_v2df_v2df_v2df;
22472 case V16QI_FTYPE_V16QI_V16QI_INT:
22473 type = v16qi_ftype_v16qi_v16qi_int;
22475 case V8SI_FTYPE_V8SI_V8SI_INT:
22476 type = v8si_ftype_v8si_v8si_int;
22478 case V8SI_FTYPE_V8SI_V4SI_INT:
22479 type = v8si_ftype_v8si_v4si_int;
22481 case V8HI_FTYPE_V8HI_V8HI_INT:
22482 type = v8hi_ftype_v8hi_v8hi_int;
22484 case V8SF_FTYPE_V8SF_V8SF_INT:
22485 type = v8sf_ftype_v8sf_v8sf_int;
22487 case V8SF_FTYPE_V8SF_V4SF_INT:
22488 type = v8sf_ftype_v8sf_v4sf_int;
22490 case V4SI_FTYPE_V4SI_V4SI_INT:
22491 type = v4si_ftype_v4si_v4si_int;
22493 case V4DF_FTYPE_V4DF_V4DF_INT:
22494 type = v4df_ftype_v4df_v4df_int;
22496 case V4DF_FTYPE_V4DF_V2DF_INT:
22497 type = v4df_ftype_v4df_v2df_int;
22499 case V4SF_FTYPE_V4SF_V4SF_INT:
22500 type = v4sf_ftype_v4sf_v4sf_int;
22502 case V2DI_FTYPE_V2DI_V2DI_INT:
22503 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
22504 type = v2di_ftype_v2di_v2di_int;
22506 case V2DF_FTYPE_V2DF_V2DF_INT:
22507 type = v2df_ftype_v2df_v2df_int;
22509 case V2DI_FTYPE_V2DI_UINT_UINT:
22510 type = v2di_ftype_v2di_unsigned_unsigned;
22512 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
22513 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
22515 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
22516 type = v1di_ftype_v1di_v1di_int;
22518 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
22519 type = v8sf_ftype_v8sf_v8sf_v8si_int;
22521 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
22522 type = v4df_ftype_v4df_v4df_v4di_int;
22524 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
22525 type = v4sf_ftype_v4sf_v4sf_v4si_int;
22527 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
22528 type = v2df_ftype_v2df_v2df_v2di_int;
22531 gcc_unreachable ();
22534 def_builtin_const (d->mask, d->name, type, d->code);
22537 /* pcmpestr[im] insns. */
22538 for (i = 0, d = bdesc_pcmpestr;
22539 i < ARRAY_SIZE (bdesc_pcmpestr);
22542 if (d->code == IX86_BUILTIN_PCMPESTRM128)
22543 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
22545 ftype = int_ftype_v16qi_int_v16qi_int_int;
22546 def_builtin_const (d->mask, d->name, ftype, d->code);
22549 /* pcmpistr[im] insns. */
22550 for (i = 0, d = bdesc_pcmpistr;
22551 i < ARRAY_SIZE (bdesc_pcmpistr);
22554 if (d->code == IX86_BUILTIN_PCMPISTRM128)
22555 ftype = v16qi_ftype_v16qi_v16qi_int;
22557 ftype = int_ftype_v16qi_v16qi_int;
22558 def_builtin_const (d->mask, d->name, ftype, d->code);
22561 /* comi/ucomi insns. */
22562 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
22563 if (d->mask == OPTION_MASK_ISA_SSE2)
22564 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
22566 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
22569 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
22570 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
22572 /* SSE or 3DNow!A */
22573 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
22576 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
22578 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
22579 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
22582 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
22583 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
22586 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
22587 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
22588 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
22589 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
22590 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
22591 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
22594 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
22597 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
22598 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
22600 /* Access to the vec_init patterns. */
22601 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
22602 integer_type_node, NULL_TREE);
22603 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
22605 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
22606 short_integer_type_node,
22607 short_integer_type_node,
22608 short_integer_type_node, NULL_TREE);
22609 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
22611 ftype = build_function_type_list (V8QI_type_node, char_type_node,
22612 char_type_node, char_type_node,
22613 char_type_node, char_type_node,
22614 char_type_node, char_type_node,
22615 char_type_node, NULL_TREE);
22616 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
22618 /* Access to the vec_extract patterns. */
22619 ftype = build_function_type_list (double_type_node, V2DF_type_node,
22620 integer_type_node, NULL_TREE);
22621 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
22623 ftype = build_function_type_list (long_long_integer_type_node,
22624 V2DI_type_node, integer_type_node,
22626 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
22628 ftype = build_function_type_list (float_type_node, V4SF_type_node,
22629 integer_type_node, NULL_TREE);
22630 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
22632 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
22633 integer_type_node, NULL_TREE);
22634 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
22636 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
22637 integer_type_node, NULL_TREE);
22638 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
22640 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
22641 integer_type_node, NULL_TREE);
22642 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
22644 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
22645 integer_type_node, NULL_TREE);
22646 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
22648 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
22649 integer_type_node, NULL_TREE);
22650 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
22652 /* Access to the vec_set patterns. */
22653 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
22655 integer_type_node, NULL_TREE);
22656 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
22658 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
22660 integer_type_node, NULL_TREE);
22661 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
22663 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
22665 integer_type_node, NULL_TREE);
22666 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
22668 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
22670 integer_type_node, NULL_TREE);
22671 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
22673 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
22675 integer_type_node, NULL_TREE);
22676 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
22678 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
22680 integer_type_node, NULL_TREE);
22681 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
22683 /* Add SSE5 multi-arg argument instructions */
22684 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
22686 tree mtype = NULL_TREE;
22691 switch ((enum multi_arg_type)d->flag)
22693 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
22694 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
22695 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
22696 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
22697 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
22698 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
22699 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
22700 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
22701 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
22702 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
22703 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
22704 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
22705 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
22706 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
22707 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
22708 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
22709 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
22710 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
22711 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
22712 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
22713 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
22714 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
22715 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
22716 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
22717 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
22718 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
22719 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
22720 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
22721 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
22722 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
22723 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
22724 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
22725 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
22726 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
22727 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
22728 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
22729 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
22730 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
22731 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
22732 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
22733 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
22734 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
22735 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
22736 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
22737 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
22738 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
22739 case MULTI_ARG_UNKNOWN:
22741 gcc_unreachable ();
22745 def_builtin_const (d->mask, d->name, mtype, d->code);
22749 /* Internal method for ix86_init_builtins. */
/* Registers the __builtin_ms_va_{start,end,copy} and
   __builtin_sysv_va_{start,end,copy} builtins so that code compiled for
   one x86-64 ABI can still manipulate va_lists of the other ABI.  Each
   builtin is tagged with an "ms_abi" or "sysv_abi" attribute list so the
   middle end resolves it against the matching va_list type.
   NOTE(review): this is an elided listing — guard conditions and the
   closing brace between the numbered lines are not visible here.  */
22752 ix86_init_builtins_va_builtins_abi (void)
22754 tree ms_va_ref, sysv_va_ref;
22755 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
22756 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
22757 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
22758 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists attached to each builtin decl below.  */
22762 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
22763 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* Reference types used as the first parameter of the va_* builtins.  */
22764 ms_va_ref = build_reference_type (ms_va_list_type_node);
22766 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Build the function types: va_end/va_copy take fixed arguments,
   va_start is varargs.  */
22769 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22770 fnvoid_va_start_ms =
22771 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22772 fnvoid_va_end_sysv =
22773 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
22774 fnvoid_va_start_sysv =
22775 build_varargs_function_type_list (void_type_node, sysv_va_ref,
22777 fnvoid_va_copy_ms =
22778 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
22780 fnvoid_va_copy_sysv =
22781 build_function_type_list (void_type_node, sysv_va_ref,
22782 sysv_va_ref, NULL_TREE);
/* Register all six builtins; BUILT_IN_VA_* codes make the middle end
   expand them like the ordinary va_* builtins, per-ABI.  */
22784 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
22785 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
22786 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
22787 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
22788 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
22789 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
22790 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
22791 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22792 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
22793 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22794 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
22795 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level target hook body: registers the x86-specific builtin types
   (__float80, __float128) and the TFmode builtins (__builtin_infq,
   __builtin_fabsq, __builtin_copysignq), then delegates to
   ix86_init_mmx_sse_builtins and the va_builtins ABI helper.
   NOTE(review): elided listing — conditionals and string arguments
   between the numbered lines are not visible here.  */
22799 ix86_init_builtins (void)
22801 tree float128_type_node = make_node (REAL_TYPE);
/* __float80 is long double itself when long double is XFmode;
   otherwise a distinct 80-bit REAL_TYPE is laid out below.  */
22804 /* The __float80 type. */
22805 if (TYPE_MODE (long_double_type_node) == XFmode)
22806 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
22810 /* The __float80 type. */
22811 tree float80_type_node = make_node (REAL_TYPE);
22813 TYPE_PRECISION (float80_type_node) = 80;
22814 layout_type (float80_type_node);
22815 (*lang_hooks.types.register_builtin_type) (float80_type_node,
22819 /* The __float128 type. */
22820 TYPE_PRECISION (float128_type_node) = 128;
22821 layout_type (float128_type_node);
22822 (*lang_hooks.types.register_builtin_type) (float128_type_node,
22825 /* TFmode support builtins. */
22826 ftype = build_function_type (float128_type_node, void_list_node);
22827 decl = add_builtin_function ("__builtin_infq", ftype,
22828 IX86_BUILTIN_INFQ, BUILT_IN_MD,
22830 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
22832 /* We will expand them to normal call if SSE2 isn't available since
22833 they are used by libgcc. */
22834 ftype = build_function_type_list (float128_type_node,
22835 float128_type_node,
/* __builtin_fabsq falls back to the libgcc routine __fabstf2.  */
22837 decl = add_builtin_function ("__builtin_fabsq", ftype,
22838 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
22839 "__fabstf2", NULL_TREE);
22840 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
22841 TREE_READONLY (decl) = 1;
22843 ftype = build_function_type_list (float128_type_node,
22844 float128_type_node,
22845 float128_type_node,
/* __builtin_copysignq falls back to the libgcc routine __copysigntf3.  */
22847 decl = add_builtin_function ("__builtin_copysignq", ftype,
22848 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
22849 "__copysigntf3", NULL_TREE);
22850 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
22851 TREE_READONLY (decl) = 1;
/* Register all MMX/SSE builtins and the cross-ABI va_* builtins.  */
22853 ix86_init_mmx_sse_builtins ();
22855 ix86_init_builtins_va_builtins_abi ();
22858 /* Errors in the source file can cause expand_expr to return const0_rtx
22859 where we expect a vector. To avoid crashing, use one of the vector
22860 clear instructions. */
/* Returns X unchanged unless it is const0_rtx, in which case it is
   replaced by the all-zeros constant of vector mode MODE.  */
22862 safe_vector_operand (rtx x, enum machine_mode mode)
22864 if (x == const0_rtx)
22865 x = CONST0_RTX (mode);
22869 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expands a two-operand builtin described by ICODE on call EXP, emitting
   the insn and returning the result rtx (TARGET if usable, otherwise a
   fresh pseudo in the insn's output mode).
   NOTE(review): elided listing — the emit/return tail of the function is
   not visible here.  */
22872 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
22875 tree arg0 = CALL_EXPR_ARG (exp, 0);
22876 tree arg1 = CALL_EXPR_ARG (exp, 1);
22877 rtx op0 = expand_normal (arg0);
22878 rtx op1 = expand_normal (arg1);
22879 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22880 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22881 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx leaking in from erroneous source.  */
22883 if (VECTOR_MODE_P (mode0))
22884 op0 = safe_vector_operand (op0, mode0);
22885 if (VECTOR_MODE_P (mode1))
22886 op1 = safe_vector_operand (op1, mode1);
22888 if (optimize || !target
22889 || GET_MODE (target) != tmode
22890 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22891 target = gen_reg_rtx (tmode);
/* An SImode second operand destined for a TImode insn operand is
   widened by loading it into a V4SI register and taking its TImode
   lowpart.  */
22893 if (GET_MODE (op1) == SImode && mode1 == TImode)
22895 rtx x = gen_reg_rtx (V4SImode);
22896 emit_insn (gen_sse2_loadd (x, op1));
22897 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when the insn predicates reject them.  */
22900 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22901 op0 = copy_to_mode_reg (mode0, op0);
22902 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22903 op1 = copy_to_mode_reg (mode1, op1);
22905 pat = GEN_FCN (icode) (target, op0, op1);
22914 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expands an SSE5 multi-argument builtin: ICODE is the insn, EXP the
   call, TARGET the suggested result, M_TYPE the MULTI_ARG_* shape that
   fixes the argument count / immediate / comparison handling, and
   SUB_CODE the rtx comparison or sub-operation code where one applies.
   NOTE(review): elided listing — nargs assignments, `break`s, the
   operand-collection details and the emit/return tail fall between the
   numbered lines and are not visible here.  */
22917 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
22918 enum multi_arg_type m_type,
22919 enum insn_code sub_code)
22924 bool comparison_p = false;
22926 bool last_arg_constant = false;
22927 int num_memory = 0;
22930 enum machine_mode mode;
22933 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: 3-operand forms.  */
22937 case MULTI_ARG_3_SF:
22938 case MULTI_ARG_3_DF:
22939 case MULTI_ARG_3_DI:
22940 case MULTI_ARG_3_SI:
22941 case MULTI_ARG_3_SI_DI:
22942 case MULTI_ARG_3_HI:
22943 case MULTI_ARG_3_HI_SI:
22944 case MULTI_ARG_3_QI:
22945 case MULTI_ARG_3_PERMPS:
22946 case MULTI_ARG_3_PERMPD:
/* 2-operand forms.  */
22950 case MULTI_ARG_2_SF:
22951 case MULTI_ARG_2_DF:
22952 case MULTI_ARG_2_DI:
22953 case MULTI_ARG_2_SI:
22954 case MULTI_ARG_2_HI:
22955 case MULTI_ARG_2_QI:
/* 2-operand forms whose last argument must be an immediate.  */
22959 case MULTI_ARG_2_DI_IMM:
22960 case MULTI_ARG_2_SI_IMM:
22961 case MULTI_ARG_2_HI_IMM:
22962 case MULTI_ARG_2_QI_IMM:
22964 last_arg_constant = true;
/* 1-operand (unary / conversion) forms.  */
22967 case MULTI_ARG_1_SF:
22968 case MULTI_ARG_1_DF:
22969 case MULTI_ARG_1_DI:
22970 case MULTI_ARG_1_SI:
22971 case MULTI_ARG_1_HI:
22972 case MULTI_ARG_1_QI:
22973 case MULTI_ARG_1_SI_DI:
22974 case MULTI_ARG_1_HI_DI:
22975 case MULTI_ARG_1_HI_SI:
22976 case MULTI_ARG_1_QI_DI:
22977 case MULTI_ARG_1_QI_SI:
22978 case MULTI_ARG_1_QI_HI:
22979 case MULTI_ARG_1_PH2PS:
22980 case MULTI_ARG_1_PS2PH:
/* 2-operand comparison forms: SUB_CODE supplies the rtx_code.  */
22984 case MULTI_ARG_2_SF_CMP:
22985 case MULTI_ARG_2_DF_CMP:
22986 case MULTI_ARG_2_DI_CMP:
22987 case MULTI_ARG_2_SI_CMP:
22988 case MULTI_ARG_2_HI_CMP:
22989 case MULTI_ARG_2_QI_CMP:
22991 comparison_p = true;
/* 2-operand "test" forms.  */
22994 case MULTI_ARG_2_SF_TF:
22995 case MULTI_ARG_2_DF_TF:
22996 case MULTI_ARG_2_DI_TF:
22997 case MULTI_ARG_2_SI_TF:
22998 case MULTI_ARG_2_HI_TF:
22999 case MULTI_ARG_2_QI_TF:
23004 case MULTI_ARG_UNKNOWN:
23006 gcc_unreachable ();
/* Pick a result register acceptable to the insn's output predicate.  */
23009 if (optimize || !target
23010 || GET_MODE (target) != tmode
23011 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23012 target = gen_reg_rtx (tmode);
23014 gcc_assert (nargs <= 4);
/* Expand and legitimize each call argument.  */
23016 for (i = 0; i < nargs; i++)
23018 tree arg = CALL_EXPR_ARG (exp, i);
23019 rtx op = expand_normal (arg);
23020 int adjust = (comparison_p) ? 1 : 0;
23021 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
23023 if (last_arg_constant && i == nargs-1)
23025 if (GET_CODE (op) != CONST_INT)
23027 error ("last argument must be an immediate");
/* On error, still return a register of the right mode so
   expansion can continue.  */
23028 return gen_reg_rtx (tmode);
23033 if (VECTOR_MODE_P (mode))
23034 op = safe_vector_operand (op, mode);
23036 /* If we aren't optimizing, only allow one memory operand to be
23038 if (memory_operand (op, mode))
23041 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23044 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
23046 op = force_reg (mode, op);
23050 args[i].mode = mode;
/* Emit the insn with the operand shape matching nargs / M_TYPE.  */
23056 pat = GEN_FCN (icode) (target, args[0].op);
23061 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23062 GEN_INT ((int)sub_code));
23063 else if (! comparison_p)
23064 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
/* Comparison form: the insn takes the comparison rtx as an operand.  */
23067 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23071 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23076 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23080 gcc_unreachable ();
23090 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23091 insns with vec_merge. */
/* Expands a one-argument builtin whose insn pattern is a scalar
   operation merged into a vector (operand 2 duplicates operand 1).
   NOTE(review): elided listing — the op1 assignment and the emit/return
   tail are not visible here.  */
23094 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23098 tree arg0 = CALL_EXPR_ARG (exp, 0);
23099 rtx op1, op0 = expand_normal (arg0);
23100 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23101 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23103 if (optimize || !target
23104 || GET_MODE (target) != tmode
23105 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23106 target = gen_reg_rtx (tmode);
23108 if (VECTOR_MODE_P (mode0))
23109 op0 = safe_vector_operand (op0, mode0);
/* Both insn input operands use mode0; force to registers as needed.  */
23111 if ((optimize && !register_operand (op0, mode0))
23112 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23113 op0 = copy_to_mode_reg (mode0, op0);
23116 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23117 op1 = copy_to_mode_reg (mode0, op1);
23119 pat = GEN_FCN (icode) (target, op0, op1);
23126 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands a two-operand SSE compare builtin described by D on call EXP.
   The insn takes the comparison rtx (D->comparison) as its final
   operand; SWAP requests exchanging op0/op1 for comparisons only
   available in the swapped form.
   NOTE(review): elided listing — the swap completion and emit/return
   tail are not visible here.  */
23129 ix86_expand_sse_compare (const struct builtin_description *d,
23130 tree exp, rtx target, bool swap)
23133 tree arg0 = CALL_EXPR_ARG (exp, 0);
23134 tree arg1 = CALL_EXPR_ARG (exp, 1);
23135 rtx op0 = expand_normal (arg0);
23136 rtx op1 = expand_normal (arg1);
23138 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23139 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23140 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23141 enum rtx_code comparison = d->comparison;
23143 if (VECTOR_MODE_P (mode0))
23144 op0 = safe_vector_operand (op0, mode0);
23145 if (VECTOR_MODE_P (mode1))
23146 op1 = safe_vector_operand (op1, mode1);
23148 /* Swap operands if we have a comparison that isn't available in
23152 rtx tmp = gen_reg_rtx (mode1);
23153 emit_move_insn (tmp, op1);
23158 if (optimize || !target
23159 || GET_MODE (target) != tmode
23160 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23161 target = gen_reg_rtx (tmode);
23163 if ((optimize && !register_operand (op0, mode0))
23164 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23165 op0 = copy_to_mode_reg (mode0, op0);
23166 if ((optimize && !register_operand (op1, mode1))
23167 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23168 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison rtx itself is passed as the insn's last operand.  */
23170 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23171 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23178 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands a comi/ucomi-style builtin: emits the flag-setting compare
   D->icode, then materializes D->comparison on the flags into the low
   byte of a fresh SImode register, which is returned (as the SUBREG's
   base register).
   NOTE(review): elided listing — the operand-swap body and the flags
   register reference inside the SET are not visible here.  */
23181 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23185 tree arg0 = CALL_EXPR_ARG (exp, 0);
23186 tree arg1 = CALL_EXPR_ARG (exp, 1);
23187 rtx op0 = expand_normal (arg0);
23188 rtx op1 = expand_normal (arg1);
23189 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23190 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23191 enum rtx_code comparison = d->comparison;
23193 if (VECTOR_MODE_P (mode0))
23194 op0 = safe_vector_operand (op0, mode0);
23195 if (VECTOR_MODE_P (mode1))
23196 op1 = safe_vector_operand (op1, mode1);
23198 /* Swap operands if we have a comparison that isn't available in
23200 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Result register: zero SImode, then write only the low QImode part.  */
23207 target = gen_reg_rtx (SImode);
23208 emit_move_insn (target, const0_rtx);
23209 target = gen_rtx_SUBREG (QImode, target, 0);
23211 if ((optimize && !register_operand (op0, mode0))
23212 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23213 op0 = copy_to_mode_reg (mode0, op0);
23214 if ((optimize && !register_operand (op1, mode1))
23215 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23216 op1 = copy_to_mode_reg (mode1, op1);
23218 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the low byte of TARGET from the comparison on the flags.  */
23222 emit_insn (gen_rtx_SET (VOIDmode,
23223 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23224 gen_rtx_fmt_ee (comparison, QImode,
23228 return SUBREG_REG (target);
23231 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expands a ptest-style builtin analogously to ix86_expand_sse_comi:
   emits the flag-setting D->icode insn on the two vector operands, then
   extracts D->comparison from the flags into the low byte of a zeroed
   SImode register and returns that register.
   NOTE(review): elided listing — the flags register reference inside
   the SET is not visible here.  */
23234 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
23238 tree arg0 = CALL_EXPR_ARG (exp, 0);
23239 tree arg1 = CALL_EXPR_ARG (exp, 1);
23240 rtx op0 = expand_normal (arg0);
23241 rtx op1 = expand_normal (arg1);
23242 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23243 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23244 enum rtx_code comparison = d->comparison;
23246 if (VECTOR_MODE_P (mode0))
23247 op0 = safe_vector_operand (op0, mode0);
23248 if (VECTOR_MODE_P (mode1))
23249 op1 = safe_vector_operand (op1, mode1);
/* Result register: zero SImode, then write only the low QImode part.  */
23251 target = gen_reg_rtx (SImode);
23252 emit_move_insn (target, const0_rtx);
23253 target = gen_rtx_SUBREG (QImode, target, 0);
23255 if ((optimize && !register_operand (op0, mode0))
23256 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23257 op0 = copy_to_mode_reg (mode0, op0);
23258 if ((optimize && !register_operand (op1, mode1))
23259 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23260 op1 = copy_to_mode_reg (mode1, op1);
23262 pat = GEN_FCN (d->icode) (op0, op1);
23266 emit_insn (gen_rtx_SET (VOIDmode,
23267 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23268 gen_rtx_fmt_ee (comparison, QImode,
23272 return SUBREG_REG (target);
23275 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expands the SSE4.2 pcmpestri/pcmpestrm builtins (5 arguments:
   xmm, explicit length, xmm, explicit length, 8-bit immediate).  The
   underlying insn has two outputs (index and mask); depending on
   D->code one becomes the result and the other a scratch, and for the
   flag-reading variants the result is a comparison on the flags
   register recorded in D->flag.
   NOTE(review): elided listing — error-return paths and the emit of
   PAT are not visible here.  */
23278 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
23279 tree exp, rtx target)
23282 tree arg0 = CALL_EXPR_ARG (exp, 0);
23283 tree arg1 = CALL_EXPR_ARG (exp, 1);
23284 tree arg2 = CALL_EXPR_ARG (exp, 2);
23285 tree arg3 = CALL_EXPR_ARG (exp, 3);
23286 tree arg4 = CALL_EXPR_ARG (exp, 4);
23287 rtx scratch0, scratch1;
23288 rtx op0 = expand_normal (arg0);
23289 rtx op1 = expand_normal (arg1);
23290 rtx op2 = expand_normal (arg2);
23291 rtx op3 = expand_normal (arg3);
23292 rtx op4 = expand_normal (arg4);
23293 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Operand modes: two outputs, two vector inputs, two integer lengths,
   and the trailing immediate.  */
23295 tmode0 = insn_data[d->icode].operand[0].mode;
23296 tmode1 = insn_data[d->icode].operand[1].mode;
23297 modev2 = insn_data[d->icode].operand[2].mode;
23298 modei3 = insn_data[d->icode].operand[3].mode;
23299 modev4 = insn_data[d->icode].operand[4].mode;
23300 modei5 = insn_data[d->icode].operand[5].mode;
23301 modeimm = insn_data[d->icode].operand[6].mode;
23303 if (VECTOR_MODE_P (modev2))
23304 op0 = safe_vector_operand (op0, modev2);
23305 if (VECTOR_MODE_P (modev4))
23306 op2 = safe_vector_operand (op2, modev4);
23308 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23309 op0 = copy_to_mode_reg (modev2, op0);
23310 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
23311 op1 = copy_to_mode_reg (modei3, op1);
23312 if ((optimize && !register_operand (op2, modev4))
23313 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
23314 op2 = copy_to_mode_reg (modev4, op2);
23315 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
23316 op3 = copy_to_mode_reg (modei5, op3);
23318 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
23320 error ("the fifth argument must be a 8-bit immediate");
/* pcmpestri: the index output is the result; the mask is scratch.  */
23324 if (d->code == IX86_BUILTIN_PCMPESTRI128)
23326 if (optimize || !target
23327 || GET_MODE (target) != tmode0
23328 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23329 target = gen_reg_rtx (tmode0);
23331 scratch1 = gen_reg_rtx (tmode1);
23333 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* pcmpestrm: the mask output is the result; the index is scratch.  */
23335 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
23337 if (optimize || !target
23338 || GET_MODE (target) != tmode1
23339 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23340 target = gen_reg_rtx (tmode1);
23342 scratch0 = gen_reg_rtx (tmode0);
23344 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-reading variants: both insn outputs are scratch; the result is
   the flag bit named by d->flag.  */
23348 gcc_assert (d->flag);
23350 scratch0 = gen_reg_rtx (tmode0);
23351 scratch1 = gen_reg_rtx (tmode1);
23353 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
23363 target = gen_reg_rtx (SImode);
23364 emit_move_insn (target, const0_rtx);
23365 target = gen_rtx_SUBREG (QImode, target, 0);
23368 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23369 gen_rtx_fmt_ee (EQ, QImode,
23370 gen_rtx_REG ((enum machine_mode) d->flag,
23373 return SUBREG_REG (target);
23380 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expands the SSE4.2 pcmpistri/pcmpistrm builtins (3 arguments:
   xmm, xmm, 8-bit immediate) — the implicit-length counterpart of
   ix86_expand_sse_pcmpestr, with the same output-selection scheme.
   NOTE(review): elided listing — error-return paths and the emit of
   PAT are not visible here.  */
23383 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
23384 tree exp, rtx target)
23387 tree arg0 = CALL_EXPR_ARG (exp, 0);
23388 tree arg1 = CALL_EXPR_ARG (exp, 1);
23389 tree arg2 = CALL_EXPR_ARG (exp, 2);
23390 rtx scratch0, scratch1;
23391 rtx op0 = expand_normal (arg0);
23392 rtx op1 = expand_normal (arg1);
23393 rtx op2 = expand_normal (arg2);
23394 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Operand modes: two outputs, two vector inputs, trailing immediate.  */
23396 tmode0 = insn_data[d->icode].operand[0].mode;
23397 tmode1 = insn_data[d->icode].operand[1].mode;
23398 modev2 = insn_data[d->icode].operand[2].mode;
23399 modev3 = insn_data[d->icode].operand[3].mode;
23400 modeimm = insn_data[d->icode].operand[4].mode;
23402 if (VECTOR_MODE_P (modev2))
23403 op0 = safe_vector_operand (op0, modev2);
23404 if (VECTOR_MODE_P (modev3))
23405 op1 = safe_vector_operand (op1, modev3);
23407 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23408 op0 = copy_to_mode_reg (modev2, op0);
23409 if ((optimize && !register_operand (op1, modev3))
23410 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
23411 op1 = copy_to_mode_reg (modev3, op1);
23413 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
23415 error ("the third argument must be a 8-bit immediate");
/* pcmpistri: the index output is the result; the mask is scratch.  */
23419 if (d->code == IX86_BUILTIN_PCMPISTRI128)
23421 if (optimize || !target
23422 || GET_MODE (target) != tmode0
23423 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23424 target = gen_reg_rtx (tmode0);
23426 scratch1 = gen_reg_rtx (tmode1);
23428 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* pcmpistrm: the mask output is the result; the index is scratch.  */
23430 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
23432 if (optimize || !target
23433 || GET_MODE (target) != tmode1
23434 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23435 target = gen_reg_rtx (tmode1);
23437 scratch0 = gen_reg_rtx (tmode0);
23439 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-reading variants: both outputs are scratch; the result is the
   flag bit named by d->flag.  */
23443 gcc_assert (d->flag);
23445 scratch0 = gen_reg_rtx (tmode0);
23446 scratch1 = gen_reg_rtx (tmode1);
23448 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
23458 target = gen_reg_rtx (SImode);
23459 emit_move_insn (target, const0_rtx);
23460 target = gen_rtx_SUBREG (QImode, target, 0);
23463 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23464 gen_rtx_fmt_ee (EQ, QImode,
23465 gen_rtx_REG ((enum machine_mode) d->flag,
23468 return SUBREG_REG (target);
23474 /* Subroutine of ix86_expand_builtin to take care of insns with
23475 variable number of operands. */
/* Generic expander for most SSE/AVX builtins.  D->flag (an
   ix86_builtin_type) fixes the argument count, whether trailing
   arguments are immediates (nargs_constant), whether the last argument
   is a shift count, whether the call is really a compare (dispatched to
   ix86_expand_sse_compare) or a ptest / vec_merge form, and whether the
   result mode differs from the insn's output mode (rmode).
   NOTE(review): elided listing — nargs/rmode/swap assignments, `break`s,
   and the emit/return tail fall between the numbered lines and are not
   visible here.  */
23478 ix86_expand_args_builtin (const struct builtin_description *d,
23479 tree exp, rtx target)
23481 rtx pat, real_target;
23482 unsigned int i, nargs;
23483 unsigned int nargs_constant = 0;
23484 int num_memory = 0;
23488 enum machine_mode mode;
23490 bool last_arg_count = false;
23491 enum insn_code icode = d->icode;
23492 const struct insn_data *insn_p = &insn_data[icode];
23493 enum machine_mode tmode = insn_p->operand[0].mode;
23494 enum machine_mode rmode = VOIDmode;
23496 enum rtx_code comparison = d->comparison;
23498 switch ((enum ix86_builtin_type) d->flag)
/* ptest forms delegate entirely to the ptest expander.  */
23500 case INT_FTYPE_V8SF_V8SF_PTEST:
23501 case INT_FTYPE_V4DI_V4DI_PTEST:
23502 case INT_FTYPE_V4DF_V4DF_PTEST:
23503 case INT_FTYPE_V4SF_V4SF_PTEST:
23504 case INT_FTYPE_V2DI_V2DI_PTEST:
23505 case INT_FTYPE_V2DF_V2DF_PTEST:
23506 return ix86_expand_sse_ptest (d, exp, target);
/* One-argument forms.  */
23507 case FLOAT128_FTYPE_FLOAT128:
23508 case FLOAT_FTYPE_FLOAT:
23509 case INT64_FTYPE_V4SF:
23510 case INT64_FTYPE_V2DF:
23511 case INT_FTYPE_V16QI:
23512 case INT_FTYPE_V8QI:
23513 case INT_FTYPE_V8SF:
23514 case INT_FTYPE_V4DF:
23515 case INT_FTYPE_V4SF:
23516 case INT_FTYPE_V2DF:
23517 case V16QI_FTYPE_V16QI:
23518 case V8SI_FTYPE_V8SF:
23519 case V8SI_FTYPE_V4SI:
23520 case V8HI_FTYPE_V8HI:
23521 case V8HI_FTYPE_V16QI:
23522 case V8QI_FTYPE_V8QI:
23523 case V8SF_FTYPE_V8SF:
23524 case V8SF_FTYPE_V8SI:
23525 case V8SF_FTYPE_V4SF:
23526 case V4SI_FTYPE_V4SI:
23527 case V4SI_FTYPE_V16QI:
23528 case V4SI_FTYPE_V4SF:
23529 case V4SI_FTYPE_V8SI:
23530 case V4SI_FTYPE_V8HI:
23531 case V4SI_FTYPE_V4DF:
23532 case V4SI_FTYPE_V2DF:
23533 case V4HI_FTYPE_V4HI:
23534 case V4DF_FTYPE_V4DF:
23535 case V4DF_FTYPE_V4SI:
23536 case V4DF_FTYPE_V4SF:
23537 case V4DF_FTYPE_V2DF:
23538 case V4SF_FTYPE_V4SF:
23539 case V4SF_FTYPE_V4SI:
23540 case V4SF_FTYPE_V8SF:
23541 case V4SF_FTYPE_V4DF:
23542 case V4SF_FTYPE_V2DF:
23543 case V2DI_FTYPE_V2DI:
23544 case V2DI_FTYPE_V16QI:
23545 case V2DI_FTYPE_V8HI:
23546 case V2DI_FTYPE_V4SI:
23547 case V2DF_FTYPE_V2DF:
23548 case V2DF_FTYPE_V4SI:
23549 case V2DF_FTYPE_V4DF:
23550 case V2DF_FTYPE_V4SF:
23551 case V2DF_FTYPE_V2SI:
23552 case V2SI_FTYPE_V2SI:
23553 case V2SI_FTYPE_V4SF:
23554 case V2SI_FTYPE_V2SF:
23555 case V2SI_FTYPE_V2DF:
23556 case V2SF_FTYPE_V2SF:
23557 case V2SF_FTYPE_V2SI:
/* Scalar unary ops with vec_merge delegate to their own expander.  */
23560 case V4SF_FTYPE_V4SF_VEC_MERGE:
23561 case V2DF_FTYPE_V2DF_VEC_MERGE:
23562 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Two-argument forms; plain binops go through the binop expander.  */
23563 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
23564 case V16QI_FTYPE_V16QI_V16QI:
23565 case V16QI_FTYPE_V8HI_V8HI:
23566 case V8QI_FTYPE_V8QI_V8QI:
23567 case V8QI_FTYPE_V4HI_V4HI:
23568 case V8HI_FTYPE_V8HI_V8HI:
23569 case V8HI_FTYPE_V16QI_V16QI:
23570 case V8HI_FTYPE_V4SI_V4SI:
23571 case V8SF_FTYPE_V8SF_V8SF:
23572 case V8SF_FTYPE_V8SF_V8SI:
23573 case V4SI_FTYPE_V4SI_V4SI:
23574 case V4SI_FTYPE_V8HI_V8HI:
23575 case V4SI_FTYPE_V4SF_V4SF:
23576 case V4SI_FTYPE_V2DF_V2DF:
23577 case V4HI_FTYPE_V4HI_V4HI:
23578 case V4HI_FTYPE_V8QI_V8QI:
23579 case V4HI_FTYPE_V2SI_V2SI:
23580 case V4DF_FTYPE_V4DF_V4DF:
23581 case V4DF_FTYPE_V4DF_V4DI:
23582 case V4SF_FTYPE_V4SF_V4SF:
23583 case V4SF_FTYPE_V4SF_V4SI:
23584 case V4SF_FTYPE_V4SF_V2SI:
23585 case V4SF_FTYPE_V4SF_V2DF:
23586 case V4SF_FTYPE_V4SF_DI:
23587 case V4SF_FTYPE_V4SF_SI:
23588 case V2DI_FTYPE_V2DI_V2DI:
23589 case V2DI_FTYPE_V16QI_V16QI:
23590 case V2DI_FTYPE_V4SI_V4SI:
23591 case V2DI_FTYPE_V2DI_V16QI:
23592 case V2DI_FTYPE_V2DF_V2DF:
23593 case V2SI_FTYPE_V2SI_V2SI:
23594 case V2SI_FTYPE_V4HI_V4HI:
23595 case V2SI_FTYPE_V2SF_V2SF:
23596 case V2DF_FTYPE_V2DF_V2DF:
23597 case V2DF_FTYPE_V2DF_V4SF:
23598 case V2DF_FTYPE_V2DF_V2DI:
23599 case V2DF_FTYPE_V2DF_DI:
23600 case V2DF_FTYPE_V2DF_SI:
23601 case V2SF_FTYPE_V2SF_V2SF:
23602 case V1DI_FTYPE_V1DI_V1DI:
23603 case V1DI_FTYPE_V8QI_V8QI:
23604 case V1DI_FTYPE_V2SI_V2SI:
23605 if (comparison == UNKNOWN)
23606 return ix86_expand_binop_builtin (icode, exp, target);
/* Compare forms with swapped operands.  */
23609 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23610 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23611 gcc_assert (comparison != UNKNOWN);
/* Shift forms: the last argument is a count (register or 8-bit imm).  */
23615 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23616 case V8HI_FTYPE_V8HI_SI_COUNT:
23617 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23618 case V4SI_FTYPE_V4SI_SI_COUNT:
23619 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23620 case V4HI_FTYPE_V4HI_SI_COUNT:
23621 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23622 case V2DI_FTYPE_V2DI_SI_COUNT:
23623 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23624 case V2SI_FTYPE_V2SI_SI_COUNT:
23625 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23626 case V1DI_FTYPE_V1DI_SI_COUNT:
23628 last_arg_count = true;
23630 case UINT64_FTYPE_UINT64_UINT64:
23631 case UINT_FTYPE_UINT_UINT:
23632 case UINT_FTYPE_UINT_USHORT:
23633 case UINT_FTYPE_UINT_UCHAR:
/* Forms whose result mode differs from the insn output (rmode set).  */
23636 case V2DI2TI_FTYPE_V2DI_INT:
23639 nargs_constant = 1;
/* Two-argument forms with a trailing immediate.  */
23641 case V8HI_FTYPE_V8HI_INT:
23642 case V8SF_FTYPE_V8SF_INT:
23643 case V4SI_FTYPE_V4SI_INT:
23644 case V4SI_FTYPE_V8SI_INT:
23645 case V4HI_FTYPE_V4HI_INT:
23646 case V4DF_FTYPE_V4DF_INT:
23647 case V4SF_FTYPE_V4SF_INT:
23648 case V4SF_FTYPE_V8SF_INT:
23649 case V2DI_FTYPE_V2DI_INT:
23650 case V2DF_FTYPE_V2DF_INT:
23651 case V2DF_FTYPE_V4DF_INT:
23653 nargs_constant = 1;
/* Three-argument forms.  */
23655 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23656 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23657 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23658 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23659 case V2DF_FTYPE_V2DF_V2DF_V2DF:
/* Three-argument forms with a trailing immediate.  */
23662 case V16QI_FTYPE_V16QI_V16QI_INT:
23663 case V8HI_FTYPE_V8HI_V8HI_INT:
23664 case V8SI_FTYPE_V8SI_V8SI_INT:
23665 case V8SI_FTYPE_V8SI_V4SI_INT:
23666 case V8SF_FTYPE_V8SF_V8SF_INT:
23667 case V8SF_FTYPE_V8SF_V4SF_INT:
23668 case V4SI_FTYPE_V4SI_V4SI_INT:
23669 case V4DF_FTYPE_V4DF_V4DF_INT:
23670 case V4DF_FTYPE_V4DF_V2DF_INT:
23671 case V4SF_FTYPE_V4SF_V4SF_INT:
23672 case V2DI_FTYPE_V2DI_V2DI_INT:
23673 case V2DF_FTYPE_V2DF_V2DF_INT:
23675 nargs_constant = 1;
23677 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23680 nargs_constant = 1;
23682 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23685 nargs_constant = 1;
23687 case V2DI_FTYPE_V2DI_UINT_UINT:
23689 nargs_constant = 2;
/* Four-argument (blendv-style) forms with a trailing immediate.  */
23691 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
23692 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
23693 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
23694 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
23696 nargs_constant = 1;
23698 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23700 nargs_constant = 2;
23703 gcc_unreachable ();
23706 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Compare builtins are handled entirely by the compare expander.  */
23708 if (comparison != UNKNOWN)
23710 gcc_assert (nargs == 2);
23711 return ix86_expand_sse_compare (d, exp, target, swap);
/* When the result mode matches the insn output, TARGET is used
   directly; otherwise the insn writes a subreg of a wider pseudo.  */
23714 if (rmode == VOIDmode || rmode == tmode)
23718 || GET_MODE (target) != tmode
23719 || ! (*insn_p->operand[0].predicate) (target, tmode))
23720 target = gen_reg_rtx (tmode);
23721 real_target = target;
23725 target = gen_reg_rtx (rmode);
23726 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
/* Expand and legitimize each call argument.  */
23729 for (i = 0; i < nargs; i++)
23731 tree arg = CALL_EXPR_ARG (exp, i);
23732 rtx op = expand_normal (arg);
23733 enum machine_mode mode = insn_p->operand[i + 1].mode;
23734 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
23736 if (last_arg_count && (i + 1) == nargs)
23738 /* SIMD shift insns take either an 8-bit immediate or
23739 register as count. But builtin functions take int as
23740 count. If count doesn't match, we put it in register. */
23743 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
23744 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
23745 op = copy_to_reg (op);
/* Trailing-immediate arguments: diagnose by icode, since different
   insns accept different immediate widths.  */
23748 else if ((nargs - i) <= nargs_constant)
23753 case CODE_FOR_sse4_1_roundpd:
23754 case CODE_FOR_sse4_1_roundps:
23755 case CODE_FOR_sse4_1_roundsd:
23756 case CODE_FOR_sse4_1_roundss:
23757 case CODE_FOR_sse4_1_blendps:
23758 case CODE_FOR_avx_blendpd256:
23759 case CODE_FOR_avx_vpermilv4df:
23760 case CODE_FOR_avx_roundpd256:
23761 case CODE_FOR_avx_roundps256:
23762 error ("the last argument must be a 4-bit immediate");
23765 case CODE_FOR_sse4_1_blendpd:
23766 case CODE_FOR_avx_vpermilv2df:
23767 case CODE_FOR_avx_vpermil2v2df3:
23768 case CODE_FOR_avx_vpermil2v4sf3:
23769 case CODE_FOR_avx_vpermil2v4df3:
23770 case CODE_FOR_avx_vpermil2v8sf3:
23771 error ("the last argument must be a 2-bit immediate");
23774 case CODE_FOR_avx_vextractf128v4df:
23775 case CODE_FOR_avx_vextractf128v8sf:
23776 case CODE_FOR_avx_vextractf128v8si:
23777 case CODE_FOR_avx_vinsertf128v4df:
23778 case CODE_FOR_avx_vinsertf128v8sf:
23779 case CODE_FOR_avx_vinsertf128v8si:
23780 error ("the last argument must be a 1-bit immediate");
23783 case CODE_FOR_avx_cmpsdv2df3:
23784 case CODE_FOR_avx_cmpssv4sf3:
23785 case CODE_FOR_avx_cmppdv2df3:
23786 case CODE_FOR_avx_cmppsv4sf3:
23787 case CODE_FOR_avx_cmppdv4df3:
23788 case CODE_FOR_avx_cmppsv8sf3:
23789 error ("the last argument must be a 5-bit immediate");
/* Default: 8-bit immediates; the message distinguishes the
   position when two trailing immediates are expected.  */
23793 switch (nargs_constant)
23796 if ((nargs - i) == nargs_constant)
23798 error ("the next to last argument must be an 8-bit immediate");
23802 error ("the last argument must be an 8-bit immediate");
23805 gcc_unreachable ();
23812 if (VECTOR_MODE_P (mode))
23813 op = safe_vector_operand (op, mode);
23815 /* If we aren't optimizing, only allow one memory operand to
23817 if (memory_operand (op, mode))
23820 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
23822 if (optimize || !match || num_memory > 1)
23823 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy to a register and reinterpret via subreg.  */
23827 op = copy_to_reg (op);
23828 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
23833 args[i].mode = mode;
/* Emit the insn with the operand count selected above.  */
23839 pat = GEN_FCN (icode) (real_target, args[0].op);
23842 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
23845 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23849 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23850 args[2].op, args[3].op);
23853 gcc_unreachable ();
23863 /* Subroutine of ix86_expand_builtin to take care of special insns
23864 with variable number of operands. */
/* NOTE(review): this excerpt is an elided listing — interior lines (nargs
   assignments, break statements, closing braces) are missing; comments below
   describe only the visible code.  Classifies the builtin by its
   ix86_special_builtin_type flag into a load or store shape, expands the
   tree arguments into RTL operands, and emits the target insn pattern. */
23867 ix86_expand_special_args_builtin (const struct builtin_description *d,
23868 tree exp, rtx target)
23872 unsigned int i, nargs, arg_adjust, memory;
23876 enum machine_mode mode;
23878 enum insn_code icode = d->icode;
23879 bool last_arg_constant = false;
23880 const struct insn_data *insn_p = &insn_data[icode];
23881 enum machine_mode tmode = insn_p->operand[0].mode;
/* klass distinguishes builtins that read memory (load) from those that
   write it (store); it drives operand placement below.  */
23882 enum { load, store } klass;
23884 switch ((enum ix86_special_builtin_type) d->flag)
23886 case VOID_FTYPE_VOID:
23887 emit_insn (GEN_FCN (icode) (target));
23889 case V2DI_FTYPE_PV2DI:
23890 case V32QI_FTYPE_PCCHAR:
23891 case V16QI_FTYPE_PCCHAR:
23892 case V8SF_FTYPE_PCV4SF:
23893 case V8SF_FTYPE_PCFLOAT:
23894 case V4SF_FTYPE_PCFLOAT:
23895 case V4DF_FTYPE_PCV2DF:
23896 case V4DF_FTYPE_PCDOUBLE:
23897 case V2DF_FTYPE_PCDOUBLE:
23902 case VOID_FTYPE_PV2SF_V4SF:
23903 case VOID_FTYPE_PV2DI_V2DI:
23904 case VOID_FTYPE_PCHAR_V32QI:
23905 case VOID_FTYPE_PCHAR_V16QI:
23906 case VOID_FTYPE_PFLOAT_V8SF:
23907 case VOID_FTYPE_PFLOAT_V4SF:
23908 case VOID_FTYPE_PDOUBLE_V4DF:
23909 case VOID_FTYPE_PDOUBLE_V2DF:
23910 case VOID_FTYPE_PDI_DI:
23911 case VOID_FTYPE_PINT_INT:
23914 /* Reserve memory operand for target. */
23915 memory = ARRAY_SIZE (args);
23917 case V4SF_FTYPE_V4SF_PCV2SF:
23918 case V2DF_FTYPE_V2DF_PCDOUBLE:
23923 case V8SF_FTYPE_PCV8SF_V8SF:
23924 case V4DF_FTYPE_PCV4DF_V4DF:
23925 case V4SF_FTYPE_PCV4SF_V4SF:
23926 case V2DF_FTYPE_PCV2DF_V2DF:
23931 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23932 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23933 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23934 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23937 /* Reserve memory operand for target. */
23938 memory = ARRAY_SIZE (args);
23941 gcc_unreachable ();
23944 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For a store, the first call argument is the destination address;
   the insn's operand 0 becomes a MEM at that address.  */
23946 if (klass == store)
23948 arg = CALL_EXPR_ARG (exp, 0);
23949 op = expand_normal (arg);
23950 gcc_assert (target == 0);
23951 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
23959 || GET_MODE (target) != tmode
23960 || ! (*insn_p->operand[0].predicate) (target, tmode))
23961 target = gen_reg_rtx (tmode);
/* Expand each remaining argument and coerce it to what the insn's
   operand predicate accepts (immediate, memory, or register).  */
23964 for (i = 0; i < nargs; i++)
23966 enum machine_mode mode = insn_p->operand[i + 1].mode;
23969 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
23970 op = expand_normal (arg);
23971 match = (*insn_p->operand[i + 1].predicate) (op, mode);
23973 if (last_arg_constant && (i + 1) == nargs)
23979 error ("the last argument must be an 8-bit immediate");
23987 /* This must be the memory operand. */
23988 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
23989 gcc_assert (GET_MODE (op) == mode
23990 || GET_MODE (op) == VOIDmode);
23994 /* This must be register. */
23995 if (VECTOR_MODE_P (mode))
23996 op = safe_vector_operand (op, mode);
23998 gcc_assert (GET_MODE (op) == mode
23999 || GET_MODE (op) == VOIDmode);
24000 op = copy_to_mode_reg (mode, op);
24005 args[i].mode = mode;
24011 pat = GEN_FCN (icode) (target, args[0].op);
24014 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24017 gcc_unreachable ();
/* Stores produce no value; loads return the target register.  */
24023 return klass == store ? 0 : target;
24026 /* Return the integer constant in ARG. Constrain it to be in the range
24027 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): excerpt is elided — the return statements and braces are
   missing from this listing.  Visibly: ARG must be a host-representable
   unsigned integer constant no larger than (subparts - 1).  */
24030 get_element_number (tree vec_type, tree arg)
24032 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
24034 if (!host_integerp (arg, 1)
24035 || (elt = tree_low_cst (arg, 1), elt > max))
24037 error ("selector must be an integer constant in the range 0..%wi", max);
24044 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24045 ix86_expand_vector_init. We DO have language-level syntax for this, in
24046 the form of (type){ init-list }. Except that since we can't place emms
24047 instructions from inside the compiler, we can't allow the use of MMX
24048 registers unless the user explicitly asks for it. So we do *not* define
24049 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
24050 we have builtins invoked by mmintrin.h that gives us license to emit
24051 these sorts of instructions. */
/* NOTE(review): elided excerpt — the function header line and final return
   are not visible.  Expands each call argument, narrows it to the vector's
   element mode, and hands the PARALLEL to ix86_expand_vector_init.  */
24054 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24056 enum machine_mode tmode = TYPE_MODE (type);
24057 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24058 int i, n_elt = GET_MODE_NUNITS (tmode);
24059 rtvec v = rtvec_alloc (n_elt);
24061 gcc_assert (VECTOR_MODE_P (tmode));
/* The builtin takes exactly one scalar argument per vector element.  */
24062 gcc_assert (call_expr_nargs (exp) == n_elt);
24064 for (i = 0; i < n_elt; ++i)
24066 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24067 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24070 if (!target || !register_operand (target, tmode))
24071 target = gen_reg_rtx (tmode);
24073 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24077 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24078 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
24079 had a language-level syntax for referencing vector elements. */
/* NOTE(review): elided excerpt — local declarations and the return are not
   visible.  Extracts element ARG1 (a validated constant) of vector ARG0
   into a fresh or caller-supplied register of the element's mode.  */
24082 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24084 enum machine_mode tmode, mode0;
24089 arg0 = CALL_EXPR_ARG (exp, 0);
24090 arg1 = CALL_EXPR_ARG (exp, 1);
24092 op0 = expand_normal (arg0);
/* get_element_number errors out on a non-constant or out-of-range index.  */
24093 elt = get_element_number (TREE_TYPE (arg0), arg1);
24095 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24096 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24097 gcc_assert (VECTOR_MODE_P (mode0));
24099 op0 = force_reg (mode0, op0);
24101 if (optimize || !target || !register_operand (target, tmode))
24102 target = gen_reg_rtx (tmode);
24104 ix86_expand_vector_extract (true, target, op0, elt);
24109 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24110 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24111 a language-level syntax for referencing vector elements. */
/* NOTE(review): elided excerpt — the function header line and final return
   are missing from this listing.  */
24114 ix86_expand_vec_set_builtin (tree exp)
24116 enum machine_mode tmode, mode1;
24117 tree arg0, arg1, arg2;
24119 rtx op0, op1, target;
24121 arg0 = CALL_EXPR_ARG (exp, 0);
24122 arg1 = CALL_EXPR_ARG (exp, 1);
24123 arg2 = CALL_EXPR_ARG (exp, 2);
24125 tmode = TYPE_MODE (TREE_TYPE (arg0));
24126 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24127 gcc_assert (VECTOR_MODE_P (tmode));
24129 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24130 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
24131 elt = get_element_number (TREE_TYPE (arg0), arg2);
24133 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24134 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24136 op0 = force_reg (tmode, op0);
24137 op1 = force_reg (mode1, op1);
24139 /* OP0 is the source of these builtin functions and shouldn't be
24140 modified. Create a copy, use it and return it as target. */
24141 target = gen_reg_rtx (tmode);
24142 emit_move_insn (target, op0);
24143 ix86_expand_vector_set (true, target, op1, elt);
24148 /* Expand an expression EXP that calls a built-in function,
24149 with result going to TARGET if that's convenient
24150 (and in mode MODE if that's convenient).
24151 SUBTARGET may be used as the target for computing one of EXP's operands.
24152 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): elided excerpt — the switch statement header, break
   statements, and several returns are missing from this listing.  Visible
   structure: an ISA-availability check, a switch over hand-expanded
   builtins, then table lookups over the bdesc_* description arrays.  */
24155 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24156 enum machine_mode mode ATTRIBUTE_UNUSED,
24157 int ignore ATTRIBUTE_UNUSED)
24159 const struct builtin_description *d;
24161 enum insn_code icode;
24162 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24163 tree arg0, arg1, arg2;
24164 rtx op0, op1, op2, pat;
24165 enum machine_mode mode0, mode1, mode2;
24166 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24168 /* Determine whether the builtin function is available under the current ISA.
24169 Originally the builtin was not created if it wasn't applicable to the
24170 current ISA based on the command line switches. With function specific
24171 options, we need to check in the context of the function making the call
24172 whether it is supported. */
24173 if (ix86_builtins_isa[fcode].isa
24174 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24176 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24177 NULL, NULL, false);
24180 error ("%qE needs unknown isa option", fndecl);
24183 gcc_assert (opts != NULL);
24184 error ("%qE needs isa option %s", fndecl, opts);
24192 case IX86_BUILTIN_MASKMOVQ:
24193 case IX86_BUILTIN_MASKMOVDQU:
24194 icode = (fcode == IX86_BUILTIN_MASKMOVQ
24195 ? CODE_FOR_mmx_maskmovq
24196 : CODE_FOR_sse2_maskmovdqu);
24197 /* Note the arg order is different from the operand order. */
24198 arg1 = CALL_EXPR_ARG (exp, 0);
24199 arg2 = CALL_EXPR_ARG (exp, 1);
24200 arg0 = CALL_EXPR_ARG (exp, 2);
24201 op0 = expand_normal (arg0);
24202 op1 = expand_normal (arg1);
24203 op2 = expand_normal (arg2);
24204 mode0 = insn_data[icode].operand[0].mode;
24205 mode1 = insn_data[icode].operand[1].mode;
24206 mode2 = insn_data[icode].operand[2].mode;
/* The destination operand is a memory reference at the pointer arg.  */
24208 op0 = force_reg (Pmode, op0);
24209 op0 = gen_rtx_MEM (mode1, op0);
24211 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
24212 op0 = copy_to_mode_reg (mode0, op0);
24213 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
24214 op1 = copy_to_mode_reg (mode1, op1)
24215 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
24216 op2 = copy_to_mode_reg (mode2, op2);
24217 pat = GEN_FCN (icode) (op0, op1, op2);
24223 case IX86_BUILTIN_LDMXCSR:
/* Spill the argument to a stack slot so ldmxcsr can read it from memory.  */
24224 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
24225 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24226 emit_move_insn (target, op0);
24227 emit_insn (gen_sse_ldmxcsr (target));
24230 case IX86_BUILTIN_STMXCSR:
24231 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24232 emit_insn (gen_sse_stmxcsr (target));
24233 return copy_to_mode_reg (SImode, target);
24235 case IX86_BUILTIN_CLFLUSH:
24236 arg0 = CALL_EXPR_ARG (exp, 0);
24237 op0 = expand_normal (arg0);
24238 icode = CODE_FOR_sse2_clflush;
24239 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24240 op0 = copy_to_mode_reg (Pmode, op0);
24242 emit_insn (gen_sse2_clflush (op0));
24245 case IX86_BUILTIN_MONITOR:
24246 arg0 = CALL_EXPR_ARG (exp, 0);
24247 arg1 = CALL_EXPR_ARG (exp, 1);
24248 arg2 = CALL_EXPR_ARG (exp, 2);
24249 op0 = expand_normal (arg0);
24250 op1 = expand_normal (arg1);
24251 op2 = expand_normal (arg2);
24253 op0 = copy_to_mode_reg (Pmode, op0);
24255 op1 = copy_to_mode_reg (SImode, op1);
24257 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor abstracts over the SI/DI pointer-mode variants.  */
24258 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
24261 case IX86_BUILTIN_MWAIT:
24262 arg0 = CALL_EXPR_ARG (exp, 0);
24263 arg1 = CALL_EXPR_ARG (exp, 1);
24264 op0 = expand_normal (arg0);
24265 op1 = expand_normal (arg1);
24267 op0 = copy_to_mode_reg (SImode, op0);
24269 op1 = copy_to_mode_reg (SImode, op1);
24270 emit_insn (gen_sse3_mwait (op0, op1));
24273 case IX86_BUILTIN_VEC_INIT_V2SI:
24274 case IX86_BUILTIN_VEC_INIT_V4HI:
24275 case IX86_BUILTIN_VEC_INIT_V8QI:
24276 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
24278 case IX86_BUILTIN_VEC_EXT_V2DF:
24279 case IX86_BUILTIN_VEC_EXT_V2DI:
24280 case IX86_BUILTIN_VEC_EXT_V4SF:
24281 case IX86_BUILTIN_VEC_EXT_V4SI:
24282 case IX86_BUILTIN_VEC_EXT_V8HI:
24283 case IX86_BUILTIN_VEC_EXT_V2SI:
24284 case IX86_BUILTIN_VEC_EXT_V4HI:
24285 case IX86_BUILTIN_VEC_EXT_V16QI:
24286 return ix86_expand_vec_ext_builtin (exp, target);
24288 case IX86_BUILTIN_VEC_SET_V2DI:
24289 case IX86_BUILTIN_VEC_SET_V4SF:
24290 case IX86_BUILTIN_VEC_SET_V4SI:
24291 case IX86_BUILTIN_VEC_SET_V8HI:
24292 case IX86_BUILTIN_VEC_SET_V4HI:
24293 case IX86_BUILTIN_VEC_SET_V16QI:
24294 return ix86_expand_vec_set_builtin (exp);
24296 case IX86_BUILTIN_INFQ:
24298 REAL_VALUE_TYPE inf;
24302 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
/* Infinity is materialized via the constant pool, not an immediate.  */
24304 tmp = validize_mem (force_const_mem (mode, tmp));
24307 target = gen_reg_rtx (mode);
24309 emit_move_insn (target, tmp);
/* Fall back to the table-driven expanders, in priority order.  */
24317 for (i = 0, d = bdesc_special_args;
24318 i < ARRAY_SIZE (bdesc_special_args);
24320 if (d->code == fcode)
24321 return ix86_expand_special_args_builtin (d, exp, target);
24323 for (i = 0, d = bdesc_args;
24324 i < ARRAY_SIZE (bdesc_args);
24326 if (d->code == fcode)
24329 case IX86_BUILTIN_FABSQ:
24330 case IX86_BUILTIN_COPYSIGNQ:
24332 /* Emit a normal call if SSE2 isn't available. */
24333 return expand_call (exp, target, ignore);
24335 return ix86_expand_args_builtin (d, exp, target);
24338 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24339 if (d->code == fcode)
24340 return ix86_expand_sse_comi (d, exp, target);
24342 for (i = 0, d = bdesc_pcmpestr;
24343 i < ARRAY_SIZE (bdesc_pcmpestr);
24345 if (d->code == fcode)
24346 return ix86_expand_sse_pcmpestr (d, exp, target);
24348 for (i = 0, d = bdesc_pcmpistr;
24349 i < ARRAY_SIZE (bdesc_pcmpistr);
24351 if (d->code == fcode)
24352 return ix86_expand_sse_pcmpistr (d, exp, target);
24354 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24355 if (d->code == fcode)
24356 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
24357 (enum multi_arg_type)d->flag,
/* Every valid fcode must have been handled by now.  */
24360 gcc_unreachable ();
24363 /* Returns a function decl for a vectorized version of the builtin function
24364 with builtin function code FN and the result vector type TYPE, or NULL_TREE
24365 if it is not available. */
/* NOTE(review): elided excerpt — the switch header, breaks and NULL_TREE
   returns are missing from this listing.  Matches (FN, out/in element mode,
   subpart count) against the handful of directly vectorizable builtins,
   then defers to a registered veclib handler.  */
24368 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
24371 enum machine_mode in_mode, out_mode;
24374 if (TREE_CODE (type_out) != VECTOR_TYPE
24375 || TREE_CODE (type_in) != VECTOR_TYPE)
24378 out_mode = TYPE_MODE (TREE_TYPE (type_out));
24379 out_n = TYPE_VECTOR_SUBPARTS (type_out);
24380 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24381 in_n = TYPE_VECTOR_SUBPARTS (type_in);
24385 case BUILT_IN_SQRT:
24386 if (out_mode == DFmode && out_n == 2
24387 && in_mode == DFmode && in_n == 2)
24388 return ix86_builtins[IX86_BUILTIN_SQRTPD];
24391 case BUILT_IN_SQRTF:
24392 if (out_mode == SFmode && out_n == 4
24393 && in_mode == SFmode && in_n == 4)
24394 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
24397 case BUILT_IN_LRINT:
24398 if (out_mode == SImode && out_n == 4
24399 && in_mode == DFmode && in_n == 2)
24400 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
24403 case BUILT_IN_LRINTF:
24404 if (out_mode == SImode && out_n == 4
24405 && in_mode == SFmode && in_n == 4)
24406 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
24413 /* Dispatch to a handler for a vectorization library. */
24414 if (ix86_veclib_handler)
24415 return (*ix86_veclib_handler)(fn, type_out, type_in);
24420 /* Handler for an SVML-style interface to
24421 a library with vectorized intrinsics. */
/* NOTE(review): elided excerpt — breaks, NULL_TREE returns, and the final
   return of new_fndecl are missing from this listing.  Builds an extern
   FUNCTION_DECL named after Intel SVML's "vmls…4"/"vmld…2" convention for
   the math builtin FN, when the vector shape matches.  */
24424 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
24427 tree fntype, new_fndecl, args;
24430 enum machine_mode el_mode, in_mode;
24433 /* The SVML is suitable for unsafe math only. */
24434 if (!flag_unsafe_math_optimizations)
24437 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24438 n = TYPE_VECTOR_SUBPARTS (type_out);
24439 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24440 in_n = TYPE_VECTOR_SUBPARTS (type_in);
24441 if (el_mode != in_mode
24449 case BUILT_IN_LOG10:
24451 case BUILT_IN_TANH:
24453 case BUILT_IN_ATAN:
24454 case BUILT_IN_ATAN2:
24455 case BUILT_IN_ATANH:
24456 case BUILT_IN_CBRT:
24457 case BUILT_IN_SINH:
24459 case BUILT_IN_ASINH:
24460 case BUILT_IN_ASIN:
24461 case BUILT_IN_COSH:
24463 case BUILT_IN_ACOSH:
24464 case BUILT_IN_ACOS:
/* Double variants require a 2-element DFmode vector (SSE2 width).  */
24465 if (el_mode != DFmode || n != 2)
24469 case BUILT_IN_EXPF:
24470 case BUILT_IN_LOGF:
24471 case BUILT_IN_LOG10F:
24472 case BUILT_IN_POWF:
24473 case BUILT_IN_TANHF:
24474 case BUILT_IN_TANF:
24475 case BUILT_IN_ATANF:
24476 case BUILT_IN_ATAN2F:
24477 case BUILT_IN_ATANHF:
24478 case BUILT_IN_CBRTF:
24479 case BUILT_IN_SINHF:
24480 case BUILT_IN_SINF:
24481 case BUILT_IN_ASINHF:
24482 case BUILT_IN_ASINF:
24483 case BUILT_IN_COSHF:
24484 case BUILT_IN_COSF:
24485 case BUILT_IN_ACOSHF:
24486 case BUILT_IN_ACOSF:
/* Float variants require a 4-element SFmode vector.  */
24487 if (el_mode != SFmode || n != 4)
24495 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
/* SVML spells "log" as "Ln"; everything else reuses the builtin's name
   (skipping the "__builtin_" prefix, 10 chars) with a width suffix.  */
24497 if (fn == BUILT_IN_LOGF)
24498 strcpy (name, "vmlsLn4");
24499 else if (fn == BUILT_IN_LOG)
24500 strcpy (name, "vmldLn2");
24503 sprintf (name, "vmls%s", bname+10);
24504 name[strlen (name)-1] = '4';
24507 sprintf (name, "vmld%s2", bname+10);
24509 /* Convert to uppercase. */
24513 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24514 args = TREE_CHAIN (args))
24518 fntype = build_function_type_list (type_out, type_in, NULL);
24520 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24522 /* Build a function declaration for the vectorized function. */
24523 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
24524 TREE_PUBLIC (new_fndecl) = 1;
24525 DECL_EXTERNAL (new_fndecl) = 1;
24526 DECL_IS_NOVOPS (new_fndecl) = 1;
24527 TREE_READONLY (new_fndecl) = 1;
24532 /* Handler for an ACML-style interface to
24533 a library with vectorized intrinsics. */
/* NOTE(review): elided excerpt — breaks, NULL_TREE returns, and the final
   return are missing.  Same structure as ix86_veclibabi_svml but names
   functions "__vrd2_…"/"__vrs4_…" per AMD's ACML vector-math ABI; the
   two '.' placeholders in the template are patched in missing lines.  */
24536 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
24538 char name[20] = "__vr.._";
24539 tree fntype, new_fndecl, args;
24542 enum machine_mode el_mode, in_mode;
24545 /* The ACML is 64bits only and suitable for unsafe math only as
24546 it does not correctly support parts of IEEE with the required
24547 precision such as denormals. */
24549 || !flag_unsafe_math_optimizations)
24552 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24553 n = TYPE_VECTOR_SUBPARTS (type_out);
24554 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24555 in_n = TYPE_VECTOR_SUBPARTS (type_in);
24556 if (el_mode != in_mode
24566 case BUILT_IN_LOG2:
24567 case BUILT_IN_LOG10:
24570 if (el_mode != DFmode
24575 case BUILT_IN_SINF:
24576 case BUILT_IN_COSF:
24577 case BUILT_IN_EXPF:
24578 case BUILT_IN_POWF:
24579 case BUILT_IN_LOGF:
24580 case BUILT_IN_LOG2F:
24581 case BUILT_IN_LOG10F:
24584 if (el_mode != SFmode
24593 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
/* Append the builtin's name minus its "__builtin_" prefix (10 chars).  */
24594 sprintf (name + 7, "%s", bname+10);
24597 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24598 args = TREE_CHAIN (args))
24602 fntype = build_function_type_list (type_out, type_in, NULL);
24604 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24606 /* Build a function declaration for the vectorized function. */
24607 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
24608 TREE_PUBLIC (new_fndecl) = 1;
24609 DECL_EXTERNAL (new_fndecl) = 1;
24610 DECL_IS_NOVOPS (new_fndecl) = 1;
24611 TREE_READONLY (new_fndecl) = 1;
24617 /* Returns a decl of a function that implements conversion of an integer vector
24618 into a floating-point vector, or vice-versa. TYPE is the type of the integer
24619 side of the conversion.
24620 Return NULL_TREE if it is not available. */
/* NOTE(review): elided excerpt — the outer switch on CODE, the FLOAT_EXPR
   case label, defaults and returns are missing from this listing.  */
24623 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
24625 if (TREE_CODE (type) != VECTOR_TYPE)
24631 switch (TYPE_MODE (type))
24634 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
24639 case FIX_TRUNC_EXPR:
24640 switch (TYPE_MODE (type))
24643 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
24653 /* Returns a code for a target-specific builtin that implements
24654 reciprocal of the function, or NULL_TREE if not available. */
/* NOTE(review): elided excerpt — the md_fn branch structure and NULL_TREE
   returns are missing.  Only offers rsqrt replacements when fast-math-style
   flags permit the precision loss and we are optimizing for speed.  */
24657 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
24658 bool sqrt ATTRIBUTE_UNUSED)
24660 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
24661 && flag_finite_math_only && !flag_trapping_math
24662 && flag_unsafe_math_optimizations))
24666 /* Machine dependent builtins. */
24669 /* Vectorized version of sqrt to rsqrt conversion. */
24670 case IX86_BUILTIN_SQRTPS_NR:
24671 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
24677 /* Normal builtins. */
24680 /* Sqrt to rsqrt conversion. */
24681 case BUILT_IN_SQRTF:
24682 return ix86_builtins[IX86_BUILTIN_RSQRTF];
24689 /* Store OPERAND to the memory after reload is completed. This means
24690 that we can't easily use assign_stack_local. */
/* NOTE(review): elided excerpt — the mode switch header, case labels,
   emit_insn wrappers and closing braces are missing from this listing.
   Strategy visible: with a red zone, store below the stack pointer;
   otherwise push with PRE_DEC (64-bit pushes DImode; 32-bit splits DImode
   into two SImode pushes and widens HImode to SImode).  */
24692 ix86_force_to_memory (enum machine_mode mode, rtx operand)
24696 gcc_assert (reload_completed);
24697 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
24699 result = gen_rtx_MEM (mode,
24700 gen_rtx_PLUS (Pmode,
24702 GEN_INT (-RED_ZONE_SIZE)));
24703 emit_move_insn (result, operand);
24705 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
24711 operand = gen_lowpart (DImode, operand);
24715 gen_rtx_SET (VOIDmode,
24716 gen_rtx_MEM (DImode,
24717 gen_rtx_PRE_DEC (DImode,
24718 stack_pointer_rtx)),
24722 gcc_unreachable ();
24724 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit path: DImode is split and pushed as two SImode words.  */
24733 split_di (&operand, 1, operands, operands + 1);
24735 gen_rtx_SET (VOIDmode,
24736 gen_rtx_MEM (SImode,
24737 gen_rtx_PRE_DEC (Pmode,
24738 stack_pointer_rtx)),
24741 gen_rtx_SET (VOIDmode,
24742 gen_rtx_MEM (SImode,
24743 gen_rtx_PRE_DEC (Pmode,
24744 stack_pointer_rtx)),
24749 /* Store HImodes as SImodes. */
24750 operand = gen_lowpart (SImode, operand);
24754 gen_rtx_SET (VOIDmode,
24755 gen_rtx_MEM (GET_MODE (operand),
24756 gen_rtx_PRE_DEC (SImode,
24757 stack_pointer_rtx)),
24761 gcc_unreachable ();
24763 result = gen_rtx_MEM (mode, stack_pointer_rtx);
24768 /* Free operand from the memory. */
/* NOTE(review): elided excerpt — the size computation and braces are
   missing.  Undoes ix86_force_to_memory: with no red zone the pushed
   bytes must be deallocated; the LEA form is peephole-friendly.  */
24770 ix86_free_from_memory (enum machine_mode mode)
24772 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
24776 if (mode == DImode || TARGET_64BIT)
24780 /* Use LEA to deallocate stack space. In peephole2 it will be converted
24781 to pop or add instruction if registers are available. */
24782 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24783 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
24788 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
24789 QImode must go into class Q_REGS.
24790 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
24791 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): elided excerpt — the function's return-type line, several
   returns (regclass / NO_REGS) and braces are missing from this listing.  */
24793 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
24795 enum machine_mode mode = GET_MODE (x);
24797 /* We're only allowed to return a subclass of CLASS. Many of the
24798 following checks fail for NO_REGS, so eliminate that early. */
24799 if (regclass == NO_REGS)
24802 /* All classes can load zeros. */
24803 if (x == CONST0_RTX (mode))
24806 /* Force constants into memory if we are loading a (nonzero) constant into
24807 an MMX or SSE register. This is because there are no MMX/SSE instructions
24808 to load from a constant. */
24810 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
24813 /* Prefer SSE regs only, if we can use them for math. */
24814 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
24815 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
24817 /* Floating-point constants need more complex checks. */
24818 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
24820 /* General regs can load everything. */
24821 if (reg_class_subset_p (regclass, GENERAL_REGS))
24824 /* Floats can load 0 and 1 plus some others. Note that we eliminated
24825 zero above. We only want to wind up preferring 80387 registers if
24826 we plan on doing computation with them. */
24828 && standard_80387_constant_p (x))
24830 /* Limit class to non-sse. */
24831 if (regclass == FLOAT_SSE_REGS)
24833 if (regclass == FP_TOP_SSE_REGS)
24835 if (regclass == FP_SECOND_SSE_REGS)
24836 return FP_SECOND_REG;
24837 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
24844 /* Generally when we see PLUS here, it's the function invariant
24845 (plus soft-fp const_int). Which can only be computed into general
24847 if (GET_CODE (x) == PLUS)
24848 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
24850 /* QImode constants are easy to load, but non-constant QImode data
24851 must go into Q_REGS. */
24852 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
24854 if (reg_class_subset_p (regclass, Q_REGS))
24856 if (reg_class_subset_p (Q_REGS, regclass))
24864 /* Discourage putting floating-point values in SSE registers unless
24865 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): elided excerpt — the return-type line, a couple of returns
   and braces are missing from this listing.  */
24867 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
24869 enum machine_mode mode = GET_MODE (x);
24871 /* Restrict the output reload class to the register bank that we are doing
24872 math on. If we would like not to return a subset of CLASS, reject this
24873 alternative: if reload cannot do this, it will still use its choice. */
24874 mode = GET_MODE (x);
24875 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
24876 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
24878 if (X87_FLOAT_MODE_P (mode))
24880 if (regclass == FP_TOP_SSE_REGS)
24882 else if (regclass == FP_SECOND_SSE_REGS)
24883 return FP_SECOND_REG;
24885 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* NOTE(review): elided excerpt — the function's tail (Q_REGS return,
   final NO_REGS return, braces) is missing from this listing.  Target
   hook: decides whether a reload needs an intermediate register class.  */
24891 static enum reg_class
24892 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
24893 enum machine_mode mode,
24894 secondary_reload_info *sri ATTRIBUTE_UNUSED)
24896 /* QImode spills from non-QI registers require
24897 intermediate register on 32bit targets. */
24898 if (!in_p && mode == QImode && !TARGET_64BIT
24899 && (rclass == GENERAL_REGS
24900 || rclass == LEGACY_REGS
24901 || rclass == INDEX_REGS))
24910 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
24911 regno = true_regnum (x);
24913 /* Return Q_REGS if the operand is in memory. */
24921 /* If we are copying between general and FP registers, we need a memory
24922 location. The same is true for SSE and MMX registers.
24924 To optimize register_move_cost performance, allow inline variant.
24926 The macro can't work reliably when one of the CLASSES is class containing
24927 registers from multiple units (SSE, MMX, integer). We avoid this by never
24928 combining those units in single alternative in the machine description.
24929 Ensure that this constraint holds to avoid unexpected surprises.
24931 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
24932 enforce these sanity checks. */
/* NOTE(review): elided excerpt — the "static inline bool"-style header
   line, several true/false returns and the closing brace are missing.  */
24935 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
24936 enum machine_mode mode, int strict)
24938 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
24939 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
24940 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
24941 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
24942 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
24943 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
24945 gcc_assert (!strict);
24949 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
24952 /* ??? This is a lie. We do have moves between mmx/general, and for
24953 mmx/sse2. But by saying we need secondary memory we discourage the
24954 register allocator from using the mmx registers unless needed. */
24955 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
24958 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
24960 /* SSE1 doesn't have any direct moves from other classes. */
24964 /* If the target says that inter-unit moves are more expensive
24965 than moving through memory, then don't generate them. */
24966 if (!TARGET_INTER_UNIT_MOVES)
24969 /* Between SSE and general, we have moves no larger than word size. */
24970 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper for the target macro; simply forwards to the
   inline variant above (header line elided in this listing).  */
24978 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
24979 enum machine_mode mode, int strict)
24981 return inline_secondary_memory_needed (class1, class2, mode, strict);
24984 /* Return true if the registers in CLASS cannot represent the change from
24985 modes FROM to TO. */
/* NOTE(review): elided excerpt — the early size-equality return, true/false
   returns and closing brace are missing from this listing.  */
24988 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
24989 enum reg_class regclass)
24994 /* x87 registers can't do subreg at all, as all values are reformatted
24995 to extended precision. */
24996 if (MAYBE_FLOAT_CLASS_P (regclass))
24999 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25001 /* Vector registers do not support QI or HImode loads. If we don't
25002 disallow a change to these modes, reload will assume it's ok to
25003 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
25004 the vec_dupv4hi pattern. */
25005 if (GET_MODE_SIZE (from) < 4)
25008 /* Vector registers do not support subreg with nonzero offsets, which
25009 are otherwise valid for integer registers. Since we can't see
25010 whether we have a nonzero offset from here, prohibit all
25011 nonparadoxical subregs changing size. */
25012 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25019 /* Return the cost of moving data of mode M between a
25020 register and memory. A value of 2 is the default; this cost is
25021 relative to those in `REGISTER_MOVE_COST'.
25023 This function is used extensively by register_move_cost that is used to
25024 build tables at startup. Make it inline in this case.
25025 When IN is 2, return maximum of in and out move cost.
25027 If moving between registers and memory is more expensive than
25028 between two registers, you should define this macro to express the
25031 Model also increased moving costs of QImode registers in non
25035 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* NOTE(review): elided excerpt — the mode-size switch bodies, index
   computations and several case labels are missing from this listing.
   Visible structure: one cost table per register bank (x87 / SSE / MMX /
   integer), indexed by mode size; IN == 2 takes max(load, store).  */
25039 if (FLOAT_CLASS_P (regclass))
25057 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25058 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
25060 if (SSE_CLASS_P (regclass))
25063 switch (GET_MODE_SIZE (mode))
25078 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25079 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
25081 if (MMX_CLASS_P (regclass))
25084 switch (GET_MODE_SIZE (mode))
25096 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25097 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
25099 switch (GET_MODE_SIZE (mode))
25102 if (Q_CLASS_P (regclass) || TARGET_64BIT)
25105 return ix86_cost->int_store[0];
/* Partial-register stalls make byte loads pricier on some CPUs; prefer
   the movzbl cost when tuning for speed.  */
25106 if (TARGET_PARTIAL_REG_DEPENDENCY
25107 && optimize_function_for_speed_p (cfun))
25108 cost = ix86_cost->movzbl_load;
25110 cost = ix86_cost->int_load[0];
25112 return MAX (cost, ix86_cost->int_store[0]);
25118 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25120 return ix86_cost->movzbl_load;
25122 return ix86_cost->int_store[0] + 4;
25127 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25128 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25130 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
25131 if (mode == TFmode)
25134 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25136 cost = ix86_cost->int_load[2];
25138 cost = ix86_cost->int_store[2];
25139 return (cost * (((int) GET_MODE_SIZE (mode)
25140 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Target hook for MEMORY_MOVE_COST: thin out-of-line wrapper around
   inline_memory_move_cost, which does the real work.  */
25145 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25147 return inline_memory_move_cost (mode, regclass, in);
25151 /* Return the cost of moving data from a register in class CLASS1 to
25152 one in class CLASS2.
25154 It is not required that the cost always equal 2 when FROM is the same as TO;
25155 on some machines it is expensive to move between registers if they are not
25156 general registers. */
25159 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25160 enum reg_class class2)
25162 /* In case we require secondary memory, compute cost of the store followed
25163 by load. In order to avoid bad register allocation choices, we need
25164 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
25166 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN argument of 2 asks inline_memory_move_cost for max (load, store).  */
25170 cost += inline_memory_move_cost (mode, class1, 2);
25171 cost += inline_memory_move_cost (mode, class2, 2);
25173 /* In case of copying from general_purpose_register we may emit multiple
25174 stores followed by single load causing memory size mismatch stall.
25175 Count this as arbitrarily high cost of 20. */
25176 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25179 /* In the case of FP/MMX moves, the registers actually overlap, and we
25180 have to switch modes in order to treat them differently. */
25181 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25182 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25188 /* Moves between SSE/MMX and integer unit are expensive. */
25189 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25190 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25192 /* ??? By keeping returned value relatively high, we limit the number
25193 of moves between integer and MMX/SSE registers for all targets.
25194 Additionally, high value prevents problem with x86_modes_tieable_p(),
25195 where integer modes in MMX/SSE registers are not tieable
25196 because of missing QImode and HImode moves to, from or between
25197 MMX/SSE registers. */
25198 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Same-unit moves: per-unit move cost from the active tuning table.  */
25200 if (MAYBE_FLOAT_CLASS_P (class1))
25201 return ix86_cost->fp_move;
25202 if (MAYBE_SSE_CLASS_P (class1))
25203 return ix86_cost->sse_move;
25204 if (MAYBE_MMX_CLASS_P (class1))
25205 return ix86_cost->mmx_move;
25209 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
25212 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
25214 /* Flags and only flags can only hold CCmode values. */
25215 if (CC_REGNO_P (regno))
25216 return GET_MODE_CLASS (mode) == MODE_CC;
/* Conversely, CC / RANDOM / PARTIAL_INT modes never live in ordinary
   registers (the elided line presumably returns 0 here — confirm).  */
25217 if (GET_MODE_CLASS (mode) == MODE_CC
25218 || GET_MODE_CLASS (mode) == MODE_RANDOM
25219 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
25221 if (FP_REGNO_P (regno))
25222 return VALID_FP_MODE_P (mode);
25223 if (SSE_REGNO_P (regno))
25225 /* We implement the move patterns for all vector modes into and
25226 out of SSE registers, even when no operation instructions
25227 are available. OImode move is available only when AVX is
25229 return ((TARGET_AVX && mode == OImode)
25230 || VALID_AVX256_REG_MODE (mode)
25231 || VALID_SSE_REG_MODE (mode)
25232 || VALID_SSE2_REG_MODE (mode)
25233 || VALID_MMX_REG_MODE (mode)
25234 || VALID_MMX_REG_MODE_3DNOW (mode));
25236 if (MMX_REGNO_P (regno))
25238 /* We implement the move patterns for 3DNOW modes even in MMX mode,
25239 so if the register is available at all, then we can move data of
25240 the given mode into or out of it. */
25241 return (VALID_MMX_REG_MODE (mode)
25242 || VALID_MMX_REG_MODE_3DNOW (mode));
25245 if (mode == QImode)
25247 /* Take care for QImode values - they can be in non-QI regs,
25248 but then they do cause partial register stalls. */
25249 if (regno < 4 || TARGET_64BIT)
/* With partial-register-stall tuning off, allow QImode anywhere;
   otherwise only once register allocation is underway/finished.  */
25251 if (!TARGET_PARTIAL_REG_STALL)
25253 return reload_in_progress || reload_completed;
25255 /* We handle both integer and floats in the general purpose registers. */
25256 else if (VALID_INT_MODE_P (mode))
25258 else if (VALID_FP_MODE_P (mode))
25260 else if (VALID_DFP_MODE_P (mode))
25262 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
25263 on to use that value in smaller contexts, this can easily force a
25264 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
25265 supporting DImode, allow it. */
25266 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
25272 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
25273 tieable integer mode. */
25276 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* The two returns below belong to different (elided) case labels of a
   switch on MODE; which sizes map to which return is not visible here.  */
25285 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
25288 return TARGET_64BIT;
25295 /* Return true if MODE1 is accessible in a register that can hold MODE2
25296 without copying. That is, all register classes that can hold MODE2
25297 can also hold MODE1. */
25300 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
/* Identical modes are trivially tieable.  */
25302 if (mode1 == mode2)
25305 if (ix86_tieable_integer_mode_p (mode1)
25306 && ix86_tieable_integer_mode_p (mode2))
25309 /* MODE2 being XFmode implies fp stack or general regs, which means we
25310 can tie any smaller floating point modes to it. Note that we do not
25311 tie this with TFmode. */
25312 if (mode2 == XFmode)
25313 return mode1 == SFmode || mode1 == DFmode;
25315 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
25316 that we can tie it with SFmode. */
25317 if (mode2 == DFmode)
25318 return mode1 == SFmode;
25320 /* If MODE2 is only appropriate for an SSE register, then tie with
25321 any other mode acceptable to SSE registers. */
25322 if (GET_MODE_SIZE (mode2) == 16
25323 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
25324 return (GET_MODE_SIZE (mode1) == 16
25325 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
25327 /* If MODE2 is appropriate for an MMX register, then tie
25328 with any other mode acceptable to MMX registers. */
25329 if (GET_MODE_SIZE (mode2) == 8
25330 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
25331 return (GET_MODE_SIZE (mode1) == 8
25332 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
25337 /* Compute a (partial) cost for rtx X. Return true if the complete
25338 cost has been computed, and false if subexpressions should be
25339 scanned. In either case, *TOTAL contains the cost result. */
25342 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
25344 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
25345 enum machine_mode mode = GET_MODE (x);
/* Use real tuning costs when optimizing for speed, size costs otherwise.  */
25346 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* Constant cases: immediates that don't fit the x86-64 encodings, or need
   PIC treatment, are costed higher (elided lines hold the case labels).  */
25354 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
25356 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
25358 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): "!GET_CODE (x) != LABEL_REF" applies '!' to the rtx code,
   so the comparison is effectively "(0 or 1) != LABEL_REF" — near-always
   true.  The intended test is almost certainly "GET_CODE (x) != LABEL_REF";
   later GCC releases dropped the '!'.  Cannot fix here: the surrounding
   elided lines (paren nesting) are not visible.  */
25360 || (!GET_CODE (x) != LABEL_REF
25361 && (GET_CODE (x) != SYMBOL_REF
25362 || !SYMBOL_REF_LOCAL_P (x)))))
25369 if (mode == VOIDmode)
/* FP constants loadable by fld1/fldz etc. are cheap; others go to memory.  */
25372 switch (standard_80387_constant_p (x))
25377 default: /* Other constants */
25382 /* Start with (MEM (SYMBOL_REF)), since that's where
25383 it'll probably end up. Add a penalty for size. */
25384 *total = (COSTS_N_INSNS (1)
25385 + (flag_pic != 0 && !TARGET_64BIT)
25386 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
25392 /* The zero extensions is often completely free on x86_64, so make
25393 it as cheap as possible. */
25394 if (TARGET_64BIT && mode == DImode
25395 && GET_MODE (XEXP (x, 0)) == SImode)
25397 else if (TARGET_ZERO_EXTEND_WITH_AND)
25398 *total = cost->add;
25400 *total = cost->movzx;
25404 *total = cost->movsx;
/* Shift by constant: small left shifts may be done with lea instead.  */
25408 if (CONST_INT_P (XEXP (x, 1))
25409 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
25411 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25414 *total = cost->add;
25417 if ((value == 2 || value == 3)
25418 && cost->lea <= cost->shift_const)
25420 *total = cost->lea;
/* 64-bit shifts on 32-bit targets are synthesized from 32-bit pieces.  */
25430 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
25432 if (CONST_INT_P (XEXP (x, 1)))
25434 if (INTVAL (XEXP (x, 1)) > 32)
25435 *total = cost->shift_const + COSTS_N_INSNS (2);
25437 *total = cost->shift_const * 2;
25441 if (GET_CODE (XEXP (x, 1)) == AND)
25442 *total = cost->shift_var * 2;
25444 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
25449 if (CONST_INT_P (XEXP (x, 1)))
25450 *total = cost->shift_const;
25452 *total = cost->shift_var;
/* MULT: FP multiplies take the fmul cost for scalar SSE, x87, and
   (as a placeholder) vector FP.  */
25457 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25459 /* ??? SSE scalar cost should be used here. */
25460 *total = cost->fmul;
25463 else if (X87_FLOAT_MODE_P (mode))
25465 *total = cost->fmul;
25468 else if (FLOAT_MODE_P (mode))
25470 /* ??? SSE vector cost should be used here. */
25471 *total = cost->fmul;
25476 rtx op0 = XEXP (x, 0);
25477 rtx op1 = XEXP (x, 1);
/* Integer multiply: cost depends on the number of set bits of a constant
   multiplier (nbits), via the popcount-style loop below.  */
25479 if (CONST_INT_P (XEXP (x, 1)))
25481 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25482 for (nbits = 0; value != 0; value &= value - 1)
25486 /* This is arbitrary. */
25489 /* Compute costs correctly for widening multiplication. */
25490 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
25491 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
25492 == GET_MODE_SIZE (mode))
25494 int is_mulwiden = 0;
25495 enum machine_mode inner_mode = GET_MODE (op0);
25497 if (GET_CODE (op0) == GET_CODE (op1))
25498 is_mulwiden = 1, op1 = XEXP (op1, 0);
25499 else if (CONST_INT_P (op1))
25501 if (GET_CODE (op0) == SIGN_EXTEND)
25502 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
25505 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
/* A recognized widening multiply is costed at the narrower mode.  */
25509 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
25512 *total = (cost->mult_init[MODE_INDEX (mode)]
25513 + nbits * cost->mult_bit
25514 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
/* DIV/MOD family.  */
25523 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25524 /* ??? SSE cost should be used here. */
25525 *total = cost->fdiv;
25526 else if (X87_FLOAT_MODE_P (mode))
25527 *total = cost->fdiv;
25528 else if (FLOAT_MODE_P (mode))
25529 /* ??? SSE vector cost should be used here. */
25530 *total = cost->fdiv;
25532 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: patterns of the shape (plus (mult X 2/4/8) Y [+ const]) match lea
   addressing and are costed as a single lea.  */
25536 if (GET_MODE_CLASS (mode) == MODE_INT
25537 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
25539 if (GET_CODE (XEXP (x, 0)) == PLUS
25540 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
25541 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
25542 && CONSTANT_P (XEXP (x, 1)))
25544 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
25545 if (val == 2 || val == 4 || val == 8)
25547 *total = cost->lea;
25548 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25549 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
25550 outer_code, speed);
25551 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25555 else if (GET_CODE (XEXP (x, 0)) == MULT
25556 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
25558 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
25559 if (val == 2 || val == 4 || val == 8)
25561 *total = cost->lea;
25562 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25563 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25567 else if (GET_CODE (XEXP (x, 0)) == PLUS)
25569 *total = cost->lea;
25570 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25571 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25572 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
/* PLUS/MINUS on FP modes take the fadd cost.  */
25579 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25581 /* ??? SSE cost should be used here. */
25582 *total = cost->fadd;
25585 else if (X87_FLOAT_MODE_P (mode))
25587 *total = cost->fadd;
25590 else if (FLOAT_MODE_P (mode))
25592 /* ??? SSE vector cost should be used here. */
25593 *total = cost->fadd;
/* DImode add/sub on 32-bit targets is a two-instruction pair; operands not
   already in DImode also pay an extension (the << 1 doubling below).  */
25601 if (!TARGET_64BIT && mode == DImode)
25603 *total = (cost->add * 2
25604 + (rtx_cost (XEXP (x, 0), outer_code, speed)
25605 << (GET_MODE (XEXP (x, 0)) != DImode))
25606 + (rtx_cost (XEXP (x, 1), outer_code, speed)
25607 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG: FP negation takes the fchs cost.  */
25613 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25615 /* ??? SSE cost should be used here. */
25616 *total = cost->fchs;
25619 else if (X87_FLOAT_MODE_P (mode))
25621 *total = cost->fchs;
25624 else if (FLOAT_MODE_P (mode))
25626 /* ??? SSE vector cost should be used here. */
25627 *total = cost->fchs;
25633 if (!TARGET_64BIT && mode == DImode)
25634 *total = cost->add * 2;
25636 *total = cost->add;
/* COMPARE of a single extracted bit against zero maps to test[bwl].  */
25640 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
25641 && XEXP (XEXP (x, 0), 1) == const1_rtx
25642 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
25643 && XEXP (x, 1) == const0_rtx)
25645 /* This kind of construct is implemented using test[bwl].
25646 Treat it as if we had an AND. */
25647 *total = (cost->add
25648 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
25649 + rtx_cost (const1_rtx, outer_code, speed));
25655 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS and SQRT follow the same three-way FP dispatch as above.  */
25660 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25661 /* ??? SSE cost should be used here. */
25662 *total = cost->fabs;
25663 else if (X87_FLOAT_MODE_P (mode))
25664 *total = cost->fabs;
25665 else if (FLOAT_MODE_P (mode))
25666 /* ??? SSE vector cost should be used here. */
25667 *total = cost->fabs;
25671 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25672 /* ??? SSE cost should be used here. */
25673 *total = cost->fsqrt;
25674 else if (X87_FLOAT_MODE_P (mode))
25675 *total = cost->fsqrt;
25676 else if (FLOAT_MODE_P (mode))
25677 /* ??? SSE vector cost should be used here. */
25678 *total = cost->fsqrt;
/* Thread-pointer UNSPEC is special-cased (elided body).  */
25682 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique Lnn$lz / LPC$nn local labels per stub.  */
25693 static int current_machopic_label_num;
25695 /* Given a symbol name and its associated stub, write out the
25696 definition of the stub. */
25699 machopic_output_stub (FILE *file, const char *symb, const char *stub)
25701 unsigned int length;
25702 char *binder_name, *symbol_name, lazy_ptr_name[32];
25703 int label = ++current_machopic_label_num;
25705 /* For 64-bit we shouldn't get here. */
25706 gcc_assert (!TARGET_64BIT);
25708 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
25709 symb = (*targetm.strip_name_encoding) (symb);
25711 length = strlen (stub);
25712 binder_name = XALLOCAVEC (char, length + 32);
25713 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
25715 length = strlen (symb);
25716 symbol_name = XALLOCAVEC (char, length + 32);
25717 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
25719 sprintf (lazy_ptr_name, "L%d$lz", label);
/* PIC vs non-PIC stubs go to different Mach-O sections (condition elided).  */
25722 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
25724 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
25726 fprintf (file, "%s:\n", stub);
25727 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub: materialize PC in %eax, then jump indirect through the lazy
   pointer; non-PIC stub jumps through it directly.  */
25731 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
25732 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
25733 fprintf (file, "\tjmp\t*%%edx\n");
25736 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder trampoline: push the lazy-pointer address and enter dyld.  */
25738 fprintf (file, "%s:\n", binder_name);
25742 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
25743 fprintf (file, "\tpushl\t%%eax\n");
25746 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
25748 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer itself, initially pointing at the binder.  */
25750 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
25751 fprintf (file, "%s:\n", lazy_ptr_name);
25752 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
25753 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin/x86: defer to the generic Darwin
   end-of-file handling.  */
25757 darwin_x86_file_end (void)
25759 darwin_file_end ();
25762 #endif /* TARGET_MACHO */
25764 /* Order the registers for register allocator. */
25767 x86_order_regs_for_local_alloc (void)
25772 /* First allocate the local general purpose registers. */
25773 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25774 if (GENERAL_REGNO_P (i) && call_used_regs[i])
25775 reg_alloc_order [pos++] = i;
25777 /* Global general purpose registers. */
25778 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25779 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
25780 reg_alloc_order [pos++] = i;
25782 /* x87 registers come first in case we are doing FP math
/* ... i.e. when not using SSE math, x87 stack regs are preferred
   ahead of the SSE registers below.  */
25784 if (!TARGET_SSE_MATH)
25785 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25786 reg_alloc_order [pos++] = i;
25788 /* SSE registers. */
25789 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
25790 reg_alloc_order [pos++] = i;
25791 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
25792 reg_alloc_order [pos++] = i;
25794 /* x87 registers. */
25795 if (TARGET_SSE_MATH)
25796 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25797 reg_alloc_order [pos++] = i;
25799 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
25800 reg_alloc_order [pos++] = i;
25802 /* Initialize the rest of array as we do not allocate some registers
25804 while (pos < FIRST_PSEUDO_REGISTER)
25805 reg_alloc_order [pos++] = 0;
25808 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
25809 struct attribute_spec.handler. */
25811 ix86_handle_abi_attribute (tree *node, tree name,
25812 tree args ATTRIBUTE_UNUSED,
25813 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Only function-ish declarations may carry the attribute.  */
25815 if (TREE_CODE (*node) != FUNCTION_TYPE
25816 && TREE_CODE (*node) != METHOD_TYPE
25817 && TREE_CODE (*node) != FIELD_DECL
25818 && TREE_CODE (*node) != TYPE_DECL)
25820 warning (OPT_Wattributes, "%qs attribute only applies to functions",
25821 IDENTIFIER_POINTER (name));
25822 *no_add_attrs = true;
/* (Elided condition) — attribute is rejected on 32-bit targets.  */
25827 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
25828 IDENTIFIER_POINTER (name));
25829 *no_add_attrs = true;
25833 /* Can combine regparm with all attributes but fastcall. */
25834 if (is_attribute_p ("ms_abi", name))
/* ms_abi and sysv_abi are mutually exclusive, in either order.  */
25836 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
25838 error ("ms_abi and sysv_abi attributes are not compatible");
25843 else if (is_attribute_p ("sysv_abi", name))
25845 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
25847 error ("ms_abi and sysv_abi attributes are not compatible");
25856 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
25857 struct attribute_spec.handler. */
25859 ix86_handle_struct_attribute (tree *node, tree name,
25860 tree args ATTRIBUTE_UNUSED,
25861 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Resolve NODE to the underlying type: a TYPE_DECL contributes its
   TREE_TYPE (the else-branch for plain types is elided here).  */
25864 if (DECL_P (*node))
25866 if (TREE_CODE (*node) == TYPE_DECL)
25867 type = &TREE_TYPE (*node);
/* The attribute only makes sense on struct/union types.  */
25872 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
25873 || TREE_CODE (*type) == UNION_TYPE)))
25875 warning (OPT_Wattributes, "%qs attribute ignored",
25876 IDENTIFIER_POINTER (name));
25877 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive.  */
25880 else if ((is_attribute_p ("ms_struct", name)
25881 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
25882 || ((is_attribute_p ("gcc_struct", name)
25883 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
25885 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
25886 IDENTIFIER_POINTER (name));
25887 *no_add_attrs = true;
/* TARGET_MS_BITFIELD_LAYOUT_P hook: use MS bitfield layout for RECORD_TYPE
   when either the target default says so (and "gcc_struct" doesn't override)
   or the type is explicitly marked "ms_struct".  */
25894 ix86_ms_bitfield_layout_p (const_tree record_type)
25896 return (TARGET_MS_BITFIELD_LAYOUT &&
25897 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
25898 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
25901 /* Returns an expression indicating where the this parameter is
25902 located on entry to the FUNCTION. */
25905 x86_this_parameter (tree function)
25907 tree type = TREE_TYPE (function);
/* AGGR: returning an aggregate in memory shifts "this" to the second
   parameter slot (hidden return pointer occupies the first).  */
25908 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
/* 64-bit: "this" arrives in the first (or second, if AGGR) integer
   parameter register of the function's ABI.  */
25913 const int *parm_regs;
25915 if (ix86_function_type_abi (type) == MS_ABI)
25916 parm_regs = x86_64_ms_abi_int_parameter_registers;
25918 parm_regs = x86_64_int_parameter_registers;
25919 return gen_rtx_REG (DImode, parm_regs[aggr]);
/* 32-bit: depends on regparm / fastcall; otherwise it is on the stack.  */
25922 nregs = ix86_function_regparm (type, function);
25924 if (nregs > 0 && !stdarg_p (type))
25928 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
25929 regno = aggr ? DX_REG : CX_REG;
25937 return gen_rtx_MEM (SImode,
25938 plus_constant (stack_pointer_rtx, 4));
25941 return gen_rtx_REG (SImode, regno);
25944 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
25947 /* Determine whether x86_output_mi_thunk can succeed. */
25950 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
25951 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
25952 HOST_WIDE_INT vcall_offset, const_tree function)
25954 /* 64-bit can handle anything. */
25958 /* For 32-bit, everything's fine if we have one free register. */
25959 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
25962 /* Need a free register for vcall_offset. */
25966 /* Need a free register for GOT references. */
25967 if (flag_pic && !(*targetm.binds_local_p) (function))
25970 /* Otherwise ok. */
25974 /* Output the assembler code for a thunk function. THUNK_DECL is the
25975 declaration for the thunk function itself, FUNCTION is the decl for
25976 the target function. DELTA is an immediate constant offset to be
25977 added to THIS. If VCALL_OFFSET is nonzero, the word at
25978 *(*this + vcall_offset) should be added to THIS. */
25981 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
25982 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
25983 HOST_WIDE_INT vcall_offset, tree function)
25986 rtx this_param = x86_this_parameter (function);
25989 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
25990 pull it in now and let DELTA benefit. */
25991 if (REG_P (this_param))
25992 this_reg = this_param;
25993 else if (vcall_offset)
25995 /* Put the this parameter into %eax. */
25996 xops[0] = this_param;
25997 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
25998 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26001 this_reg = NULL_RTX;
26003 /* Adjust the this parameter by a fixed constant. */
26006 xops[0] = GEN_INT (delta);
26007 xops[1] = this_reg ? this_reg : this_param;
/* DELTA not encodable as a 64-bit immediate must go through a scratch
   (%r10 is free in both 64-bit ABIs at thunk entry).  */
26010 if (!x86_64_general_operand (xops[0], DImode))
26012 tmp = gen_rtx_REG (DImode, R10_REG);
26014 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26016 xops[1] = this_param;
26018 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26021 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26024 /* Adjust the this parameter by a value stored in the vtable. */
26028 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit: pick a scratch the calling convention leaves free; fastcall
   already uses %ecx for "this", so fall back to %eax.  */
26031 int tmp_regno = CX_REG;
26032 if (lookup_attribute ("fastcall",
26033 TYPE_ATTRIBUTES (TREE_TYPE (function))))
26034 tmp_regno = AX_REG;
26035 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into TMP.  */
26038 xops[0] = gen_rtx_MEM (Pmode, this_reg);
26040 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26042 /* Adjust the this parameter. */
26043 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
26044 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26046 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26047 xops[0] = GEN_INT (vcall_offset);
26049 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26050 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26052 xops[1] = this_reg;
26053 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26056 /* If necessary, drop THIS back to its stack slot. */
26057 if (this_reg && this_reg != this_param)
26059 xops[0] = this_reg;
26060 xops[1] = this_param;
26061 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the real target.  */
26064 xops[0] = XEXP (DECL_RTL (function), 0);
26067 if (!flag_pic || (*targetm.binds_local_p) (function))
26068 output_asm_insn ("jmp\t%P0", xops);
26069 /* All thunks should be in the same object as their target,
26070 and thus binds_local_p should be true. */
26071 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
26072 gcc_unreachable ();
/* 64-bit PIC to a non-local symbol: jump through the GOT entry.  */
26075 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26076 tmp = gen_rtx_CONST (Pmode, tmp);
26077 tmp = gen_rtx_MEM (QImode, tmp);
26079 output_asm_insn ("jmp\t%A0", xops);
26084 if (!flag_pic || (*targetm.binds_local_p) (function))
26085 output_asm_insn ("jmp\t%P0", xops);
/* Darwin 32-bit PIC: jump via the machopic stub for the symbol.  */
26090 rtx sym_ref = XEXP (DECL_RTL (function), 0);
26091 tmp = (gen_rtx_SYMBOL_REF
26093 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26094 tmp = gen_rtx_MEM (QImode, tmp);
26096 output_asm_insn ("jmp\t%0", xops);
26099 #endif /* TARGET_MACHO */
/* Generic 32-bit PIC: materialize the GOT pointer in %ecx, then jump
   through the function's GOT slot.  */
26101 tmp = gen_rtx_REG (SImode, CX_REG);
26102 output_set_got (tmp, NULL_RTX);
26105 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26106 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: emit standard prologue plus x86-specific
   directives (.version, __fltused, Intel-syntax switch) as configured.  */
26112 x86_file_start (void)
26114 default_file_start ();
26116 darwin_file_start ();
26118 if (X86_FILE_START_VERSION_DIRECTIVE)
26119 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26120 if (X86_FILE_START_FLTUSED)
26121 fputs ("\t.global\t__fltused\n", asm_out_file);
26122 if (ix86_asm_dialect == ASM_INTEL)
26123 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: on 32-bit without -malign-double, cap the
   alignment of double/integer-class fields at 32 bits (the traditional
   i386 struct layout).  */
26127 x86_field_alignment (tree field, int computed)
26129 enum machine_mode mode;
26130 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the natural alignment (elided return).  */
26132 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26134 mode = TYPE_MODE (strip_array_types (type));
26135 if (mode == DFmode || mode == DCmode
26136 || GET_MODE_CLASS (mode) == MODE_INT
26137 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26138 return MIN (32, computed);
26142 /* Output assembler code to FILE to increment profiler label # LABELNO
26143 for profiling a function entry. */
26145 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit flavor (enclosing condition elided).  */
26149 #ifndef NO_PROFILE_COUNTERS
26150 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
26153 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26154 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
26156 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC flavor: counter address and mcount call go via GOT.  */
26160 #ifndef NO_PROFILE_COUNTERS
26161 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
26162 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
26164 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC flavor.  */
26168 #ifndef NO_PROFILE_COUNTERS
26169 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
26170 PROFILE_COUNT_REGISTER);
26172 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26176 /* We don't have exact information about the insn sizes, but we may assume
26177 quite safely that we are informed about all 1 byte insns and memory
26178 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound on the encoded size (in bytes) of
   INSN; used by the jump-misprediction padding pass below.  */
26182 min_insn_size (rtx insn)
26186 if (!INSN_P (insn) || !active_insn_p (insn))
26189 /* Discard alignments we've emit and jump instructions. */
26190 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26191 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN
26194 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26195 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
26198 /* Important case - calls are always 5 bytes.
26199 It is common to have many calls in the row. */
26201 && symbolic_reference_mentioned_p (PATTERN (insn))
26202 && !SIBLING_CALL_P (insn))
26204 if (get_attr_length (insn) <= 1)
26207 /* For normal instructions we may rely on the sizes of addresses
26208 and the presence of symbol to require 4 bytes of encoding.
26209 This is not the case for jumps where references are PC relative. */
26210 if (!JUMP_P (insn))
26212 l = get_attr_length_address (insn);
26213 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
26222 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Pass that inserts align padding so no 16-byte window contains 4 jumps.  */
26226 ix86_avoid_jump_misspredicts (void)
26228 rtx insn, start = get_insns ();
26229 int nbytes = 0, njumps = 0;
26232 /* Look for all minimal intervals of instructions containing 4 jumps.
26233 The intervals are bounded by START and INSN. NBYTES is the total
26234 size of instructions in the interval including INSN and not including
26235 START. When the NBYTES is smaller than 16 bytes, it is possible
26236 that the end of START and INSN ends up in the same 16byte page.
26238 The smallest offset in the page INSN can start is the case where START
26239 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
26240 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
26242 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26245 nbytes += min_insn_size (insn);
26247 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
26248 INSN_UID (insn), min_insn_size (insn));
/* Count real jumps only; jump tables (ADDR_VEC) are data, not branches.  */
26250 && GET_CODE (PATTERN (insn)) != ADDR_VEC
26251 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front until it holds at most 3 jumps.  */
26259 start = NEXT_INSN (start);
26260 if ((JUMP_P (start)
26261 && GET_CODE (PATTERN (start)) != ADDR_VEC
26262 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26264 njumps--, isjump = 1;
26267 nbytes -= min_insn_size (start);
26269 gcc_assert (njumps >= 0);
26271 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
26272 INSN_UID (start), INSN_UID (insn), nbytes);
26274 if (njumps == 3 && isjump && nbytes < 16)
26276 int padsize = 15 - nbytes + min_insn_size (insn);
26279 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
26280 INSN_UID (insn), padsize);
26281 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
26286 /* AMD Athlon works faster
26287 when RET is not destination of conditional jump or directly preceded
26288 by other jump instruction. We avoid the penalty by inserting NOP just
26289 before the RET instructions in such cases. */
26291 ix86_pad_returns (void)
/* Walk every edge into the exit block looking for bare RETURN insns.  */
26296 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
26298 basic_block bb = e->src;
26299 rtx ret = BB_END (bb);
26301 bool replace = false;
26303 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
26304 || optimize_bb_for_size_p (bb))
/* Find the nearest active insn or label before the return.  */
26306 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
26307 if (active_insn_p (prev) || LABEL_P (prev))
26309 if (prev && LABEL_P (prev))
/* A labeled return reached by a non-fallthru edge is a jump target.  */
26314 FOR_EACH_EDGE (e, ei, bb->preds)
26315 if (EDGE_FREQUENCY (e) && e->src->index >= 0
26316 && !(e->flags & EDGE_FALLTHRU))
26321 prev = prev_active_insn (ret);
26323 && ((JUMP_P (prev) && any_condjump_p (prev))
26326 /* Empty functions get branch mispredict even when the jump destination
26327 is not visible to us. */
26328 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Replace the plain RET with the padded "rep ret" style long return.  */
26333 emit_insn_before (gen_return_internal_long (), ret);
26339 /* Implement machine specific optimizations. We implement padding of returns
26340 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* TARGET_MACHINE_DEPENDENT_REORG hook: both sub-passes run only when
   optimizing for speed and gated on the respective tuning flags.  */
26344 if (TARGET_PAD_RETURNS && optimize
26345 && optimize_function_for_speed_p (cfun))
26346 ix86_pad_returns ();
26347 if (TARGET_FOUR_JUMP_LIMIT && optimize
26348 && optimize_function_for_speed_p (cfun))
26349 ix86_avoid_jump_misspredicts ();
26352 /* Return nonzero when QImode register that must be represented via REX prefix
26355 x86_extended_QIreg_mentioned_p (rtx insn)
26358 extract_insn_cached (insn);
/* Hard regs 0-3 (a/b/c/d) have legacy byte encodings; anything >= 4 in a
   QImode context needs a REX prefix.  */
26359 for (i = 0; i < recog_data.n_operands; i++)
26360 if (REG_P (recog_data.operand[i])
26361 && REGNO (recog_data.operand[i]) >= 4)
26366 /* Return nonzero when P points to register encoded via REX prefix.
26367 Called via for_each_rtx. */
26369 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
26371 unsigned int regno;
/* (REG_P check on *p elided.)  R8-R15 and XMM8-XMM15 need REX.  */
26374 regno = REGNO (*p);
26375 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
26378 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the pattern (or the bare rtx for non-insns) with the worker above.  */
26381 x86_extended_reg_mentioned_p (rtx insn)
26383 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
26384 extended_reg_mentioned_1, NULL);
26387 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
26388 optabs would emit if we didn't have TFmode patterns. */
26391 x86_emit_floatuns (rtx operands[2])
26393 rtx neglab, donelab, i0, i1, f0, in, out;
26394 enum machine_mode mode, inmode;
26396 inmode = GET_MODE (operands[1]);
26397 gcc_assert (inmode == SImode || inmode == DImode);
26400 in = force_reg (inmode, operands[1]);
26401 mode = GET_MODE (out);
26402 neglab = gen_label_rtx ();
26403 donelab = gen_label_rtx ();
26404 f0 = gen_reg_rtx (mode);
/* Non-negative inputs convert directly via the signed path.  */
26406 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
26408 expand_float (out, in, 0);
26410 emit_jump_insn (gen_jump (donelab));
26413 emit_label (neglab);
/* Negative (i.e. high-bit-set) inputs: halve with rounding
   (in >> 1 | (in & 1)), convert signed, then double — the standard
   unsigned-to-float trick that keeps the value in signed range.  */
26415 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
26417 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
26419 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
26421 expand_float (f0, i0, 0);
26423 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
26425 emit_label (donelab);
26428 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26429 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): sampled extract -- the mode switch labels, several local
   declarations and the return statements are missing; each fragment below
   is the interior of one mode-specific case.  */
26432 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
26433 rtx target, rtx val)
26435 enum machine_mode hmode, smode, wsmode, wvmode;
/* Easy case: the target supports a native VEC_DUPLICATE of VAL.  */
26450 val = force_reg (GET_MODE_INNER (mode), val);
26451 x = gen_rtx_VEC_DUPLICATE (mode, val);
26452 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26458 if (TARGET_SSE || TARGET_3DNOW_A)
26460 val = gen_lowpart (SImode, val);
26461 x = gen_rtx_TRUNCATE (HImode, val);
26462 x = gen_rtx_VEC_DUPLICATE (mode, x);
26463 emit_insn (gen_rtx_SET (VOIDmode, target, x))
26485 /* Extend HImode to SImode using a paradoxical SUBREG. */
26486 tmp1 = gen_reg_rtx (SImode);
26487 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26488 /* Insert the SImode value as low element of V4SImode vector. */
26489 tmp2 = gen_reg_rtx (V4SImode);
26490 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26491 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26492 CONST0_RTX (V4SImode),
26494 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26495 /* Cast the V4SImode vector back to a V8HImode vector. */
26496 tmp1 = gen_reg_rtx (V8HImode);
26497 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
26498 /* Duplicate the low short through the whole low SImode word. */
26499 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
26500 /* Cast the V8HImode vector back to a V4SImode vector. */
26501 tmp2 = gen_reg_rtx (V4SImode);
26502 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26503 /* Replicate the low element of the V4SImode vector. */
26504 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26505 /* Cast the V4SImode vector back to V8HImode, and store in target. */
26506 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
26517 /* Extend QImode to SImode using a paradoxical SUBREG. */
26518 tmp1 = gen_reg_rtx (SImode);
26519 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26520 /* Insert the SImode value as low element of V4SImode vector. */
26521 tmp2 = gen_reg_rtx (V4SImode);
26522 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26523 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26524 CONST0_RTX (V4SImode),
26526 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26527 /* Cast the V4SImode vector back to a V16QImode vector. */
26528 tmp1 = gen_reg_rtx (V16QImode);
26529 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
26530 /* Duplicate the low byte through the whole low SImode word. */
26531 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26532 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26533 /* Cast the V16QImode vector back to a V4SImode vector. */
26534 tmp2 = gen_reg_rtx (V4SImode);
26535 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26536 /* Replicate the low element of the V4SImode vector. */
26537 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26538 /* Cast the V4SImode vector back to V16QImode, and store in target. */
26539 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
26547 /* Replicate the value once into the next wider mode and recurse. */
26548 val = convert_modes (wsmode, smode, val, true);
26549 x = expand_simple_binop (wsmode, ASHIFT, val,
26550 GEN_INT (GET_MODE_BITSIZE (smode)),
26551 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26552 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
26554 x = gen_reg_rtx (wvmode);
26555 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
26556 gcc_unreachable ();
26557 emit_move_insn (target, gen_lowpart (mode, x));
/* Wide (e.g. 256-bit) case: duplicate into the half-width mode, then
   concatenate the half with itself.  */
26580 rtx tmp = gen_reg_rtx (hmode);
26581 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
26582 emit_insn (gen_rtx_SET (VOIDmode, target,
26583 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
26592 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26593 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): sampled extract -- switch labels, braces and return paths
   are missing between the visible lines.  The strategy selection
   (use_vector_set) and the fall-back shuffle paths below belong to
   different mode cases.  */
26597 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
26598 rtx target, rtx var, int one_var)
26600 enum machine_mode vsimode;
26603 bool use_vector_set = false;
26608 /* For SSE4.1, we normally use vector set. But if the second
26609 element is zero and inter-unit moves are OK, we use movq
26611 use_vector_set = (TARGET_64BIT
26613 && !(TARGET_INTER_UNIT_MOVES
26619 use_vector_set = TARGET_SSE4_1;
26622 use_vector_set = TARGET_SSE2;
26625 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
26633 use_vector_set = TARGET_AVX;
26639 if (use_vector_set)
/* Zero the whole vector, then insert VAR at ONE_VAR.  */
26641 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
26642 var = force_reg (GET_MODE_INNER (mode), var);
26643 ix86_expand_vector_set (mmx_ok, target, var, one_var);
26659 var = force_reg (GET_MODE_INNER (mode), var);
26660 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
26661 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Build the value in a fresh pseudo if TARGET is not a pseudo register,
   since the shuffles below rewrite it in place.  */
26666 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
26667 new_target = gen_reg_rtx (mode);
26669 new_target = target;
26670 var = force_reg (GET_MODE_INNER (mode), var);
26671 x = gen_rtx_VEC_DUPLICATE (mode, var);
26672 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
26673 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
26676 /* We need to shuffle the value to the correct position, so
26677 create a new pseudo to store the intermediate result. */
26679 /* With SSE2, we can use the integer shuffle insns. */
26680 if (mode != V4SFmode && TARGET_SSE2)
26682 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
26684 GEN_INT (one_var == 1 ? 0 : 1),
26685 GEN_INT (one_var == 2 ? 0 : 1),
26686 GEN_INT (one_var == 3 ? 0 : 1)));
26687 if (target != new_target)
26688 emit_move_insn (target, new_target);
26692 /* Otherwise convert the intermediate result to V4SFmode and
26693 use the SSE1 shuffle instructions. */
26694 if (mode != V4SFmode)
26696 tmp = gen_reg_rtx (V4SFmode);
26697 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
26702 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
26704 GEN_INT (one_var == 1 ? 0 : 1),
26705 GEN_INT (one_var == 2 ? 0+4 : 1+4),
26706 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
26708 if (mode != V4SFmode)
26709 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
26710 else if (tmp != target)
26711 emit_move_insn (target, tmp);
26713 else if (target != new_target)
26714 emit_move_insn (target, new_target);
26719 vsimode = V4SImode;
26725 vsimode = V2SImode;
26731 /* Zero extend the variable element to SImode and recurse. */
26732 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
26734 x = gen_reg_rtx (vsimode);
26735 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
26737 gcc_unreachable ();
26739 emit_move_insn (target, gen_lowpart (mode, x));
26747 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26748 consisting of the values in VALS. It is known that all elements
26749 except ONE_VAR are constants. Return true if successful. */
/* NOTE(review): sampled extract -- switch labels, braces and return
   statements are missing between the visible lines.  */
26752 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
26753 rtx target, rtx vals, int one_var)
26755 rtx var = XVECEXP (vals, 0, one_var);
26756 enum machine_mode wmode;
/* Make a copy of VALS with the variable slot zeroed; that constant vector
   can be loaded from the pool and then patched.  */
26759 const_vec = copy_rtx (vals);
26760 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
26761 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
26769 /* For the two element vectors, it's just as easy to use
26770 the general case. */
26794 /* There's no way to set one QImode entry easily. Combine
26795 the variable value with its adjacent constant value, and
26796 promote to an HImode set. */
26797 x = XVECEXP (vals, 0, one_var ^ 1);
/* one_var is the high byte of the pair: shift VAR up, keep the constant
   neighbor in the low byte.  */
26800 var = convert_modes (HImode, QImode, var, true);
26801 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
26802 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26803 x = GEN_INT (INTVAL (x) & 0xff);
/* Otherwise VAR is the low byte and the constant neighbor goes high.  */
26807 var = convert_modes (HImode, QImode, var, true);
26808 x = gen_int_mode (INTVAL (x) << 8, HImode);
26810 if (x != const0_rtx)
26811 var = expand_simple_binop (HImode, IOR, var, x, var,
26812 1, OPTAB_LIB_WIDEN);
26814 x = gen_reg_rtx (wmode);
26815 emit_move_insn (x, gen_lowpart (wmode, const_vec));
26816 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
26818 emit_move_insn (target, gen_lowpart (mode, x));
/* Default path: load the constant vector, then overwrite ONE_VAR.  */
26825 emit_move_insn (target, const_vec);
26826 ix86_expand_vector_set (mmx_ok, target, var, one_var);
26830 /* A subroutine of ix86_expand_vector_init_general. Use vector
26831 concatenate to handle the most general case: all values variable,
26832 and none identical. */
/* NOTE(review): sampled extract -- the switch statements selecting CMODE
   and HMODE for each (mode, n) pair are largely missing; only the
   gcc_unreachable default labels survive below.  */
26835 ix86_expand_vector_init_concat (enum machine_mode mode,
26836 rtx target, rtx *ops, int n)
26838 enum machine_mode cmode, hmode = VOIDmode;
26839 rtx first[8], second[4];
26879 gcc_unreachable ();
/* n == 2: force both halves into registers and emit one VEC_CONCAT.  */
26882 if (!register_operand (ops[1], cmode))
26883 ops[1] = force_reg (cmode, ops[1]);
26884 if (!register_operand (ops[0], cmode))
26885 ops[0] = force_reg (cmode, ops[0]);
26886 emit_insn (gen_rtx_SET (VOIDmode, target,
26887 gen_rtx_VEC_CONCAT (mode, ops[0],
26907 gcc_unreachable ();
26923 gcc_unreachable ();
26928 /* FIXME: We process inputs backward to help RA. PR 36222. */
26931 for (; i > 0; i -= 2, j--)
26933 first[j] = gen_reg_rtx (cmode);
26934 v = gen_rtvec (2, ops[i - 1], ops[i]);
26935 ix86_expand_vector_init (false, first[j],
26936 gen_rtx_PARALLEL (cmode, v));
/* Pair up the CMODE halves into HMODE vectors, then recurse on those.  */
26942 gcc_assert (hmode != VOIDmode);
26943 for (i = j = 0; i < n; i += 2, j++)
26945 second[j] = gen_reg_rtx (hmode);
26946 ix86_expand_vector_init_concat (hmode, second [j],
26950 ix86_expand_vector_init_concat (mode, target, second, n);
26953 ix86_expand_vector_init_concat (mode, target, first, n);
26961 /* A subroutine of ix86_expand_vector_init_general. Use vector
26962 interleave to handle the most general case: all values variable,
26963 and none identical. */
/* NOTE(review): sampled extract -- the switch labels on MODE and several
   closing braces are missing between the visible lines.  */
26966 ix86_expand_vector_init_interleave (enum machine_mode mode,
26967 rtx target, rtx *ops, int n)
26969 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
26972 rtx (*gen_load_even) (rtx, rtx, rtx);
26973 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
26974 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* V8HImode configuration: one interleave level after the even-load.  */
26979 gen_load_even = gen_vec_setv8hi;
26980 gen_interleave_first_low = gen_vec_interleave_lowv4si;
26981 gen_interleave_second_low = gen_vec_interleave_lowv2di;
26982 inner_mode = HImode;
26983 first_imode = V4SImode;
26984 second_imode = V2DImode;
26985 third_imode = VOIDmode;
/* V16QImode configuration: needs an extra interleave level.  */
26988 gen_load_even = gen_vec_setv16qi;
26989 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
26990 gen_interleave_second_low = gen_vec_interleave_lowv4si;
26991 inner_mode = QImode;
26992 first_imode = V8HImode;
26993 second_imode = V4SImode;
26994 third_imode = V2DImode;
26997 gcc_unreachable ();
27000 for (i = 0; i < n; i++)
27002 /* Extend the odd element to SImode using a paradoxical SUBREG. */
27003 op0 = gen_reg_rtx (SImode);
27004 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27006 /* Insert the SImode value as low element of V4SImode vector. */
27007 op1 = gen_reg_rtx (V4SImode);
27008 op0 = gen_rtx_VEC_MERGE (V4SImode,
27009 gen_rtx_VEC_DUPLICATE (V4SImode,
27011 CONST0_RTX (V4SImode),
27013 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27015 /* Cast the V4SImode vector back to a vector in original mode. */
27016 op0 = gen_reg_rtx (mode);
27017 emit_move_insn (op0, gen_lowpart (mode, op1));
27019 /* Load even elements into the second position. */
27020 emit_insn ((*gen_load_even) (op0,
27021 force_reg (inner_mode,
27025 /* Cast vector to FIRST_IMODE vector. */
27026 ops[i] = gen_reg_rtx (first_imode);
27027 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
27030 /* Interleave low FIRST_IMODE vectors. */
27031 for (i = j = 0; i < n; i += 2, j++)
27033 op0 = gen_reg_rtx (first_imode);
27034 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27036 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
27037 ops[j] = gen_reg_rtx (second_imode);
27038 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
27041 /* Interleave low SECOND_IMODE vectors. */
27042 switch (second_imode)
27045 for (i = j = 0; i < n / 2; i += 2, j++)
27047 op0 = gen_reg_rtx (second_imode);
27048 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27051 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
27053 ops[j] = gen_reg_rtx (third_imode);
27054 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* Fall through to the V2DImode step for the final interleave.  */
27056 second_imode = V2DImode;
27057 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27061 op0 = gen_reg_rtx (second_imode);
27062 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27065 /* Cast the SECOND_IMODE vector back to a vector on original
27067 emit_insn (gen_rtx_SET (VOIDmode, target,
27068 gen_lowpart (mode, op0)));
27072 gcc_unreachable ();
27076 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
27077 all values variable, and none identical. */
/* NOTE(review): sampled extract -- switch labels, braces and the fall-back
   wide-mode strategy selection are missing between the visible lines.  */
27080 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27081 rtx target, rtx vals)
27083 rtx ops[32], op0, op1;
27084 enum machine_mode half_mode = VOIDmode;
27091 if (!mmx_ok && !TARGET_SSE)
/* Concat strategy: build the vector from pairwise VEC_CONCATs.  */
27103 n = GET_MODE_NUNITS (mode);
27104 for (i = 0; i < n; i++)
27105 ops[i] = XVECEXP (vals, 0, i);
27106 ix86_expand_vector_init_concat (mode, target, ops, n);
27110 half_mode = V16QImode;
27114 half_mode = V8HImode;
/* 256-bit integer case: interleave each half separately, then concat.  */
27118 n = GET_MODE_NUNITS (mode);
27119 for (i = 0; i < n; i++)
27120 ops[i] = XVECEXP (vals, 0, i);
27121 op0 = gen_reg_rtx (half_mode);
27122 op1 = gen_reg_rtx (half_mode);
27123 ix86_expand_vector_init_interleave (half_mode, op0, ops,
27125 ix86_expand_vector_init_interleave (half_mode, op1,
27126 &ops [n >> 1], n >> 2);
27127 emit_insn (gen_rtx_SET (VOIDmode, target,
27128 gen_rtx_VEC_CONCAT (mode, op0, op1)));
27132 if (!TARGET_SSE4_1)
27140 /* Don't use ix86_expand_vector_init_interleave if we can't
27141 move from GPR to SSE register directly. */
27142 if (!TARGET_INTER_UNIT_MOVES)
27145 n = GET_MODE_NUNITS (mode);
27146 for (i = 0; i < n; i++)
27147 ops[i] = XVECEXP (vals, 0, i);
27148 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27156 gcc_unreachable ();
/* Word-building fall-back: pack elements into word_mode registers by
   shift+or, then assemble the vector from those words.  */
27160 int i, j, n_elts, n_words, n_elt_per_word;
27161 enum machine_mode inner_mode;
27162 rtx words[4], shift;
27164 inner_mode = GET_MODE_INNER (mode);
27165 n_elts = GET_MODE_NUNITS (mode);
27166 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27167 n_elt_per_word = n_elts / n_words;
27168 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27170 for (i = 0; i < n_words; ++i)
27172 rtx word = NULL_RTX;
27174 for (j = 0; j < n_elt_per_word; ++j)
27176 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27177 elt = convert_modes (word_mode, inner_mode, elt, true);
27183 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27184 word, 1, OPTAB_LIB_WIDEN);
27185 word = expand_simple_binop (word_mode, IOR, word, elt,
27186 word, 1, OPTAB_LIB_WIDEN);
27194 emit_move_insn (target, gen_lowpart (mode, words[0]));
27195 else if (n_words == 2)
27197 rtx tmp = gen_reg_rtx (mode);
/* Clobber TMP first so the two half-word stores are not seen as a
   partial-register update of an uninitialized pseudo.  */
27198 emit_clobber (tmp);
27199 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27200 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27201 emit_move_insn (target, tmp);
27203 else if (n_words == 4)
27205 rtx tmp = gen_reg_rtx (V4SImode);
27206 gcc_assert (word_mode == SImode);
27207 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
27208 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
27209 emit_move_insn (target, gen_lowpart (mode, tmp));
27212 gcc_unreachable ();
27216 /* Initialize vector TARGET via VALS. Suppress the use of MMX
27217 instructions unless MMX_OK is true. */
/* Strategy dispatcher: all-constant -> constant pool; all-same ->
   broadcast; one variable element -> load-and-patch; otherwise the
   general expander.  NOTE(review): sampled extract, some guard lines
   between the visible branches are missing.  */
27220 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
27222 enum machine_mode mode = GET_MODE (target);
27223 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27224 int n_elts = GET_MODE_NUNITS (mode);
27225 int n_var = 0, one_var = -1;
27226 bool all_same = true, all_const_zero = true;
/* Classify the elements: count variables, remember the last variable
   index, and track the all-same / all-zero properties.  */
27230 for (i = 0; i < n_elts; ++i)
27232 x = XVECEXP (vals, 0, i);
27233 if (!(CONST_INT_P (x)
27234 || GET_CODE (x) == CONST_DOUBLE
27235 || GET_CODE (x) == CONST_FIXED))
27236 n_var++, one_var = i;
27237 else if (x != CONST0_RTX (inner_mode))
27238 all_const_zero = false;
27239 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
27243 /* Constants are best loaded from the constant pool. */
27246 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
27250 /* If all values are identical, broadcast the value. */
27252 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
27253 XVECEXP (vals, 0, 0)))
27256 /* Values where only one field is non-constant are best loaded from
27257 the pool and overwritten via move later. */
27261 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
27262 XVECEXP (vals, 0, one_var),
27266 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
27270 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  MMX_OK permits MMX
   instructions.  NOTE(review): sampled extract -- the mode switch labels,
   braces and several guard lines are missing; the fragments below are the
   interiors of per-mode cases, ending with the generic vec_merge path and
   the memory round-trip fall-back.  */
27274 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
27276 enum machine_mode mode = GET_MODE (target);
27277 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27278 enum machine_mode half_mode;
27279 bool use_vec_merge = false;
/* AVX 256-bit support tables: extract/insert a 128-bit half, indexed by
   [mode][hi/lo].  */
27281 static rtx (*gen_extract[6][2]) (rtx, rtx)
27283 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
27284 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
27285 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
27286 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
27287 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
27288 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
27290 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
27292 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
27293 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
27294 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
27295 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
27296 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
27297 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element case: extract the other element and re-concatenate in the
   right order.  */
27307 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
27308 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
27310 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
27312 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
27313 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27319 use_vec_merge = TARGET_SSE4_1;
27327 /* For the two element vectors, we implement a VEC_CONCAT with
27328 the extraction of the other element. */
27330 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
27331 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
27334 op0 = val, op1 = tmp;
27336 op0 = tmp, op1 = val;
27338 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
27339 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27344 use_vec_merge = TARGET_SSE4_1;
27351 use_vec_merge = true;
/* V4SF element 1: unpack+shufps sequence.  */
27355 /* tmp = target = A B C D */
27356 tmp = copy_to_reg (target);
27357 /* target = A A B B */
27358 emit_insn (gen_sse_unpcklps (target, target, target));
27359 /* target = X A B B */
27360 ix86_expand_vector_set (false, target, val, 0);
27361 /* target = A X C D */
27362 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27363 GEN_INT (1), GEN_INT (0),
27364 GEN_INT (2+4), GEN_INT (3+4)));
/* V4SF element 2: set element 0 of a copy, then blend back.  */
27368 /* tmp = target = A B C D */
27369 tmp = copy_to_reg (target);
27370 /* tmp = X B C D */
27371 ix86_expand_vector_set (false, tmp, val, 0);
27372 /* target = A B X D */
27373 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27374 GEN_INT (0), GEN_INT (1),
27375 GEN_INT (0+4), GEN_INT (3+4)));
/* V4SF element 3: same idea, different shuffle selectors.  */
27379 /* tmp = target = A B C D */
27380 tmp = copy_to_reg (target);
27381 /* tmp = X B C D */
27382 ix86_expand_vector_set (false, tmp, val, 0);
27383 /* target = A B X D */
27384 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27385 GEN_INT (0), GEN_INT (1),
27386 GEN_INT (2+4), GEN_INT (0+4)));
27390 gcc_unreachable ();
27395 use_vec_merge = TARGET_SSE4_1;
27399 /* Element 0 handled by vec_merge below. */
27402 use_vec_merge = true;
27408 /* With SSE2, use integer shuffles to swap element 0 and ELT,
27409 store into element 0, then shuffle them back. */
27413 order[0] = GEN_INT (elt);
27414 order[1] = const1_rtx;
27415 order[2] = const2_rtx;
27416 order[3] = GEN_INT (3);
27417 order[elt] = const0_rtx;
27419 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27420 order[1], order[2], order[3]));
27422 ix86_expand_vector_set (false, target, val, 0);
27424 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27425 order[1], order[2], order[3]));
27429 /* For SSE1, we have to reuse the V4SF code. */
27430 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
27431 gen_lowpart (SFmode, val), elt);
27436 use_vec_merge = TARGET_SSE2;
27439 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27443 use_vec_merge = TARGET_SSE4_1;
27450 half_mode = V16QImode;
27456 half_mode = V8HImode;
27462 half_mode = V4SImode;
27468 half_mode = V2DImode;
27474 half_mode = V4SFmode;
27480 half_mode = V2DFmode;
/* AVX 256-bit path: operate on the 128-bit half containing ELT.  */
27486 /* Compute offset. */
27490 gcc_assert (i <= 1);
27492 /* Extract the half. */
27493 tmp = gen_reg_rtx (half_mode);
27494 emit_insn ((*gen_extract[j][i]) (tmp, target));
27496 /* Put val in tmp at elt. */
27497 ix86_expand_vector_set (false, tmp, val, elt);
27500 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* Generic path: VEC_MERGE of a broadcast with the old vector, selecting
   only lane ELT from the broadcast.  */
27509 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
27510 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
27511 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill to a stack slot, store the scalar, reload.  */
27515 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27517 emit_move_insn (mem, target);
27519 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27520 emit_move_insn (tmp, val);
27522 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  MMX_OK permits
   MMX instructions.  NOTE(review): sampled extract -- mode switch labels
   and braces are missing; fragments below are per-mode case interiors,
   then the generic VEC_SELECT path and the memory fall-back.  */
27527 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
27529 enum machine_mode mode = GET_MODE (vec);
27530 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27531 bool use_vec_extr = false;
27544 use_vec_extr = true;
27548 use_vec_extr = TARGET_SSE4_1;
/* V4SF without SSE4.1: shuffle the wanted element into lane 0 first.  */
27560 tmp = gen_reg_rtx (mode);
27561 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
27562 GEN_INT (elt), GEN_INT (elt),
27563 GEN_INT (elt+4), GEN_INT (elt+4)));
27567 tmp = gen_reg_rtx (mode);
27568 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
27572 gcc_unreachable ();
27575 use_vec_extr = true;
27580 use_vec_extr = TARGET_SSE4_1;
/* V4SI with SSE2: pshufd the wanted element into lane 0 first.  */
27594 tmp = gen_reg_rtx (mode);
27595 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
27596 GEN_INT (elt), GEN_INT (elt),
27597 GEN_INT (elt), GEN_INT (elt)));
27601 tmp = gen_reg_rtx (mode);
27602 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
27606 gcc_unreachable ();
27609 use_vec_extr = true;
27614 /* For SSE1, we have to reuse the V4SF code. */
27615 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
27616 gen_lowpart (V4SFmode, vec), elt);
27622 use_vec_extr = TARGET_SSE2;
27625 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27629 use_vec_extr = TARGET_SSE4_1;
27633 /* ??? Could extract the appropriate HImode element and shift. */
/* Generic path: a one-element VEC_SELECT.  */
27640 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
27641 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
27643 /* Let the rtl optimizers know about the zero extension performed. */
27644 if (inner_mode == QImode || inner_mode == HImode)
27646 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
27647 target = gen_lowpart (SImode, target);
27650 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill the vector to memory and load the element.  */
27654 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27656 emit_move_insn (mem, vec);
27658 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27659 emit_move_insn (target, tmp);
27663 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
27664 pattern to reduce; DEST is the destination; IN is the input vector. */
/* Classic log2 reduction: combine high/low halves with movhlps, then
   combine the remaining pair via a shufps that replicates lane 1.  */
27667 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
27669 rtx tmp1, tmp2, tmp3;
27671 tmp1 = gen_reg_rtx (V4SFmode);
27672 tmp2 = gen_reg_rtx (V4SFmode);
27673 tmp3 = gen_reg_rtx (V4SFmode);
27675 emit_insn (gen_sse_movhlps (tmp1, in, in));
27676 emit_insn (fn (tmp2, tmp1, in));
27678 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
27679 GEN_INT (1), GEN_INT (1),
27680 GEN_INT (1+4), GEN_INT (1+4)));
27681 emit_insn (fn (dest, tmp2, tmp3));
27684 /* Target hook for scalar_mode_supported_p. */
/* NOTE(review): sampled extract -- the return values of the first two
   branches are not visible here.  */
27686 ix86_scalar_mode_supported_p (enum machine_mode mode)
27688 if (DECIMAL_FLOAT_MODE_P (mode))
27690 else if (mode == TFmode)
27693 return default_scalar_mode_supported_p (mode);
27696 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported if any enabled ISA extension (SSE/SSE2/AVX/
   MMX/3DNow!) can hold it.  NOTE(review): the `return true' bodies are
   among the lines missing from this extract.  */
27698 ix86_vector_mode_supported_p (enum machine_mode mode)
27700 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
27702 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
27704 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
27706 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
27708 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
27713 /* Target hook for c_mode_for_suffix. */
/* NOTE(review): the body is almost entirely missing from this extract;
   presumably it maps constant suffix characters to machine modes.  */
27714 static enum machine_mode
27715 ix86_c_mode_for_suffix (char suffix)
27725 /* Worker function for TARGET_MD_ASM_CLOBBERS.
27727 We do this in the new i386 backend to maintain source compatibility
27728 with the old cc0-based compiler. */
/* Implicitly adds "flags" and "fpsr" to every asm's clobber list.
   NOTE(review): the third parameter and the return statement are among
   the lines missing from this extract.  */
27731 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
27732 tree inputs ATTRIBUTE_UNUSED,
27735 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
27737 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
27742 /* Implements target vector targetm.asm.encode_section_info. This
27743 is not used by netware. */
/* After the default encoding, mark static/external variables that live in
   the large data section so addressing decisions can see it.  */
27745 static void ATTRIBUTE_UNUSED
27746 ix86_encode_section_info (tree decl, rtx rtl, int first)
27748 default_encode_section_info (decl, rtl, first);
27750 if (TREE_CODE (decl) == VAR_DECL
27751 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
27752 && ix86_in_large_data_p (decl))
27753 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
27756 /* Worker function for REVERSE_CONDITION. */
/* FP compares need the unordered-aware reversal; everything else can use
   the plain one.  */
27759 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
27761 return (mode != CCFPmode && mode != CCFPUmode
27762 ? reverse_condition (code)
27763 : reverse_condition_maybe_unordered (code));
27766 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template; a REG_DEAD note on the source lets us
   use the popping forms (fstp/ffreep) to keep the x87 stack balanced.
   NOTE(review): sampled extract -- some branches' return strings are not
   visible here.  */
27770 output_387_reg_move (rtx insn, rtx *operands)
27772 if (REG_P (operands[0]))
27774 if (REG_P (operands[1])
27775 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
27777 if (REGNO (operands[0]) == FIRST_STACK_REG)
27778 return output_387_ffreep (operands, 0);
27779 return "fstp\t%y0";
27781 if (STACK_TOP_P (operands[0]))
27782 return "fld%z1\t%y1";
27785 else if (MEM_P (operands[0]))
27787 gcc_assert (REG_P (operands[1]));
27788 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
27789 return "fstp%z0\t%y0";
27792 /* There is no non-popping store to memory for XFmode.
27793 So if we need one, follow the store with a load. */
27794 if (GET_MODE (operands[0]) == XFmode)
27795 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
27797 return "fst%z0\t%y0";
27804 /* Output code to perform a conditional jump to LABEL, if C2 flag in
27805 FP status register is set. */
/* Reads the FP status word with fnstsw; with SAHF available transfer it
   to EFLAGS and branch on "unordered", otherwise test bit 0x04 directly.  */
27808 ix86_emit_fp_unordered_jump (rtx label)
27810 rtx reg = gen_reg_rtx (HImode);
27813 emit_insn (gen_x86_fnstsw_1 (reg));
27815 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
27817 emit_insn (gen_x86_sahf_1 (reg));
27819 temp = gen_rtx_REG (CCmode, FLAGS_REG);
27820 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
27824 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
27826 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27827 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
27830 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
27831 gen_rtx_LABEL_REF (VOIDmode, label),
27833 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
27835 emit_jump_insn (temp);
/* Hint: the unordered branch is taken only ~10% of the time.  */
27836 predict_jump (REG_BR_PROB_BASE * 10 / 100);
27839 /* Output code to perform a log1p XFmode calculation. */
/* For |op1| below ~0.2929 (sqrt(2)/2 - 1 region) use fyl2xp1 directly,
   which is accurate near zero; otherwise compute log2(1 + op1) via
   fyl2x.  Both scale by fldln2 to get the natural log.  */
27841 void ix86_emit_i387_log1p (rtx op0, rtx op1)
27843 rtx label1 = gen_label_rtx ();
27844 rtx label2 = gen_label_rtx ();
27846 rtx tmp = gen_reg_rtx (XFmode);
27847 rtx tmp2 = gen_reg_rtx (XFmode);
27849 emit_insn (gen_absxf2 (tmp, op1));
27850 emit_insn (gen_cmpxf (tmp,
27851 CONST_DOUBLE_FROM_REAL_VALUE (
27852 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
27854 emit_jump_insn (gen_bge (label1));
27856 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
27857 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
27858 emit_jump (label2);
27860 emit_label (label1);
27861 emit_move_insn (tmp, CONST1_RTX (XFmode));
27862 emit_insn (gen_addxf3 (tmp, op1, tmp));
27863 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
27864 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
27866 emit_label (label2);
27869 /* Output code to perform a Newton-Raphson approximation of a single precision
27870 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* One refinement step of the rcpss estimate:
   a / b ~= a * x0 * (2 - b * x0), with x0 = rcp(b).  Works for scalar
   SFmode or an SF vector mode.  */
27872 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
27874 rtx x0, x1, e0, e1, two;
27876 x0 = gen_reg_rtx (mode);
27877 e0 = gen_reg_rtx (mode);
27878 e1 = gen_reg_rtx (mode);
27879 x1 = gen_reg_rtx (mode);
27881 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
27883 if (VECTOR_MODE_P (mode))
27884 two = ix86_build_const_vector (SFmode, true, two);
27886 two = force_reg (mode, two);
27888 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
27890 /* x0 = rcp(b) estimate */
27891 emit_insn (gen_rtx_SET (VOIDmode, x0,
27892 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
27895 emit_insn (gen_rtx_SET (VOIDmode, e0,
27896 gen_rtx_MULT (mode, x0, b)));
27898 emit_insn (gen_rtx_SET (VOIDmode, e1,
27899 gen_rtx_MINUS (mode, two, e0)));
27901 emit_insn (gen_rtx_SET (VOIDmode, x1,
27902 gen_rtx_MULT (mode, x0, e1)));
27904 emit_insn (gen_rtx_SET (VOIDmode, res,
27905 gen_rtx_MULT (mode, a, x1)));
27908 /* Output code to perform a Newton-Raphson approximation of a
27909 single precision floating point [reciprocal] square root. */
/* One refinement step of the rsqrtss estimate using constants -3 and -0.5:
     sqrt(a)  ~= -0.5 * (a*x0) * (a*x0*x0 - 3)
     rsqrt(a) ~= -0.5 *  x0    * (a*x0*x0 - 3)
   NOTE(review): the boolean parameter choosing sqrt vs rsqrt is among
   the lines missing from this extract.  */
27911 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
27914 rtx x0, e0, e1, e2, e3, mthree, mhalf;
27917 x0 = gen_reg_rtx (mode);
27918 e0 = gen_reg_rtx (mode);
27919 e1 = gen_reg_rtx (mode);
27920 e2 = gen_reg_rtx (mode);
27921 e3 = gen_reg_rtx (mode);
27923 real_from_integer (&r, VOIDmode, -3, -1, 0);
27924 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
27926 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
27927 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
27929 if (VECTOR_MODE_P (mode))
27931 mthree = ix86_build_const_vector (SFmode, true, mthree);
27932 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
27935 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
27936 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
27938 /* x0 = rsqrt(a) estimate */
27939 emit_insn (gen_rtx_SET (VOIDmode, x0,
27940 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
27943 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
27948 zero = gen_reg_rtx (mode);
27949 mask = gen_reg_rtx (mode);
27951 zero = force_reg (mode, CONST0_RTX(mode));
/* mask = (a != 0); AND-ing it into x0 zeroes the infinite estimate.  */
27952 emit_insn (gen_rtx_SET (VOIDmode, mask,
27953 gen_rtx_NE (mode, zero, a)));
27955 emit_insn (gen_rtx_SET (VOIDmode, x0,
27956 gen_rtx_AND (mode, x0, mask)));
27960 emit_insn (gen_rtx_SET (VOIDmode, e0,
27961 gen_rtx_MULT (mode, x0, a)));
27963 emit_insn (gen_rtx_SET (VOIDmode, e1,
27964 gen_rtx_MULT (mode, e0, x0)));
27967 mthree = force_reg (mode, mthree);
27968 emit_insn (gen_rtx_SET (VOIDmode, e2,
27969 gen_rtx_PLUS (mode, e1, mthree)));
27971 mhalf = force_reg (mode, mhalf);
27973 /* e3 = -.5 * x0 */
27974 emit_insn (gen_rtx_SET (VOIDmode, e3,
27975 gen_rtx_MULT (mode, x0, mhalf)));
27977 /* e3 = -.5 * e0 */
27978 emit_insn (gen_rtx_SET (VOIDmode, e3,
27979 gen_rtx_MULT (mode, e0, mhalf)));
27980 /* ret = e2 * e3 */
27981 emit_insn (gen_rtx_SET (VOIDmode, res,
27982 gen_rtx_MULT (mode, e2, e3)));
27985 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
27987 static void ATTRIBUTE_UNUSED
27988 i386_solaris_elf_named_section (const char *name, unsigned int flags,
27991 /* With Binutils 2.15, the "@unwind" marker must be specified on
27992 every occurrence of the ".eh_frame" section, not just the first
/* NOTE(review): the first half of the condition guarding this strcmp
   (presumably an unwind-flag test) is missing from this extract.  */
27995 && strcmp (name, ".eh_frame") == 0)
27997 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
27998 flags & SECTION_WRITE ? "aw" : "a");
/* Otherwise emit the generic ELF section directive.  */
28001 default_elf_asm_named_section (name, flags, decl);
28004 /* Return the mangling of TYPE if it is an extended fundamental type. */
28006 static const char *
28007 ix86_mangle_type (const_tree type)
28009 type = TYPE_MAIN_VARIANT (type);
/* Only void/bool/integer/real types can be extended fundamental types;
   everything else gets no special mangling here.  */
28011 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28012 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28015 switch (TYPE_MODE (type))
28018 /* __float128 is "g". */
28021 /* "long double" or __float80 is "e". */
/* NOTE(review): the case labels and return statements of this switch are
   absent from this sampled extract.  */
28028 /* For 32-bit code we can save PIC register setup by using
28029 __stack_chk_fail_local hidden function instead of calling
28030 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
28031 register, so it is better to call __stack_chk_fail directly. */
/* Implement TARGET_STACK_PROTECT_FAIL.  (The return-type line is absent
   from this extract.)  */
28034 ix86_stack_protect_fail (void)
28036 return TARGET_64BIT
28037 ? default_external_stack_protect_fail ()
28038 : default_hidden_stack_protect_fail ();
28041 /* Select a format to encode pointers in exception handling data. CODE
28042 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
28043 true if the symbol may be affected by dynamic relocations.
28045 ??? All x86 object file formats are capable of representing this.
28046 After all, the relocation needed is the same as for the call insn.
28047 Whether or not a particular assembler allows us to enter such, I
28048 guess we'll have to see. */
28050 asm_preferred_eh_data_format (int code, int global)
/* PIC path: default to 8-byte signed data, narrowed to 4 bytes for the
   small/medium code models.  NOTE(review): the guard around this branch
   (presumably a flag_pic test) is missing from this extract.  */
28054 int type = DW_EH_PE_sdata8;
28056 || ix86_cmodel == CM_SMALL_PIC
28057 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28058 type = DW_EH_PE_sdata4;
28059 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC path: small absolute encodings where the code model allows.  */
28061 if (ix86_cmodel == CM_SMALL
28062 || (ix86_cmodel == CM_MEDIUM && code))
28063 return DW_EH_PE_udata4;
28064 return DW_EH_PE_absptr;
28067 /* Expand copysign from SIGN to the positive value ABS_VALUE
28068 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* the sign bit of SIGN; otherwise a fresh sign-bit mask is built here.  */
28071 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28073 enum machine_mode mode = GET_MODE (sign);
28074 rtx sgn = gen_reg_rtx (mode);
28075 if (mask == NULL_RTX)
28077 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28078 if (!VECTOR_MODE_P (mode))
28080 /* We need to generate a scalar mode mask in this case. */
28081 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28082 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28083 mask = gen_reg_rtx (mode);
28084 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* A caller-supplied MASK keeps everything BUT the sign bit, so invert it
   before extracting the sign.  */
28088 mask = gen_rtx_NOT (mode, mask);
/* sgn = sign & sign-bit-mask; result = abs_value | sgn.  */
28089 emit_insn (gen_rtx_SET (VOIDmode, sgn,
28090 gen_rtx_AND (mode, mask, sign)));
28091 emit_insn (gen_rtx_SET (VOIDmode, result,
28092 gen_rtx_IOR (mode, abs_value, sgn)));
28095 /* Expand fabs (OP0) and return a new rtx that holds the result. The
28096 mask for masking out the sign-bit is stored in *SMASK, if that is
/* non-null, so callers can reuse it for a later copysign.  */
28099 ix86_expand_sse_fabs (rtx op0, rtx *smask)
28101 enum machine_mode mode = GET_MODE (op0);
28104 xa = gen_reg_rtx (mode);
/* Build an all-but-sign-bit mask (invert=true).  */
28105 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28106 if (!VECTOR_MODE_P (mode))
28108 /* We need to generate a scalar mode mask in this case. */
28109 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28110 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28111 mask = gen_reg_rtx (mode);
28112 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 & mask -- clears the sign bit, i.e. fabs.  */
28114 emit_insn (gen_rtx_SET (VOIDmode, xa,
28115 gen_rtx_AND (mode, op0, mask)));
28123 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28124 swapping the operands if SWAP_OPERANDS is true. The expanded
28125 code is a forward jump to a newly created label in case the
28126 comparison is true. The generated label rtx is returned. */
28128 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28129 bool swap_operands)
28140 label = gen_label_rtx ();
/* Compare in CCFPUmode (unordered FP compare that doesn't trap on NaN).  */
28141 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28142 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28143 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
/* Conditional jump to LABEL when the condition holds.  */
28144 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28145 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28146 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28147 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28148 JUMP_LABEL (tmp) = label;
28153 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28154 using comparison code CODE. Operands are swapped for the comparison if
28155 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
28157 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28158 bool swap_operands)
28160 enum machine_mode mode = GET_MODE (op0);
28161 rtx mask = gen_reg_rtx (mode);
/* Use the DF or SF mask-compare pattern as appropriate; the mask is
   all-ones where the comparison is true, all-zeros otherwise.  */
28170 if (mode == DFmode)
28171 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28172 gen_rtx_fmt_ee (code, mode, op0, op1)));
28174 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28175 gen_rtx_fmt_ee (code, mode, op0, op1)));
28180 /* Generate and return a rtx of mode MODE for 2**n where n is the number
28181 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
28183 ix86_gen_TWO52 (enum machine_mode mode)
28185 REAL_VALUE_TYPE TWO52r;
/* 2**52 for DFmode (52-bit mantissa), 2**23 for SFmode.  */
28188 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28189 TWO52 = const_double_from_real_value (TWO52r, mode);
28190 TWO52 = force_reg (mode, TWO52);
28195 /* Expand SSE sequence for computing lround from OP1 storing
/* the integer result into OP0.  */
28198 ix86_expand_lround (rtx op0, rtx op1)
28200 /* C code for the stuff we're doing below:
28201 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
/* return (long)tmp;  (truncating conversion rounds toward zero)  */
28204 enum machine_mode mode = GET_MODE (op1);
28205 const struct real_format *fmt;
28206 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28209 /* load nextafter (0.5, 0.0) */
28210 fmt = REAL_MODE_FORMAT (mode);
28211 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28212 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28214 /* adj = copysign (0.5, op1) */
28215 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
28216 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
28218 /* adj = op1 + adj */
28219 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
28221 /* op0 = (imode)adj */
28222 expand_fix (op0, adj, 0);
28225 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* into OPERAND0; DO_FLOOR selects floor (true) vs. ceil (false).  */
28228 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
28230 /* C code for the stuff we're doing below (for do_floor):
28232 xi -= (double)xi > op1 ? 1 : 0;
/* return xi;  (for ceil the adjustment direction is reversed)  */
28235 enum machine_mode fmode = GET_MODE (op1);
28236 enum machine_mode imode = GET_MODE (op0);
28237 rtx ireg, freg, label, tmp;
28239 /* reg = (long)op1 */
28240 ireg = gen_reg_rtx (imode);
28241 expand_fix (ireg, op1, 0);
28243 /* freg = (double)reg */
28244 freg = gen_reg_rtx (fmode);
28245 expand_float (freg, ireg, 0);
28247 /* ireg = (freg > op1) ? ireg - 1 : ireg */
28248 label = ix86_expand_sse_compare_and_jump (UNLE,
28249 freg, op1, !do_floor);
28250 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
28251 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
28252 emit_move_insn (ireg, tmp);
28254 emit_label (label);
28255 LABEL_NUSES (label) = 1;
28257 emit_move_insn (op0, ireg);
28260 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
28261 result in OPERAND0. */
28263 ix86_expand_rint (rtx operand0, rtx operand1)
28265 /* C code for the stuff we're doing below:
28266 xa = fabs (operand1);
28267 if (!isless (xa, 2**52))
28269 xa = xa + 2**52 - 2**52;
28270 return copysign (xa, operand1);
/* Adding and subtracting 2**52 forces rounding to integer in the FPU's
   current (round-to-nearest) mode; values >= 2**52 are already integral.  */
28272 enum machine_mode mode = GET_MODE (operand0);
28273 rtx res, xa, label, TWO52, mask;
28275 res = gen_reg_rtx (mode);
28276 emit_move_insn (res, operand1);
28278 /* xa = abs (operand1) */
28279 xa = ix86_expand_sse_fabs (res, &mask);
28281 /* if (!isless (xa, TWO52)) goto label; */
28282 TWO52 = ix86_gen_TWO52 (mode);
28283 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28285 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28286 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign (preserves -0.0 and negative results).  */
28288 ix86_sse_copysign_to_positive (res, xa, res, mask);
28290 emit_label (label);
28291 LABEL_NUSES (label) = 1;
28293 emit_move_insn (operand0, res);
28296 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* into OPERAND0, without using DImode cvttsd2siq (works on 32-bit).
   DO_FLOOR selects floor (true) vs. ceil (false).  */
28299 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
28301 /* C code for the stuff we expand below.
28302 double xa = fabs (x), x2;
28303 if (!isless (xa, TWO52))
28305 xa = xa + TWO52 - TWO52;
28306 x2 = copysign (xa, x);
/* ... then compensate by +-1 where the round-to-nearest result went the
   wrong way relative to floor/ceil.  */
28315 enum machine_mode mode = GET_MODE (operand0);
28316 rtx xa, TWO52, tmp, label, one, res, mask;
28318 TWO52 = ix86_gen_TWO52 (mode);
28320 /* Temporary for holding the result, initialized to the input
28321 operand to ease control flow. */
28322 res = gen_reg_rtx (mode);
28323 emit_move_insn (res, operand1);
28325 /* xa = abs (operand1) */
28326 xa = ix86_expand_sse_fabs (res, &mask);
28328 /* if (!isless (xa, TWO52)) goto label; */
28329 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28331 /* xa = xa + TWO52 - TWO52; */
28332 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28333 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28335 /* xa = copysign (xa, operand1) */
28336 ix86_sse_copysign_to_positive (xa, xa, res, mask);
28338 /* generate 1.0 or -1.0 */
28339 one = force_reg (mode,
28340 const_double_from_real_value (do_floor
28341 ? dconst1 : dconstm1, mode));
28343 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28344 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor)
28345 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28346 gen_rtx_AND (mode, one, tmp)));
28347 /* We always need to subtract here to preserve signed zero. */
28348 tmp = expand_simple_binop (mode, MINUS,
28349 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28350 emit_move_insn (res, tmp);
28352 emit_label (label);
28353 LABEL_NUSES (label) = 1;
28355 emit_move_insn (operand0, res);
28358 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* into OPERAND0, via integer truncation plus a +-1 compensation.
   DO_FLOOR selects floor (true) vs. ceil (false).  */
28361 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
28363 /* C code for the stuff we expand below.
28364 double xa = fabs (x), x2;
28365 if (!isless (xa, TWO52))
28367 x2 = (double)(long)x;
28374 if (HONOR_SIGNED_ZEROS (mode))
28375 return copysign (x2, x);
28378 enum machine_mode mode = GET_MODE (operand0);
28379 rtx xa, xi, TWO52, tmp, label, one, res, mask;
28381 TWO52 = ix86_gen_TWO52 (mode);
28383 /* Temporary for holding the result, initialized to the input
28384 operand to ease control flow. */
28385 res = gen_reg_rtx (mode);
28386 emit_move_insn (res, operand1);
28388 /* xa = abs (operand1) */
28389 xa = ix86_expand_sse_fabs (res, &mask);
28391 /* if (!isless (xa, TWO52)) goto label; */
28392 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28394 /* xa = (double)(long)x */
28395 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28396 expand_fix (xi, res, 0);
28397 expand_float (xa, xi, 0);
/* generate 1.0 */
28400 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28402 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28403 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28404 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28405 gen_rtx_AND (mode, one, tmp)));
28406 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
28407 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28408 emit_move_insn (res, tmp);
/* Copy the sign back so floor(-0.0) == -0.0 when signed zeros matter.  */
28410 if (HONOR_SIGNED_ZEROS (mode))
28411 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28413 emit_label (label);
28414 LABEL_NUSES (label) = 1;
28416 emit_move_insn (operand0, res);
28419 /* Expand SSE sequence for computing round from OPERAND1 storing
28420 into OPERAND0. Sequence that works without relying on DImode truncation
28421 via cvttsd2siq that is only available on 64bit targets. */
28423 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
28425 /* C code for the stuff we expand below.
28426 double xa = fabs (x), xa2, x2;
28427 if (!isless (xa, TWO52))
28429 Using the absolute value and copying back sign makes
28430 -0.0 -> -0.0 correct.
28431 xa2 = xa + TWO52 - TWO52;
28436 else if (dxa > 0.5)
28438 x2 = copysign (xa2, x);
28441 enum machine_mode mode = GET_MODE (operand0);
28442 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
28444 TWO52 = ix86_gen_TWO52 (mode);
28446 /* Temporary for holding the result, initialized to the input
28447 operand to ease control flow. */
28448 res = gen_reg_rtx (mode);
28449 emit_move_insn (res, operand1);
28451 /* xa = abs (operand1) */
28452 xa = ix86_expand_sse_fabs (res, &mask);
28454 /* if (!isless (xa, TWO52)) goto label; */
28455 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28457 /* xa2 = xa + TWO52 - TWO52; */
28458 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28459 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
28461 /* dxa = xa2 - xa; */
28462 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
28464 /* generate 0.5, 1.0 and -0.5 */
28465 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
28466 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
28467 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* Correct round-to-nearest-even back to round-half-away-from-zero.  */
28471 tmp = gen_reg_rtx (mode);
28472 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
28473 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
28474 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28475 gen_rtx_AND (mode, one, tmp)));
28476 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28477 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
28478 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
28479 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28480 gen_rtx_AND (mode, one, tmp)));
28481 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28483 /* res = copysign (xa2, operand1) */
28484 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
28486 emit_label (label);
28487 LABEL_NUSES (label) = 1;
28489 emit_move_insn (operand0, res);
28492 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* into OPERAND0 via an integer round-trip (cvttsd2si / cvtsi2sd).  */
28495 ix86_expand_trunc (rtx operand0, rtx operand1)
28497 /* C code for SSE variant we expand below.
28498 double xa = fabs (x), x2;
28499 if (!isless (xa, TWO52))
28501 x2 = (double)(long)x;
28502 if (HONOR_SIGNED_ZEROS (mode))
28503 return copysign (x2, x);
28506 enum machine_mode mode = GET_MODE (operand0);
28507 rtx xa, xi, TWO52, label, res, mask;
28509 TWO52 = ix86_gen_TWO52 (mode);
28511 /* Temporary for holding the result, initialized to the input
28512 operand to ease control flow. */
28513 res = gen_reg_rtx (mode);
28514 emit_move_insn (res, operand1);
28516 /* xa = abs (operand1) */
28517 xa = ix86_expand_sse_fabs (res, &mask);
28519 /* if (!isless (xa, TWO52)) goto label; */
28520 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28522 /* x = (double)(long)x */
28523 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28524 expand_fix (xi, res, 0);
28525 expand_float (res, xi, 0);
/* Restore the sign so trunc(-0.0) == -0.0 when signed zeros matter.  */
28527 if (HONOR_SIGNED_ZEROS (mode))
28528 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28530 emit_label (label);
28531 LABEL_NUSES (label) = 1;
28533 emit_move_insn (operand0, res);
28536 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* into OPERAND0, without DImode truncation (usable on 32-bit targets).  */
28539 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
28541 enum machine_mode mode = GET_MODE (operand0);
28542 rtx xa, mask, TWO52, label, one, res, smask, tmp;
28544 /* C code for SSE variant we expand below.
28545 double xa = fabs (x), x2;
28546 if (!isless (xa, TWO52))
28548 xa2 = xa + TWO52 - TWO52;
28552 x2 = copysign (xa2, x);
28556 TWO52 = ix86_gen_TWO52 (mode);
28558 /* Temporary for holding the result, initialized to the input
28559 operand to ease control flow. */
28560 res = gen_reg_rtx (mode);
28561 emit_move_insn (res, operand1);
28563 /* xa = abs (operand1) */
28564 xa = ix86_expand_sse_fabs (res, &smask);
28566 /* if (!isless (xa, TWO52)) goto label; */
28567 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28569 /* res = xa + TWO52 - TWO52; */
28570 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28571 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
28572 emit_move_insn (res, tmp);
/* generate 1.0 */
28575 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28577 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
28578 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
28579 emit_insn (gen_rtx_SET (VOIDmode, mask,
28580 gen_rtx_AND (mode, mask, one)));
28581 tmp = expand_simple_binop (mode, MINUS,
28582 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
28583 emit_move_insn (res, tmp);
28585 /* res = copysign (res, operand1) */
28586 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
28588 emit_label (label);
28589 LABEL_NUSES (label) = 1;
28591 emit_move_insn (operand0, res);
28594 /* Expand SSE sequence for computing round from OPERAND1 storing
/* into OPERAND0 by adding just-under-0.5 and truncating.  */
28597 ix86_expand_round (rtx operand0, rtx operand1)
28599 /* C code for the stuff we're doing below:
28600 double xa = fabs (x);
28601 if (!isless (xa, TWO52))
28603 xa = (double)(long)(xa + nextafter (0.5, 0.0));
28604 return copysign (xa, x);
/* nextafter (0.5, 0.0) instead of 0.5 avoids rounding exact .5 values
   the wrong way after the addition's own rounding.  */
28606 enum machine_mode mode = GET_MODE (operand0);
28607 rtx res, TWO52, xa, label, xi, half, mask;
28608 const struct real_format *fmt;
28609 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28611 /* Temporary for holding the result, initialized to the input
28612 operand to ease control flow. */
28613 res = gen_reg_rtx (mode);
28614 emit_move_insn (res, operand1);
28616 TWO52 = ix86_gen_TWO52 (mode);
28617 xa = ix86_expand_sse_fabs (res, &mask);
28618 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28620 /* load nextafter (0.5, 0.0) */
28621 fmt = REAL_MODE_FORMAT (mode);
28622 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28623 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28625 /* xa = xa + 0.5 */
28626 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
28627 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
28629 /* xa = (double)(int64_t)xa */
28630 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28631 expand_fix (xi, xa, 0);
28632 expand_float (xa, xi, 0);
28634 /* res = copysign (xa, operand1) */
28635 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
28637 emit_label (label);
28638 LABEL_NUSES (label) = 1;
28640 emit_move_insn (operand0, res);
28644 /* Validate whether a SSE5 instruction is valid or not.
28645 OPERANDS is the array of operands.
28646 NUM is the number of operands.
28647 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
28648 NUM_MEMORY is the maximum number of memory operands to accept.
28649 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
28652 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
28653 bool uses_oc0, int num_memory, bool commutative)
28659 /* Count the number of memory arguments */
28662 for (i = 0; i < num; i++)
28664 enum machine_mode mode = GET_MODE (operands[i]);
28665 if (register_operand (operands[i], mode))
28668 else if (memory_operand (operands[i], mode))
28670 mem_mask |= (1 << i);
28676 rtx pattern = PATTERN (insn);
28678 /* allow 0 for pcmov */
28679 if (GET_CODE (pattern) != SET
28680 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
28682 || operands[i] != CONST0_RTX (mode))
28687 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
28688 a memory operation. */
28689 if (num_memory < 0)
28691 num_memory = -num_memory;
28692 if ((mem_mask & (1 << (num-1))) != 0)
28694 mem_mask &= ~(1 << (num-1));
28699 /* If there were no memory operations, allow the insn */
28703 /* Do not allow the destination register to be a memory operand. */
28704 else if (mem_mask & (1 << 0))
28707 /* If there are too many memory operations, disallow the instruction. While
28708 the hardware only allows 1 memory reference, before register allocation
28709 for some insns, we allow two memory operations sometimes in order to allow
28710 code like the following to be optimized:
28712 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
28714 or similar cases that are vectorized into using the fmaddss
28716 else if (mem_count > num_memory)
28719 /* Don't allow more than one memory operation if not optimizing. */
28720 else if (mem_count > 1 && !optimize)
28723 else if (num == 4 && mem_count == 1)
28725 /* formats (destination is the first argument), example fmaddss:
28726 xmm1, xmm1, xmm2, xmm3/mem
28727 xmm1, xmm1, xmm2/mem, xmm3
28728 xmm1, xmm2, xmm3/mem, xmm1
28729 xmm1, xmm2/mem, xmm3, xmm1 */
28731 return ((mem_mask == (1 << 1))
28732 || (mem_mask == (1 << 2))
28733 || (mem_mask == (1 << 3)));
28735 /* format, example pmacsdd:
28736 xmm1, xmm2, xmm3/mem, xmm1 */
28738 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
28740 return (mem_mask == (1 << 2));
28743 else if (num == 4 && num_memory == 2)
28745 /* If there are two memory operations, we can load one of the memory ops
28746 into the destination register. This is for optimizing the
28747 multiply/add ops, which the combiner has optimized both the multiply
28748 and the add insns to have a memory operation. We have to be careful
28749 that the destination doesn't overlap with the inputs. */
28750 rtx op0 = operands[0];
28752 if (reg_mentioned_p (op0, operands[1])
28753 || reg_mentioned_p (op0, operands[2])
28754 || reg_mentioned_p (op0, operands[3]))
28757 /* formats (destination is the first argument), example fmaddss:
28758 xmm1, xmm1, xmm2, xmm3/mem
28759 xmm1, xmm1, xmm2/mem, xmm3
28760 xmm1, xmm2, xmm3/mem, xmm1
28761 xmm1, xmm2/mem, xmm3, xmm1
28763 For the oc0 case, we will load either operands[1] or operands[3] into
28764 operands[0], so any combination of 2 memory operands is ok. */
28768 /* format, example pmacsdd:
28769 xmm1, xmm2, xmm3/mem, xmm1
28771 For the integer multiply/add instructions be more restrictive and
28772 require operands[2] and operands[3] to be the memory operands. */
/* BUGFIX: the second disjunct previously lacked "mem_mask ==", so the
   constant ((1 << 2) | (1 << 3)) made the condition always true and any
   two-memory-operand combination was accepted.  Compare against
   mem_mask so only the {1,3} and {2,3} memory patterns are allowed.  */
28774 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
28776 return (mem_mask == ((1 << 2) | (1 << 3)));
28779 else if (num == 3 && num_memory == 1)
28781 /* formats, example protb:
28782 xmm1, xmm2, xmm3/mem
28783 xmm1, xmm2/mem, xmm3 */
28785 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
28787 /* format, example comeq:
28788 xmm1, xmm2, xmm3/mem */
28790 return (mem_mask == (1 << 2));
/* All valid NUM / NUM_MEMORY combinations are handled above.  */
28794 gcc_unreachable ();
28800 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
28801 hardware will allow by using the destination register to load one of the
28802 memory operations. Presently this is used by the multiply/add routines to
28803 allow 2 memory references. */
28806 ix86_expand_sse5_multiple_memory (rtx operands[],
28808 enum machine_mode mode)
28810 rtx op0 = operands[0];
/* The destination must be a register that does not overlap any input,
   otherwise loading into it would clobber an operand.  */
28812 || memory_operand (op0, mode)
28813 || reg_mentioned_p (op0, operands[1])
28814 || reg_mentioned_p (op0, operands[2])
28815 || reg_mentioned_p (op0, operands[3]))
28816 gcc_unreachable ();
28818 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
28819 the destination register. */
28820 if (memory_operand (operands[1], mode))
28822 emit_move_insn (op0, operands[1]);
28825 else if (memory_operand (operands[3], mode))
28827 emit_move_insn (op0, operands[3]);
/* Caller guaranteed one of operands[1]/operands[3] is a memory operand.  */
28831 gcc_unreachable ();
28837 /* Table of valid machine attributes. */
28838 static const struct attribute_spec ix86_attribute_table[] =
28840 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
28841 /* Stdcall attribute says callee is responsible for popping arguments
28842 if they are not variable. */
28843 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28844 /* Fastcall attribute says callee is responsible for popping arguments
28845 if they are not variable. */
28846 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28847 /* Cdecl attribute says the callee is a normal C declaration */
28848 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28849 /* Regparm attribute specifies how many integer arguments are to be
28850 passed in registers. */
28851 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
28852 /* Sseregparm attribute says we are using x86_64 calling conventions
28853 for FP arguments. */
28854 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28855 /* force_align_arg_pointer says this function realigns the stack at entry. */
28856 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
28857 false, true, true, ix86_handle_cconv_attribute },
28858 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28859 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
28860 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
28861 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* ms_struct/gcc_struct select the structure layout convention.  */
28863 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
28864 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
28865 #ifdef SUBTARGET_ATTRIBUTE_TABLE
28866 SUBTARGET_ATTRIBUTE_TABLE,
28868 /* ms_abi and sysv_abi calling convention function attributes. */
28869 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
28870 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* End-of-table sentinel.  */
28872 { NULL, 0, 0, false, false, false, NULL }
28875 /* Implement targetm.vectorize.builtin_vectorization_cost. */
28877 x86_builtin_vectorization_cost (bool runtime_test)
28879 /* If the branch of the runtime test is taken - i.e. - the vectorized
28880 version is skipped - this incurs a misprediction cost (because the
28881 vectorized version is expected to be the fall-through). So we subtract
28882 the latency of a mispredicted branch from the costs that are incured
28883 when the vectorized version is executed.
28885 TODO: The values in individual target tables have to be tuned or new
28886 fields may be needed. For eg. on K8, the default branch path is the
28887 not-taken path. If the taken path is predicted correctly, the minimum
28888 penalty of going down the taken-path is 1 cycle. If the taken-path is
28889 not predicted correctly, then the minimum penalty is 10 cycles. */
/* Returns the negated taken-branch cost for the runtime-test case; the
   non-runtime-test return value is absent from this extract.  */
28893 return (-(ix86_cost->cond_taken_branch_cost));
28899 /* This function returns the calling abi specific va_list type node.
28900 It returns the FNDECL specific va_list type. */
28903 ix86_fn_abi_va_list (tree fndecl)
/* On non-64-bit targets there is only one va_list type.  */
28908 return va_list_type_node;
28909 gcc_assert (fndecl != NULL_TREE);
28910 abi = ix86_function_abi ((const_tree) fndecl);
/* Pick the MS or SYSV va_list node according to the function's ABI.  */
28913 return ms_va_list_type_node;
28915 return sysv_va_list_type_node;
28918 /* Returns the canonical va_list type specified by TYPE. If there
28919 is no valid TYPE provided, it return NULL_TREE. */
/* Tries the default, SYSV and MS va_list nodes in turn; falls back to
   the generic std_canonical_va_list_type at the end.  */
28922 ix86_canonical_va_list_type (tree type)
28926 /* Resolve references and pointers to va_list type. */
28927 if (INDIRECT_REF_P (type))
28928 type = TREE_TYPE (type);
28929 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
28930 type = TREE_TYPE (type);
/* First candidate: the default __builtin_va_list.  */
28934 wtype = va_list_type_node;
28935 gcc_assert (wtype != NULL_TREE);
28937 if (TREE_CODE (wtype) == ARRAY_TYPE)
28939 /* If va_list is an array type, the argument may have decayed
28940 to a pointer type, e.g. by being passed to another function.
28941 In that case, unwrap both types so that we can compare the
28942 underlying records. */
28943 if (TREE_CODE (htype) == ARRAY_TYPE
28944 || POINTER_TYPE_P (htype))
28946 wtype = TREE_TYPE (wtype);
28947 htype = TREE_TYPE (htype);
28950 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28951 return va_list_type_node;
/* Second candidate: the SYSV ABI va_list.  */
28952 wtype = sysv_va_list_type_node;
28953 gcc_assert (wtype != NULL_TREE);
28955 if (TREE_CODE (wtype) == ARRAY_TYPE)
28957 /* If va_list is an array type, the argument may have decayed
28958 to a pointer type, e.g. by being passed to another function.
28959 In that case, unwrap both types so that we can compare the
28960 underlying records. */
28961 if (TREE_CODE (htype) == ARRAY_TYPE
28962 || POINTER_TYPE_P (htype))
28964 wtype = TREE_TYPE (wtype);
28965 htype = TREE_TYPE (htype);
28968 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28969 return sysv_va_list_type_node;
/* Third candidate: the MS ABI va_list.  */
28970 wtype = ms_va_list_type_node;
28971 gcc_assert (wtype != NULL_TREE);
28973 if (TREE_CODE (wtype) == ARRAY_TYPE)
28975 /* If va_list is an array type, the argument may have decayed
28976 to a pointer type, e.g. by being passed to another function.
28977 In that case, unwrap both types so that we can compare the
28978 underlying records. */
28979 if (TREE_CODE (htype) == ARRAY_TYPE
28980 || POINTER_TYPE_P (htype))
28982 wtype = TREE_TYPE (wtype);
28983 htype = TREE_TYPE (htype);
28986 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28987 return ms_va_list_type_node;
28990 return std_canonical_va_list_type (type);
28993 /* Iterate through the target-specific builtin types for va_list.
28994 IDX denotes the iterator, *PTREE is set to the result type of
28995 the va_list builtin, and *PNAME to its internal type.
28996 Returns zero if there is no element for this index, otherwise
28997 IDX should be increased upon the next call.
28998 Note, do not iterate a base builtin's name like __builtin_va_list.
28999 Used from c_common_nodes_and_builtins. */
29002 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* NOTE(review): the switch/case dispatch on IDX and the return statements
   are absent from this extract; only the two result assignments remain.  */
29008 *ptree = ms_va_list_type_node;
29009 *pname = "__builtin_ms_va_list";
29012 *ptree = sysv_va_list_type_node;
29013 *pname = "__builtin_sysv_va_list";
29021 /* Initialize the GCC target structure. */
/* NOTE(review): this listing is a lossy extraction of the original
   file -- blank lines and several preprocessor conditional lines
   (#else/#endif and some #if guards) were dropped, and the stray
   leading integer on each line is a listing artifact, not a C token.
   Each #undef/#define pair below overrides one default target hook;
   the values are collected into `targetm' via TARGET_INITIALIZER at
   the end of the file.  Review notes flag the visibly unbalanced
   conditionals.  */
29022 #undef TARGET_RETURN_IN_MEMORY
29023 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
29025 #undef TARGET_ATTRIBUTE_TABLE
29026 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
29027 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29028 # undef TARGET_MERGE_DECL_ATTRIBUTES
29029 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
/* NOTE(review): the #endif matching the #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
   above is missing from this extraction.  */
29032 #undef TARGET_COMP_TYPE_ATTRIBUTES
29033 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
/* Builtin-function hooks.  */
29035 #undef TARGET_INIT_BUILTINS
29036 #define TARGET_INIT_BUILTINS ix86_init_builtins
29037 #undef TARGET_EXPAND_BUILTIN
29038 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
29040 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
29041 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
29042 ix86_builtin_vectorized_function
29044 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
29045 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
29047 #undef TARGET_BUILTIN_RECIPROCAL
29048 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
/* Assembler output hooks.  */
29050 #undef TARGET_ASM_FUNCTION_EPILOGUE
29051 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
29053 #undef TARGET_ENCODE_SECTION_INFO
29054 #ifndef SUBTARGET_ENCODE_SECTION_INFO
29055 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
/* NOTE(review): an #else belongs between these two #defines, and an
   #endif after the second -- both lost in extraction.  As the text
   stands the second #define would unconditionally redefine the hook.  */
29057 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
29060 #undef TARGET_ASM_OPEN_PAREN
29061 #define TARGET_ASM_OPEN_PAREN ""
29062 #undef TARGET_ASM_CLOSE_PAREN
29063 #define TARGET_ASM_CLOSE_PAREN ""
29065 #undef TARGET_ASM_ALIGNED_HI_OP
29066 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
29067 #undef TARGET_ASM_ALIGNED_SI_OP
29068 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
/* Presumably wrapped in #ifdef ASM_QUAD / #endif upstream (listing
   lines 29069/29072 are absent) -- verify before reuse.  */
29070 #undef TARGET_ASM_ALIGNED_DI_OP
29071 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 tolerates unaligned data; reuse the aligned directives.  */
29074 #undef TARGET_ASM_UNALIGNED_HI_OP
29075 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
29076 #undef TARGET_ASM_UNALIGNED_SI_OP
29077 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
29078 #undef TARGET_ASM_UNALIGNED_DI_OP
29079 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction scheduler hooks.  */
29081 #undef TARGET_SCHED_ADJUST_COST
29082 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
29083 #undef TARGET_SCHED_ISSUE_RATE
29084 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
29085 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
29086 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
29087 ia32_multipass_dfa_lookahead
29089 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
29090 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
29093 #undef TARGET_HAVE_TLS
29094 #define TARGET_HAVE_TLS true
29096 #undef TARGET_CANNOT_FORCE_CONST_MEM
29097 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
29098 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
29099 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
29101 #undef TARGET_DELEGITIMIZE_ADDRESS
29102 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
29104 #undef TARGET_MS_BITFIELD_LAYOUT_P
29105 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* NOTE(review): darwin_binds_local_p is Darwin-specific; upstream this
   pair sits under an #if TARGET_MACHO / #endif guard that did not
   survive extraction.  As written it would apply unconditionally.  */
29108 #undef TARGET_BINDS_LOCAL_P
29109 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
29111 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29112 #undef TARGET_BINDS_LOCAL_P
29113 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
/* NOTE(review): the #endif closing the #if on listing line 29111 is
   missing from this extraction.  */
29116 #undef TARGET_ASM_OUTPUT_MI_THUNK
29117 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
29118 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
29119 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
29121 #undef TARGET_ASM_FILE_START
29122 #define TARGET_ASM_FILE_START x86_file_start
/* NOTE(review): the first continuation line of this expression -- the
   one opening the parenthesis and supplying the base TARGET_DEFAULT
   term (listing line 29126) -- is missing; the parentheses below are
   visibly unbalanced as a result.  */
29124 #undef TARGET_DEFAULT_TARGET_FLAGS
29125 #define TARGET_DEFAULT_TARGET_FLAGS \
29127 | TARGET_SUBTARGET_DEFAULT \
29128 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
29130 #undef TARGET_HANDLE_OPTION
29131 #define TARGET_HANDLE_OPTION ix86_handle_option
/* RTX cost and condition-code hooks.  */
29133 #undef TARGET_RTX_COSTS
29134 #define TARGET_RTX_COSTS ix86_rtx_costs
29135 #undef TARGET_ADDRESS_COST
29136 #define TARGET_ADDRESS_COST ix86_address_cost
29138 #undef TARGET_FIXED_CONDITION_CODE_REGS
29139 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
29140 #undef TARGET_CC_MODES_COMPATIBLE
29141 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
29143 #undef TARGET_MACHINE_DEPENDENT_REORG
29144 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
/* va_list construction and gimplification hooks.  */
29146 #undef TARGET_BUILD_BUILTIN_VA_LIST
29147 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
29149 #undef TARGET_FN_ABI_VA_LIST
29150 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
29152 #undef TARGET_CANONICAL_VA_LIST_TYPE
29153 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
29155 #undef TARGET_EXPAND_BUILTIN_VA_START
29156 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
29158 #undef TARGET_MD_ASM_CLOBBERS
29159 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Argument-passing / calling-convention hooks.  */
29161 #undef TARGET_PROMOTE_PROTOTYPES
29162 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
29163 #undef TARGET_STRUCT_VALUE_RTX
29164 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
29165 #undef TARGET_SETUP_INCOMING_VARARGS
29166 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
29167 #undef TARGET_MUST_PASS_IN_STACK
29168 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
29169 #undef TARGET_PASS_BY_REFERENCE
29170 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
29171 #undef TARGET_INTERNAL_ARG_POINTER
29172 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
29173 #undef TARGET_UPDATE_STACK_BOUNDARY
29174 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
29175 #undef TARGET_GET_DRAP_RTX
29176 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
29177 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
29178 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
29179 #undef TARGET_STRICT_ARGUMENT_NAMING
29180 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
29182 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
29183 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
/* Mode-support queries.  */
29185 #undef TARGET_SCALAR_MODE_SUPPORTED_P
29186 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
29188 #undef TARGET_VECTOR_MODE_SUPPORTED_P
29189 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
29191 #undef TARGET_C_MODE_FOR_SUFFIX
29192 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
/* Presumably guarded by a TLS-capability #ifdef upstream (listing
   lines 29193-29198 are partly absent) -- verify before reuse.  */
29195 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
29196 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
29199 #ifdef SUBTARGET_INSERT_ATTRIBUTES
29200 #undef TARGET_INSERT_ATTRIBUTES
29201 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
/* NOTE(review): the #endif closing the #ifdef on listing line 29199 is
   missing from this extraction.  */
29204 #undef TARGET_MANGLE_TYPE
29205 #define TARGET_MANGLE_TYPE ix86_mangle_type
29207 #undef TARGET_STACK_PROTECT_FAIL
29208 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
29210 #undef TARGET_FUNCTION_VALUE
29211 #define TARGET_FUNCTION_VALUE ix86_function_value
29213 #undef TARGET_SECONDARY_RELOAD
29214 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
29216 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
29217 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
29219 #undef TARGET_SET_CURRENT_FUNCTION
29220 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
/* Per-function target-option ("target" attribute) hooks: validate,
   save/restore/print option state, and decide inlinability across
   differing option sets.  */
29222 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
29223 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
29225 #undef TARGET_OPTION_SAVE
29226 #define TARGET_OPTION_SAVE ix86_function_specific_save
29228 #undef TARGET_OPTION_RESTORE
29229 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
29231 #undef TARGET_OPTION_PRINT
29232 #define TARGET_OPTION_PRINT ix86_function_specific_print
29234 #undef TARGET_OPTION_CAN_INLINE_P
29235 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
29237 #undef TARGET_EXPAND_TO_RTL_HOOK
29238 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
/* The single global vector of target hooks; TARGET_INITIALIZER expands
   using the TARGET_* macro overrides defined above, so this definition
   must follow all of them.  */
29240 struct gcc_target targetm = TARGET_INITIALIZER;
/* Per GCC convention, the machine-generated garbage-collector root
   tables for this file are included last.  */
29242 #include "gt-i386.h"