1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default stack-probe limit; -1 means "no limit" when the target
   configuration does not provide its own value.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   The tables have five entries: QI, HI, SI, DI and "other", so any
   mode not matched below maps to the final "other" slot (index 4).  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
#define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop-algorithm table entry: always fall back to a
   library call.  NOTE(review): used below as the second (presumably
   64-bit) entry of memcpy/memset tables on tunings that never run in
   64-bit mode -- confirm against struct stringop_algs.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
995 in SFmode, DFmode and XFmode */
996 2, /* cost of moving MMX register */
997 {6, 6}, /* cost of loading MMX registers
998 in SImode and DImode */
999 {4, 4}, /* cost of storing MMX registers
1000 in SImode and DImode */
1001 2, /* cost of moving SSE register */
1002 {6, 6, 6}, /* cost of loading SSE registers
1003 in SImode, DImode and TImode */
1004 {4, 4, 4}, /* cost of storing SSE registers
1005 in SImode, DImode and TImode */
1006 2, /* MMX or SSE register to integer */
1007 32, /* size of l1 cache. */
1008 2048, /* size of l2 cache. */
1009 128, /* size of prefetch block */
1010 8, /* number of parallel prefetches */
1011 3, /* Branch cost */
1012 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1013 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1014 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1015 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1016 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1017 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1018 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1019 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 {{libcall, {{8, loop}, {15, unrolled_loop},
1022 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1023 {libcall, {{24, loop}, {32, unrolled_loop},
1024 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1025 1, /* scalar_stmt_cost. */
1026 1, /* scalar load_cost. */
1027 1, /* scalar_store_cost. */
1028 1, /* vec_stmt_cost. */
1029 1, /* vec_to_scalar_cost. */
1030 1, /* scalar_to_vec_cost. */
1031 1, /* vec_align_load_cost. */
1032 2, /* vec_unalign_load_cost. */
1033 1, /* vec_store_cost. */
1034 3, /* cond_taken_branch_cost. */
1035 1, /* cond_not_taken_branch_cost. */
1039 struct processor_costs atom_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1042 COSTS_N_INSNS (1), /* variable shift costs */
1043 COSTS_N_INSNS (1), /* constant shift costs */
1044 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1045 COSTS_N_INSNS (4), /* HI */
1046 COSTS_N_INSNS (3), /* SI */
1047 COSTS_N_INSNS (4), /* DI */
1048 COSTS_N_INSNS (2)}, /* other */
1049 0, /* cost of multiply per each bit set */
1050 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1051 COSTS_N_INSNS (26), /* HI */
1052 COSTS_N_INSNS (42), /* SI */
1053 COSTS_N_INSNS (74), /* DI */
1054 COSTS_N_INSNS (74)}, /* other */
1055 COSTS_N_INSNS (1), /* cost of movsx */
1056 COSTS_N_INSNS (1), /* cost of movzx */
1057 8, /* "large" insn */
1058 17, /* MOVE_RATIO */
1059 2, /* cost for loading QImode using movzbl */
1060 {4, 4, 4}, /* cost of loading integer registers
1061 in QImode, HImode and SImode.
1062 Relative to reg-reg move (2). */
1063 {4, 4, 4}, /* cost of storing integer registers */
1064 4, /* cost of reg,reg fld/fst */
1065 {12, 12, 12}, /* cost of loading fp registers
1066 in SFmode, DFmode and XFmode */
1067 {6, 6, 8}, /* cost of storing fp registers
1068 in SFmode, DFmode and XFmode */
1069 2, /* cost of moving MMX register */
1070 {8, 8}, /* cost of loading MMX registers
1071 in SImode and DImode */
1072 {8, 8}, /* cost of storing MMX registers
1073 in SImode and DImode */
1074 2, /* cost of moving SSE register */
1075 {8, 8, 8}, /* cost of loading SSE registers
1076 in SImode, DImode and TImode */
1077 {8, 8, 8}, /* cost of storing SSE registers
1078 in SImode, DImode and TImode */
1079 5, /* MMX or SSE register to integer */
1080 32, /* size of l1 cache. */
1081 256, /* size of l2 cache. */
1082 64, /* size of prefetch block */
1083 6, /* number of parallel prefetches */
1084 3, /* Branch cost */
1085 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1086 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1087 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1088 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1089 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1090 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1091 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1092 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1093 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1094 {{libcall, {{8, loop}, {15, unrolled_loop},
1095 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1096 {libcall, {{24, loop}, {32, unrolled_loop},
1097 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic64 should produce code tuned for Nocona and K8. */
1113 struct processor_costs generic64_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 /* On all chips taken into consideration lea is 2 cycles and more. With
1116 this cost, however, our current implementation of synth_mult results in
1117 the use of unnecessary temporary registers, causing regressions on several
1118 SPECfp benchmarks. */
1119 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1120 COSTS_N_INSNS (1), /* variable shift costs */
1121 COSTS_N_INSNS (1), /* constant shift costs */
1122 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1123 COSTS_N_INSNS (4), /* HI */
1124 COSTS_N_INSNS (3), /* SI */
1125 COSTS_N_INSNS (4), /* DI */
1126 COSTS_N_INSNS (2)}, /* other */
1127 0, /* cost of multiply per each bit set */
1128 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1129 COSTS_N_INSNS (26), /* HI */
1130 COSTS_N_INSNS (42), /* SI */
1131 COSTS_N_INSNS (74), /* DI */
1132 COSTS_N_INSNS (74)}, /* other */
1133 COSTS_N_INSNS (1), /* cost of movsx */
1134 COSTS_N_INSNS (1), /* cost of movzx */
1135 8, /* "large" insn */
1136 17, /* MOVE_RATIO */
1137 4, /* cost for loading QImode using movzbl */
1138 {4, 4, 4}, /* cost of loading integer registers
1139 in QImode, HImode and SImode.
1140 Relative to reg-reg move (2). */
1141 {4, 4, 4}, /* cost of storing integer registers */
1142 4, /* cost of reg,reg fld/fst */
1143 {12, 12, 12}, /* cost of loading fp registers
1144 in SFmode, DFmode and XFmode */
1145 {6, 6, 8}, /* cost of storing fp registers
1146 in SFmode, DFmode and XFmode */
1147 2, /* cost of moving MMX register */
1148 {8, 8}, /* cost of loading MMX registers
1149 in SImode and DImode */
1150 {8, 8}, /* cost of storing MMX registers
1151 in SImode and DImode */
1152 2, /* cost of moving SSE register */
1153 {8, 8, 8}, /* cost of loading SSE registers
1154 in SImode, DImode and TImode */
1155 {8, 8, 8}, /* cost of storing SSE registers
1156 in SImode, DImode and TImode */
1157 5, /* MMX or SSE register to integer */
1158 32, /* size of l1 cache. */
1159 512, /* size of l2 cache. */
1160 64, /* size of prefetch block */
1161 6, /* number of parallel prefetches */
1162 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1163 value is increased to the perhaps more appropriate value of 5. */
1164 3, /* Branch cost */
1165 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1166 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1167 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1168 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1169 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1170 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1171 {DUMMY_STRINGOP_ALGS,
1172 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1173 {DUMMY_STRINGOP_ALGS,
1174 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1175 1, /* scalar_stmt_cost. */
1176 1, /* scalar load_cost. */
1177 1, /* scalar_store_cost. */
1178 1, /* vec_stmt_cost. */
1179 1, /* vec_to_scalar_cost. */
1180 1, /* scalar_to_vec_cost. */
1181 1, /* vec_align_load_cost. */
1182 2, /* vec_unalign_load_cost. */
1183 1, /* vec_store_cost. */
1184 3, /* cond_taken_branch_cost. */
1185 1, /* cond_not_taken_branch_cost. */
1188 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1190 struct processor_costs generic32_cost = {
1191 COSTS_N_INSNS (1), /* cost of an add instruction */
1192 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1193 COSTS_N_INSNS (1), /* variable shift costs */
1194 COSTS_N_INSNS (1), /* constant shift costs */
1195 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1196 COSTS_N_INSNS (4), /* HI */
1197 COSTS_N_INSNS (3), /* SI */
1198 COSTS_N_INSNS (4), /* DI */
1199 COSTS_N_INSNS (2)}, /* other */
1200 0, /* cost of multiply per each bit set */
1201 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1202 COSTS_N_INSNS (26), /* HI */
1203 COSTS_N_INSNS (42), /* SI */
1204 COSTS_N_INSNS (74), /* DI */
1205 COSTS_N_INSNS (74)}, /* other */
1206 COSTS_N_INSNS (1), /* cost of movsx */
1207 COSTS_N_INSNS (1), /* cost of movzx */
1208 8, /* "large" insn */
1209 17, /* MOVE_RATIO */
1210 4, /* cost for loading QImode using movzbl */
1211 {4, 4, 4}, /* cost of loading integer registers
1212 in QImode, HImode and SImode.
1213 Relative to reg-reg move (2). */
1214 {4, 4, 4}, /* cost of storing integer registers */
1215 4, /* cost of reg,reg fld/fst */
1216 {12, 12, 12}, /* cost of loading fp registers
1217 in SFmode, DFmode and XFmode */
1218 {6, 6, 8}, /* cost of storing fp registers
1219 in SFmode, DFmode and XFmode */
1220 2, /* cost of moving MMX register */
1221 {8, 8}, /* cost of loading MMX registers
1222 in SImode and DImode */
1223 {8, 8}, /* cost of storing MMX registers
1224 in SImode and DImode */
1225 2, /* cost of moving SSE register */
1226 {8, 8, 8}, /* cost of loading SSE registers
1227 in SImode, DImode and TImode */
1228 {8, 8, 8}, /* cost of storing SSE registers
1229 in SImode, DImode and TImode */
1230 5, /* MMX or SSE register to integer */
1231 32, /* size of l1 cache. */
1232 256, /* size of l2 cache. */
1233 64, /* size of prefetch block */
1234 6, /* number of parallel prefetches */
1235 3, /* Branch cost */
1236 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1237 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1238 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1239 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1240 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1241 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1242 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1243 DUMMY_STRINGOP_ALGS},
1244 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1245 DUMMY_STRINGOP_ALGS},
1246 1, /* scalar_stmt_cost. */
1247 1, /* scalar load_cost. */
1248 1, /* scalar_store_cost. */
1249 1, /* vec_stmt_cost. */
1250 1, /* vec_to_scalar_cost. */
1251 1, /* scalar_to_vec_cost. */
1252 1, /* vec_align_load_cost. */
1253 2, /* vec_unalign_load_cost. */
1254 1, /* vec_store_cost. */
1255 3, /* cond_taken_branch_cost. */
1256 1, /* cond_not_taken_branch_cost. */
/* Cost table in effect for the current compilation; initialized to the
   Pentium costs (presumably re-pointed at the selected processor's table
   during option processing -- TODO confirm against override code).  */
1259 const struct processor_costs *ix86_cost = &pentium_cost;
1261 /* Processor feature/optimization bitmasks. */
1262 #define m_386 (1<<PROCESSOR_I386)
1263 #define m_486 (1<<PROCESSOR_I486)
1264 #define m_PENT (1<<PROCESSOR_PENTIUM)
1265 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1266 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1267 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1268 #define m_CORE2 (1<<PROCESSOR_CORE2)
1269 #define m_ATOM (1<<PROCESSOR_ATOM)
1271 #define m_GEODE (1<<PROCESSOR_GEODE)
1272 #define m_K6 (1<<PROCESSOR_K6)
1273 #define m_K6_GEODE (m_K6 | m_GEODE)
1274 #define m_K8 (1<<PROCESSOR_K8)
1275 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1276 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1277 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1278 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1280 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1281 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283 /* Generic instruction choice should be common subset of supported CPUs
1284 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1285 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1287 /* Feature tests against the various tunings. */
1288 unsigned char ix86_tune_features[X86_TUNE_LAST];
1290 /* Feature tests against the various tunings used to create ix86_tune_features
1291 based on the processor mask. */
1292 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1293 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1294 negatively, so enabling for Generic64 seems like good code size
1295 tradeoff. We can't enable it for 32bit generic because it does not
1296 work well with PPro base chips. */
1297 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299 /* X86_TUNE_PUSH_MEMORY */
1300 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1301 | m_NOCONA | m_CORE2 | m_GENERIC,
1303 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1306 /* X86_TUNE_UNROLL_STRLEN */
1307 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1308 | m_CORE2 | m_GENERIC,
1310 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314 on simulation result. But after P4 was made, no performance benefit
1315 was observed with branch hints. It also increases the code size.
1316 As a result, icc never generates branch hints. */
1319 /* X86_TUNE_DOUBLE_WITH_ADD */
1322 /* X86_TUNE_USE_SAHF */
1323 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1324 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327 partial dependencies. */
1328 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1329 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332 register stalls on Generic32 compilation setting as well. However
1333 in current implementation the partial register stalls are not eliminated
1334 very well - they can be introduced via subregs synthesized by combine
1335 and can happen in caller/callee saving sequences. Because this option
1336 pays back little on PPro based chips and is in conflict with partial reg
1337 dependencies used by Athlon/P4 based chips, it is better to leave it off
1338 for generic32 for now. */
1341 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1342 m_CORE2 | m_GENERIC,
1344 /* X86_TUNE_USE_HIMODE_FIOP */
1345 m_386 | m_486 | m_K6_GEODE,
1347 /* X86_TUNE_USE_SIMODE_FIOP */
1348 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350 /* X86_TUNE_USE_MOV0 */
1353 /* X86_TUNE_USE_CLTD */
1354 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1359 /* X86_TUNE_SPLIT_LONG_MOVES */
1362 /* X86_TUNE_READ_MODIFY_WRITE */
1365 /* X86_TUNE_READ_MODIFY */
1368 /* X86_TUNE_PROMOTE_QIMODE */
1369 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1370 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372 /* X86_TUNE_FAST_PREFIX */
1373 ~(m_PENT | m_486 | m_386),
1375 /* X86_TUNE_SINGLE_STRINGOP */
1376 m_386 | m_PENT4 | m_NOCONA,
1378 /* X86_TUNE_QIMODE_MATH */
1381 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1382 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1383 might be considered for Generic32 if our scheme for avoiding partial
1384 stalls was more effective. */
1387 /* X86_TUNE_PROMOTE_QI_REGS */
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1395 | m_CORE2 | m_GENERIC,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1399 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1407 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412 | m_GENERIC | m_GEODE),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418 conflict here between PPro/Pentium4 based chips that treat 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers into two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra micro-op on 64bit SSE units. Experimental results
1423 show that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just lower part of scalar values in proper format leaving the
1435 upper part undefined. */
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO | m_PENT4 | m_NOCONA,
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453 /* X86_TUNE_SHIFT1 */
1456 /* X86_TUNE_USE_FFREEP */
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1494 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1498 vector path on AMD machines. */
1499 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1513 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1514 operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 m_AMDFAM10 | m_GENERIC,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
1536 /* Feature tests against the various architecture variations. */
1537 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
1541 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386 | m_486 | m_PENT | m_K6),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1558 static const unsigned int x86_accumulate_outgoing_args
1559 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1562 static const unsigned int x86_arch_always_fancy_math_387
1563 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1564 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation algorithm explicitly requested by the user;
   no_stringop means none was requested (presumably set from a
   -mstringop-strategy style option -- TODO confirm).  */
1566 static enum stringop_alg stringop_alg = no_stringop;
1568 /* In case the average insn count for single function invocation is
1569 lower than this constant, emit fast (but longer) prologue and
1571 #define FAST_PROLOGUE_INSN_COUNT 20
1573 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1574 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1575 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1576 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578 /* Array of the smallest class containing reg number REGNO, indexed by
1579 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1581 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583 /* ax, dx, cx, bx */
1584 AREG, DREG, CREG, BREG,
1585 /* si, di, bp, sp */
1586 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1588 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1589 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1592 /* flags, fpsr, fpcr, frame */
1593 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1595 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1598 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1601 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 /* SSE REX registers */
1604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1608 /* The "default" register map used in 32bit mode. */
1610 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1613 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1614 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1615 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1616 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1617 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1621 /* The "default" register map used in 64bit mode. */
1623 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1626 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1627 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1628 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1629 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1630 8,9,10,11,12,13,14,15, /* extended integer registers */
1631 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1634 /* Define the register numbers to be used in Dwarf debugging information.
1635 The SVR4 reference port C compiler uses the following register numbers
1636 in its Dwarf output code:
1637 0 for %eax (gcc regno = 0)
1638 1 for %ecx (gcc regno = 2)
1639 2 for %edx (gcc regno = 1)
1640 3 for %ebx (gcc regno = 3)
1641 4 for %esp (gcc regno = 7)
1642 5 for %ebp (gcc regno = 6)
1643 6 for %esi (gcc regno = 4)
1644 7 for %edi (gcc regno = 5)
1645 The following three DWARF register numbers are never generated by
1646 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1647 believes these numbers have these meanings.
1648 8 for %eip (no gcc equivalent)
1649 9 for %eflags (gcc regno = 17)
1650 10 for %trapno (no gcc equivalent)
1651 It is not at all clear how we should number the FP stack registers
1652 for the x86 architecture. If the version of SDB on x86/svr4 were
1653 a bit less brain dead with respect to floating-point then we would
1654 have a precedent to follow with respect to DWARF register numbers
1655 for x86 FP registers, but the SDB on x86/svr4 is so completely
1656 broken with respect to FP registers that it is hardly worth thinking
1657 of it as something to strive for compatibility with.
1658 The version of x86/svr4 SDB I have at the moment does (partially)
1659 seem to believe that DWARF register number 11 is associated with
1660 the x86 register %st(0), but that's about all. Higher DWARF
1661 register numbers don't seem to be associated with anything in
1662 particular, and even for DWARF regno 11, SDB only seems to under-
1663 stand that it should say that a variable lives in %st(0) (when
1664 asked via an `=' command) if we said it was in DWARF regno 11,
1665 but SDB still prints garbage when asked for the value of the
1666 variable in question (via a `/' command).
1667 (Also note that the labels SDB prints for various FP stack regs
1668 when doing an `x' command are all wrong.)
1669 Note that these problems generally don't affect the native SVR4
1670 C compiler because it doesn't allow the use of -O with -g and
1671 because when it is *not* optimizing, it allocates a memory
1672 location for each floating-point variable, and the memory
1673 location is what gets described in the DWARF AT_location
1674 attribute for the variable in question.
1675 Regardless of the severe mental illness of the x86/svr4 SDB, we
1676 do something sensible here and we use the following DWARF
1677 register numbers. Note that these are all stack-top-relative
1679 11 for %st(0) (gcc regno = 8)
1680 12 for %st(1) (gcc regno = 9)
1681 13 for %st(2) (gcc regno = 10)
1682 14 for %st(3) (gcc regno = 11)
1683 15 for %st(4) (gcc regno = 12)
1684 16 for %st(5) (gcc regno = 13)
1685 17 for %st(6) (gcc regno = 14)
1686 18 for %st(7) (gcc regno = 15)
/* Map GCC hard-register numbers to SVR4 DBX/DWARF register numbers
   (see the long discussion above for why the FP-stack entries use the
   stack-top-relative numbers 11..18).  -1 means the register has no
   debug-format number.  NOTE(review): the initializer's surrounding
   braces are elided in this excerpt.  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

/* The two operands of the most recent compare; consumed when the
   following conditional branch or scc is expanded.  NULL_RTX while no
   compare is pending.  */
rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
/* Define parameter passing and return registers.  */

/* Integer argument registers for the x86-64 SysV ABI, in allocation
   order.  NOTE(review): initializer braces are elided in this excerpt.  */
static int const x86_64_int_parameter_registers[6] =
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG

/* Integer argument registers for the 64-bit Microsoft ABI (only four
   register slots; the rest go on the stack).  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
  CX_REG, DX_REG, R8_REG, R9_REG

/* Integer value-return registers for x86-64.  */
static int const x86_64_int_return_registers[4] =
  AX_REG, DX_REG, DI_REG, SI_REG
/* Define the structure for the machine field in struct function.  */

/* One node in the per-function list of stack-local slots.  GTY(())
   marks it for garbage-collection root scanning.  NOTE(review): other
   fields of this struct are elided in this excerpt.  */
struct GTY(()) stack_local_entry {
  /* Machine mode of the slot, stored narrow as unsigned short.  */
  unsigned short mode;
  /* Next slot in the singly linked list.  */
  struct stack_local_entry *next;
/* Structure describing stack frame layout.
   Stack grows downward:

   saved frame pointer if frame_pointer_needed
						<- HARD_FRAME_POINTER
   [va_arg registers] (
   > to_allocate	 <- FRAME_POINTER

   NOTE(review): several members and diagram lines of this structure
   are elided in this excerpt.  */
  /* Size of the local-variable (frame) area.  */
  HOST_WIDE_INT frame;
  /* Size of the outgoing-argument area.  */
  int outgoing_arguments_size;
  /* Bytes the prologue must subtract from the stack pointer.  */
  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
/* Code model option.  */
enum cmodel ix86_cmodel;

/* Assembler dialect (-masm=); AT&T syntax is the default.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* Thread-local-storage dialect (-mtls-dialect=); GNU is the default.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
static int ix86_regparm;

/* -mstackrealign option.  */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

/* Generator functions for insn patterns; presumably pointed at the
   SImode or DImode variants depending on word size -- TODO confirm
   against where they are assigned.  */
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_pop1) (rtx);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   the command line (the continuation of this comment is elided).  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* The abi used by target.  */
enum calling_abi ix86_abi;

/* Branch cost: values 1-5, see jump.c for their meaning.  */
int ix86_branch_cost;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */
int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */

/* NOTE(review): most enumerators of this enum are elided in this
   excerpt; only a subset is visible.  */
enum x86_64_reg_class
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_COMPLEX_X87_CLASS,

/* Maximum number of eightbyte classes an argument can decompose into.  */
#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Set once ext_80387_constants_table has been initialized.  */
static bool ext_80387_constants_init = 0;
/* Forward declarations for functions defined later in this file.  */
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
/* NOTE(review): the trailing parameters of this prototype are elided
   in this excerpt.  */
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
static void ix86_add_new_builtins (int);

/* Indices of the per-function target-option strings saved/restored by
   the attribute(target) machinery below.  */
enum ix86_function_specific_strings
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_FPMATH,
  IX86_FUNCTION_SPECIFIC_MAX

static char *ix86_target_string (int, int, const char *, const char *,
				 const char *, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static enum calling_abi ix86_function_abi (const_tree);
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory (continuation of this comment elided in this excerpt).  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1

/* Whether -mtune= or -march= were specified.  */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Bit flags that specify the ISA we are compiling for.  */
int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;

/* A mask of ix86_isa_flags that includes bit X if X
   was set or cleared on the command line.  */
static int ix86_isa_flags_explicit;
/* Define a set of ISAs which are available when a given ISA is
   enabled.  MMX and SSE ISAs are handled separately.  Each _SET macro
   ORs an ISA's own bit with the _SET of every ISA it implies, so that
   enabling e.g. SSE4.2 transitively enables SSE4.1, SSSE3, ...  */

#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
#define OPTION_MASK_ISA_3DNOW_SET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)

#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
#define OPTION_MASK_ISA_SSE2_SET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
#define OPTION_MASK_ISA_SSE3_SET \
  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_SSSE3_SET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE4_1_SET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
#define OPTION_MASK_ISA_SSE4_2_SET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
#define OPTION_MASK_ISA_AVX_SET \
  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
#define OPTION_MASK_ISA_FMA_SET \
  (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)

/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
   as -msse4.2 (continuation elided in this excerpt).  */
#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET

#define OPTION_MASK_ISA_SSE4A_SET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE5_SET \
  (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)

/* AES and PCLMUL need SSE2 because they use xmm registers.  */
#define OPTION_MASK_ISA_AES_SET \
  (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_PCLMUL_SET \
  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)

#define OPTION_MASK_ISA_ABM_SET \
  (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF

/* Define a set of ISAs which aren't available when a given ISA is
   disabled.  MMX and SSE ISAs are handled separately.  Each _UNSET
   macro ORs an ISA's own bit with the _UNSET of every ISA that
   depends on it, so disabling e.g. SSE3 also disables SSSE3, SSE4A,
   and everything above them.  */

#define OPTION_MASK_ISA_MMX_UNSET \
  (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
#define OPTION_MASK_ISA_3DNOW_UNSET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A

#define OPTION_MASK_ISA_SSE_UNSET \
  (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
  (OPTION_MASK_ISA_SSE3 \
   | OPTION_MASK_ISA_SSSE3_UNSET \
   | OPTION_MASK_ISA_SSE4A_UNSET )
#define OPTION_MASK_ISA_SSSE3_UNSET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
#define OPTION_MASK_ISA_SSE4_2_UNSET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
#define OPTION_MASK_ISA_AVX_UNSET \
  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA

/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
   as -mno-sse4.1 (continuation elided in this excerpt).  */
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET

#define OPTION_MASK_ISA_SSE4A_UNSET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
/* Vectorization library interface and handlers.  The handler pointer
   is NULL unless a vector-math library ABI (SVML or ACML) was
   selected.  */
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number.  NOTE(review):
   the struct tag line and the initializer braces are elided in this
   excerpt.  */
  const struct processor_costs *cost;		/* Processor costs */
  const int align_loop;				/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;

/* One row per PROCESSOR_* value: cost table plus the five alignment
   parameters declared above, in the same order.  */
static const struct ptt processor_target_table[PROCESSOR_max] =
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  {&core2_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 7, 16, 7, 16}

/* Canonical CPU name for each TARGET_CPU_DEFAULT_* value; the
   initializer is elided in this excerpt.  */
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Implement TARGET_HANDLE_OPTION.  CODE is the option being processed,
   VALUE is nonzero for -mfoo and zero for -mno-foo.  Each option
   updates ix86_isa_flags with the SET mask (option plus everything it
   implies) or clears the UNSET mask (option plus everything depending
   on it), and records the touched bits in ix86_isa_flags_explicit so
   that -march defaults do not later override an explicit choice.
   NOTE(review): the switch statement, case labels, and braces are
   elided in this excerpt; only the flag updates are visible.  */
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
      /* -mmmx / -mno-mmx.  */
      ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
      /* -m3dnow / -mno-3dnow.  */
      ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
      /* -msse / -mno-sse.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
      /* -msse2 / -mno-sse2.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
      /* -msse3 / -mno-sse3.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
      /* -mssse3 / -mno-ssse3.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
      /* -msse4.1 / -mno-sse4.1.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
      /* -msse4.2 / -mno-sse4.2.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
      /* -mavx / -mno-avx.  */
      ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
      /* -mfma / -mno-fma.  */
      ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
      /* -msse4 / -mno-sse4 (aliases for sse4.2 / no-sse4.1).  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
      /* -msse4a / -mno-sse4a.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
      /* -msse5 / -mno-sse5.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
      /* -mabm / -mno-abm.  */
      ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
      /* -mpopcnt / -mno-popcnt.  */
      ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
      /* -msahf / -mno-sahf.  */
      ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
      /* -mcx16 / -mno-cx16.  */
      ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
      /* -maes / -mno-aes.  */
      ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
      /* -mpclmul / -mno-pclmul.  */
      ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
	  ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2333 /* Return a string the documents the current -m options. The caller is
2334 responsible for freeing the string. */
2337 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2338 const char *fpmath, bool add_nl_p)
2340 struct ix86_target_opts
2342 const char *option; /* option string */
2343 int mask; /* isa mask options */
2346 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2347 preceding options while match those first. */
2348 static struct ix86_target_opts isa_opts[] =
2350 { "-m64", OPTION_MASK_ISA_64BIT },
2351 { "-msse5", OPTION_MASK_ISA_SSE5 },
2352 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2353 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2354 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2355 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2356 { "-msse3", OPTION_MASK_ISA_SSE3 },
2357 { "-msse2", OPTION_MASK_ISA_SSE2 },
2358 { "-msse", OPTION_MASK_ISA_SSE },
2359 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2360 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2361 { "-mmmx", OPTION_MASK_ISA_MMX },
2362 { "-mabm", OPTION_MASK_ISA_ABM },
2363 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2364 { "-maes", OPTION_MASK_ISA_AES },
2365 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2369 static struct ix86_target_opts flag_opts[] =
2371 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2372 { "-m80387", MASK_80387 },
2373 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2374 { "-malign-double", MASK_ALIGN_DOUBLE },
2375 { "-mcld", MASK_CLD },
2376 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2377 { "-mieee-fp", MASK_IEEE_FP },
2378 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2379 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2380 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2381 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2382 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2383 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2384 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2385 { "-mno-red-zone", MASK_NO_RED_ZONE },
2386 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2387 { "-mrecip", MASK_RECIP },
2388 { "-mrtd", MASK_RTD },
2389 { "-msseregparm", MASK_SSEREGPARM },
2390 { "-mstack-arg-probe", MASK_STACK_PROBE },
2391 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2394 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2397 char target_other[40];
2406 memset (opts, '\0', sizeof (opts));
2408 /* Add -march= option. */
2411 opts[num][0] = "-march=";
2412 opts[num++][1] = arch;
2415 /* Add -mtune= option. */
2418 opts[num][0] = "-mtune=";
2419 opts[num++][1] = tune;
2422 /* Pick out the options in isa options. */
2423 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2425 if ((isa & isa_opts[i].mask) != 0)
2427 opts[num++][0] = isa_opts[i].option;
2428 isa &= ~ isa_opts[i].mask;
2432 if (isa && add_nl_p)
2434 opts[num++][0] = isa_other;
2435 sprintf (isa_other, "(other isa: 0x%x)", isa);
2438 /* Add flag options. */
2439 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2441 if ((flags & flag_opts[i].mask) != 0)
2443 opts[num++][0] = flag_opts[i].option;
2444 flags &= ~ flag_opts[i].mask;
2448 if (flags && add_nl_p)
2450 opts[num++][0] = target_other;
2451 sprintf (target_other, "(other flags: 0x%x)", isa);
2454 /* Add -fpmath= option. */
2457 opts[num][0] = "-mfpmath=";
2458 opts[num++][1] = fpmath;
2465 gcc_assert (num < ARRAY_SIZE (opts));
2467 /* Size the string. */
2469 sep_len = (add_nl_p) ? 3 : 1;
2470 for (i = 0; i < num; i++)
2473 for (j = 0; j < 2; j++)
2475 len += strlen (opts[i][j]);
2478 /* Build the string. */
2479 ret = ptr = (char *) xmalloc (len);
2482 for (i = 0; i < num; i++)
2486 for (j = 0; j < 2; j++)
2487 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2494 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2502 for (j = 0; j < 2; j++)
2505 memcpy (ptr, opts[i][j], len2[j]);
2507 line_len += len2[j];
2512 gcc_assert (ret + len >= ptr);
/* Function that is callable from the debugger to print the current
   options.  Builds the option string via ix86_target_string (with
   newline wrapping enabled) and writes it to stderr.  NOTE(review):
   the return type, braces, if/else, and the free of OPTS are elided
   in this excerpt.  */
ix86_debug_options (void)
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath_string, true);
      fprintf (stderr, "%s\n\n", opts);
      fprintf (stderr, "<no options>\n\n");
2537 /* Sometimes certain combinations of command options do not make
2538 sense on a particular target machine. You can define a macro
2539 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2540 defined, is executed once just after all the command options have
2543 Don't use this macro to turn on various extra optimizations for
2544 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2547 override_options (bool main_args_p)
2550 unsigned int ix86_arch_mask, ix86_tune_mask;
2555 /* Comes from final.c -- no real reason to change it. */
2556 #define MAX_CODE_ALIGN 16
2564 PTA_PREFETCH_SSE = 1 << 4,
2566 PTA_3DNOW_A = 1 << 6,
2570 PTA_POPCNT = 1 << 10,
2572 PTA_SSE4A = 1 << 12,
2573 PTA_NO_SAHF = 1 << 13,
2574 PTA_SSE4_1 = 1 << 14,
2575 PTA_SSE4_2 = 1 << 15,
2578 PTA_PCLMUL = 1 << 18,
2585 const char *const name; /* processor name or nickname. */
2586 const enum processor_type processor;
2587 const enum attr_cpu schedule;
2588 const unsigned /*enum pta_flags*/ flags;
2590 const processor_alias_table[] =
2592 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2593 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2594 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2595 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2596 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2597 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2598 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2599 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2600 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2601 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2602 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2603 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2604 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2606 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2608 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2609 PTA_MMX | PTA_SSE | PTA_SSE2},
2610 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2611 PTA_MMX |PTA_SSE | PTA_SSE2},
2612 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2613 PTA_MMX | PTA_SSE | PTA_SSE2},
2614 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2615 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2616 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2617 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2618 | PTA_CX16 | PTA_NO_SAHF},
2619 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2620 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2621 | PTA_SSSE3 | PTA_CX16},
2622 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2623 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2624 | PTA_SSSE3 | PTA_CX16},
2625 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2626 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2627 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2628 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2629 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2630 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2631 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2632 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2633 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2634 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2635 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2636 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2637 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2638 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2639 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2640 {"x86-64", PROCESSOR_K8, CPU_K8,
2641 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2642 {"k8", PROCESSOR_K8, CPU_K8,
2643 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2644 | PTA_SSE2 | PTA_NO_SAHF},
2645 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2646 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2647 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2648 {"opteron", PROCESSOR_K8, CPU_K8,
2649 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2650 | PTA_SSE2 | PTA_NO_SAHF},
2651 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2652 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2653 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2654 {"athlon64", PROCESSOR_K8, CPU_K8,
2655 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2656 | PTA_SSE2 | PTA_NO_SAHF},
2657 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2658 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2659 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2660 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2661 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2662 | PTA_SSE2 | PTA_NO_SAHF},
2663 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2664 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2665 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2666 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2667 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2668 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2669 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2670 0 /* flags are only used for -march switch. */ },
2671 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2672 PTA_64BIT /* flags are only used for -march switch. */ },
2675 int const pta_size = ARRAY_SIZE (processor_alias_table);
2677 /* Set up prefix/suffix so the error messages refer to either the command
2678 line argument, or the attribute(target). */
2687 prefix = "option(\"";
2692 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2693 SUBTARGET_OVERRIDE_OPTIONS;
2696 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2697 SUBSUBTARGET_OVERRIDE_OPTIONS;
2700 /* -fPIC is the default for x86_64. */
2701 if (TARGET_MACHO && TARGET_64BIT)
2704 /* Set the default values for switches whose default depends on TARGET_64BIT
2705 in case they weren't overwritten by command line options. */
2708 /* Mach-O doesn't support omitting the frame pointer for now. */
2709 if (flag_omit_frame_pointer == 2)
2710 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2711 if (flag_asynchronous_unwind_tables == 2)
2712 flag_asynchronous_unwind_tables = 1;
2713 if (flag_pcc_struct_return == 2)
2714 flag_pcc_struct_return = 0;
2718 if (flag_omit_frame_pointer == 2)
2719 flag_omit_frame_pointer = 0;
2720 if (flag_asynchronous_unwind_tables == 2)
2721 flag_asynchronous_unwind_tables = 0;
2722 if (flag_pcc_struct_return == 2)
2723 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2726 /* Need to check -mtune=generic first. */
2727 if (ix86_tune_string)
2729 if (!strcmp (ix86_tune_string, "generic")
2730 || !strcmp (ix86_tune_string, "i686")
2731 /* As special support for cross compilers we read -mtune=native
2732 as -mtune=generic. With native compilers we won't see the
2733 -mtune=native, as it was changed by the driver. */
2734 || !strcmp (ix86_tune_string, "native"))
2737 ix86_tune_string = "generic64";
2739 ix86_tune_string = "generic32";
2741 /* If this call is for setting the option attribute, allow the
2742 generic32/generic64 that was previously set. */
2743 else if (!main_args_p
2744 && (!strcmp (ix86_tune_string, "generic32")
2745 || !strcmp (ix86_tune_string, "generic64")))
2747 else if (!strncmp (ix86_tune_string, "generic", 7))
2748 error ("bad value (%s) for %stune=%s %s",
2749 ix86_tune_string, prefix, suffix, sw);
2753 if (ix86_arch_string)
2754 ix86_tune_string = ix86_arch_string;
2755 if (!ix86_tune_string)
2757 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2758 ix86_tune_defaulted = 1;
2761 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2762 need to use a sensible tune option. */
2763 if (!strcmp (ix86_tune_string, "generic")
2764 || !strcmp (ix86_tune_string, "x86-64")
2765 || !strcmp (ix86_tune_string, "i686"))
2768 ix86_tune_string = "generic64";
2770 ix86_tune_string = "generic32";
2773 if (ix86_stringop_string)
2775 if (!strcmp (ix86_stringop_string, "rep_byte"))
2776 stringop_alg = rep_prefix_1_byte;
2777 else if (!strcmp (ix86_stringop_string, "libcall"))
2778 stringop_alg = libcall;
2779 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2780 stringop_alg = rep_prefix_4_byte;
2781 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2783 /* rep; movq isn't available in 32-bit code. */
2784 stringop_alg = rep_prefix_8_byte;
2785 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2786 stringop_alg = loop_1_byte;
2787 else if (!strcmp (ix86_stringop_string, "loop"))
2788 stringop_alg = loop;
2789 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2790 stringop_alg = unrolled_loop;
2792 error ("bad value (%s) for %sstringop-strategy=%s %s",
2793 ix86_stringop_string, prefix, suffix, sw);
2795 if (!strcmp (ix86_tune_string, "x86-64"))
2796 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2797 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2798 prefix, suffix, prefix, suffix, prefix, suffix);
2800 if (!ix86_arch_string)
2801 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2803 ix86_arch_specified = 1;
2805 if (!strcmp (ix86_arch_string, "generic"))
2806 error ("generic CPU can be used only for %stune=%s %s",
2807 prefix, suffix, sw);
2808 if (!strncmp (ix86_arch_string, "generic", 7))
2809 error ("bad value (%s) for %sarch=%s %s",
2810 ix86_arch_string, prefix, suffix, sw);
2812 /* Validate -mabi= value. */
2813 if (ix86_abi_string)
2815 if (strcmp (ix86_abi_string, "sysv") == 0)
2816 ix86_abi = SYSV_ABI;
2817 else if (strcmp (ix86_abi_string, "ms") == 0)
2820 error ("unknown ABI (%s) for %sabi=%s %s",
2821 ix86_abi_string, prefix, suffix, sw);
2824 ix86_abi = DEFAULT_ABI;
2826 if (ix86_cmodel_string != 0)
2828 if (!strcmp (ix86_cmodel_string, "small"))
2829 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2830 else if (!strcmp (ix86_cmodel_string, "medium"))
2831 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2832 else if (!strcmp (ix86_cmodel_string, "large"))
2833 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2835 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2836 else if (!strcmp (ix86_cmodel_string, "32"))
2837 ix86_cmodel = CM_32;
2838 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2839 ix86_cmodel = CM_KERNEL;
2841 error ("bad value (%s) for %scmodel=%s %s",
2842 ix86_cmodel_string, prefix, suffix, sw);
2846 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2847 use of rip-relative addressing. This eliminates fixups that
2848 would otherwise be needed if this object is to be placed in a
2849 DLL, and is essentially just as efficient as direct addressing. */
2850 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2851 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2852 else if (TARGET_64BIT)
2853 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2855 ix86_cmodel = CM_32;
2857 if (ix86_asm_string != 0)
2860 && !strcmp (ix86_asm_string, "intel"))
2861 ix86_asm_dialect = ASM_INTEL;
2862 else if (!strcmp (ix86_asm_string, "att"))
2863 ix86_asm_dialect = ASM_ATT;
2865 error ("bad value (%s) for %sasm=%s %s",
2866 ix86_asm_string, prefix, suffix, sw);
2868 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2869 error ("code model %qs not supported in the %s bit mode",
2870 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2871 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2872 sorry ("%i-bit mode not compiled in",
2873 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2875 for (i = 0; i < pta_size; i++)
2876 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2878 ix86_schedule = processor_alias_table[i].schedule;
2879 ix86_arch = processor_alias_table[i].processor;
2880 /* Default cpu tuning to the architecture. */
2881 ix86_tune = ix86_arch;
2883 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2884 error ("CPU you selected does not support x86-64 "
2887 if (processor_alias_table[i].flags & PTA_MMX
2888 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2889 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2890 if (processor_alias_table[i].flags & PTA_3DNOW
2891 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2892 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2893 if (processor_alias_table[i].flags & PTA_3DNOW_A
2894 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2895 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2896 if (processor_alias_table[i].flags & PTA_SSE
2897 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2898 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2899 if (processor_alias_table[i].flags & PTA_SSE2
2900 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2901 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2902 if (processor_alias_table[i].flags & PTA_SSE3
2903 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2904 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2905 if (processor_alias_table[i].flags & PTA_SSSE3
2906 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2907 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2908 if (processor_alias_table[i].flags & PTA_SSE4_1
2909 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2910 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2911 if (processor_alias_table[i].flags & PTA_SSE4_2
2912 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2913 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2914 if (processor_alias_table[i].flags & PTA_AVX
2915 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2916 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2917 if (processor_alias_table[i].flags & PTA_FMA
2918 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2919 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2920 if (processor_alias_table[i].flags & PTA_SSE4A
2921 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2922 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2923 if (processor_alias_table[i].flags & PTA_SSE5
2924 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2925 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2926 if (processor_alias_table[i].flags & PTA_ABM
2927 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2928 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2929 if (processor_alias_table[i].flags & PTA_CX16
2930 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2931 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2932 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2933 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2934 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2935 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2936 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2937 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2938 if (processor_alias_table[i].flags & PTA_AES
2939 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2940 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2941 if (processor_alias_table[i].flags & PTA_PCLMUL
2942 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2943 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2944 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2945 x86_prefetch_sse = true;
2951 error ("bad value (%s) for %sarch=%s %s",
2952 ix86_arch_string, prefix, suffix, sw);
2954 ix86_arch_mask = 1u << ix86_arch;
2955 for (i = 0; i < X86_ARCH_LAST; ++i)
2956 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2958 for (i = 0; i < pta_size; i++)
2959 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2961 ix86_schedule = processor_alias_table[i].schedule;
2962 ix86_tune = processor_alias_table[i].processor;
2963 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2965 if (ix86_tune_defaulted)
2967 ix86_tune_string = "x86-64";
2968 for (i = 0; i < pta_size; i++)
2969 if (! strcmp (ix86_tune_string,
2970 processor_alias_table[i].name))
2972 ix86_schedule = processor_alias_table[i].schedule;
2973 ix86_tune = processor_alias_table[i].processor;
2976 error ("CPU you selected does not support x86-64 "
2979 /* Intel CPUs have always interpreted SSE prefetch instructions as
2980 NOPs; so, we can enable SSE prefetch instructions even when
2981 -mtune (rather than -march) points us to a processor that has them.
2982 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2983 higher processors. */
2985 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2986 x86_prefetch_sse = true;
2990 error ("bad value (%s) for %stune=%s %s",
2991 ix86_tune_string, prefix, suffix, sw);
2993 ix86_tune_mask = 1u << ix86_tune;
2994 for (i = 0; i < X86_TUNE_LAST; ++i)
2995 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2998 ix86_cost = &ix86_size_cost;
3000 ix86_cost = processor_target_table[ix86_tune].cost;
3002 /* Arrange to set up i386_stack_locals for all functions. */
3003 init_machine_status = ix86_init_machine_status;
3005 /* Validate -mregparm= value. */
3006 if (ix86_regparm_string)
3009 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3010 i = atoi (ix86_regparm_string);
3011 if (i < 0 || i > REGPARM_MAX)
3012 error ("%sregparm=%d%s is not between 0 and %d",
3013 prefix, i, suffix, REGPARM_MAX);
3018 ix86_regparm = REGPARM_MAX;
3020 /* If the user has provided any of the -malign-* options,
3021 warn and use that value only if -falign-* is not set.
3022 Remove this code in GCC 3.2 or later. */
3023 if (ix86_align_loops_string)
3025 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3026 prefix, suffix, suffix);
3027 if (align_loops == 0)
3029 i = atoi (ix86_align_loops_string);
3030 if (i < 0 || i > MAX_CODE_ALIGN)
3031 error ("%salign-loops=%d%s is not between 0 and %d",
3032 prefix, i, suffix, MAX_CODE_ALIGN);
3034 align_loops = 1 << i;
3038 if (ix86_align_jumps_string)
3040 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3041 prefix, suffix, suffix);
3042 if (align_jumps == 0)
3044 i = atoi (ix86_align_jumps_string);
3045 if (i < 0 || i > MAX_CODE_ALIGN)
3046 error ("%salign-loops=%d%s is not between 0 and %d",
3047 prefix, i, suffix, MAX_CODE_ALIGN);
3049 align_jumps = 1 << i;
3053 if (ix86_align_funcs_string)
3055 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3056 prefix, suffix, suffix);
3057 if (align_functions == 0)
3059 i = atoi (ix86_align_funcs_string);
3060 if (i < 0 || i > MAX_CODE_ALIGN)
3061 error ("%salign-loops=%d%s is not between 0 and %d",
3062 prefix, i, suffix, MAX_CODE_ALIGN);
3064 align_functions = 1 << i;
3068 /* Default align_* from the processor table. */
3069 if (align_loops == 0)
3071 align_loops = processor_target_table[ix86_tune].align_loop;
3072 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3074 if (align_jumps == 0)
3076 align_jumps = processor_target_table[ix86_tune].align_jump;
3077 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3079 if (align_functions == 0)
3081 align_functions = processor_target_table[ix86_tune].align_func;
3084 /* Validate -mbranch-cost= value, or provide default. */
3085 ix86_branch_cost = ix86_cost->branch_cost;
3086 if (ix86_branch_cost_string)
3088 i = atoi (ix86_branch_cost_string);
3090 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3092 ix86_branch_cost = i;
3094 if (ix86_section_threshold_string)
3096 i = atoi (ix86_section_threshold_string);
3098 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3100 ix86_section_threshold = i;
3103 if (ix86_tls_dialect_string)
3105 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3106 ix86_tls_dialect = TLS_DIALECT_GNU;
3107 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3108 ix86_tls_dialect = TLS_DIALECT_GNU2;
3109 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3110 ix86_tls_dialect = TLS_DIALECT_SUN;
3112 error ("bad value (%s) for %stls-dialect=%s %s",
3113 ix86_tls_dialect_string, prefix, suffix, sw);
3116 if (ix87_precision_string)
3118 i = atoi (ix87_precision_string);
3119 if (i != 32 && i != 64 && i != 80)
3120 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3125 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3127 /* Enable by default the SSE and MMX builtins. Do allow the user to
3128 explicitly disable any of these. In particular, disabling SSE and
3129 MMX for kernel code is extremely useful. */
3130 if (!ix86_arch_specified)
3132 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3133 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3136 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3140 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3142 if (!ix86_arch_specified)
3144 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3146 /* i386 ABI does not specify red zone. It still makes sense to use it
3147 when programmer takes care to stack from being destroyed. */
3148 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3149 target_flags |= MASK_NO_RED_ZONE;
3152 /* Keep nonleaf frame pointers. */
3153 if (flag_omit_frame_pointer)
3154 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3155 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3156 flag_omit_frame_pointer = 1;
3158 /* If we're doing fast math, we don't care about comparison order
3159 wrt NaNs. This lets us use a shorter comparison sequence. */
3160 if (flag_finite_math_only)
3161 target_flags &= ~MASK_IEEE_FP;
3163 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3164 since the insns won't need emulation. */
3165 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3166 target_flags &= ~MASK_NO_FANCY_MATH_387;
3168 /* Likewise, if the target doesn't have a 387, or we've specified
3169 software floating point, don't use 387 inline intrinsics. */
3171 target_flags |= MASK_NO_FANCY_MATH_387;
3173 /* Turn on MMX builtins for -msse. */
3176 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3177 x86_prefetch_sse = true;
3180 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3181 if (TARGET_SSE4_2 || TARGET_ABM)
3182 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3184 /* Validate -mpreferred-stack-boundary= value or default it to
3185 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3186 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3187 if (ix86_preferred_stack_boundary_string)
3189 i = atoi (ix86_preferred_stack_boundary_string);
3190 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3191 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3192 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3194 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3197 /* Set the default value for -mstackrealign. */
3198 if (ix86_force_align_arg_pointer == -1)
3199 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3201 /* Validate -mincoming-stack-boundary= value or default it to
3202 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3203 if (ix86_force_align_arg_pointer)
3204 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3206 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3207 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3208 if (ix86_incoming_stack_boundary_string)
3210 i = atoi (ix86_incoming_stack_boundary_string);
3211 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3212 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3213 i, TARGET_64BIT ? 4 : 2);
3216 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3217 ix86_incoming_stack_boundary
3218 = ix86_user_incoming_stack_boundary;
3222 /* Accept -msseregparm only if at least SSE support is enabled. */
3223 if (TARGET_SSEREGPARM
3225 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3227 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3228 if (ix86_fpmath_string != 0)
3230 if (! strcmp (ix86_fpmath_string, "387"))
3231 ix86_fpmath = FPMATH_387;
3232 else if (! strcmp (ix86_fpmath_string, "sse"))
3236 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3237 ix86_fpmath = FPMATH_387;
3240 ix86_fpmath = FPMATH_SSE;
3242 else if (! strcmp (ix86_fpmath_string, "387,sse")
3243 || ! strcmp (ix86_fpmath_string, "387+sse")
3244 || ! strcmp (ix86_fpmath_string, "sse,387")
3245 || ! strcmp (ix86_fpmath_string, "sse+387")
3246 || ! strcmp (ix86_fpmath_string, "both"))
3250 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3251 ix86_fpmath = FPMATH_387;
3253 else if (!TARGET_80387)
3255 warning (0, "387 instruction set disabled, using SSE arithmetics");
3256 ix86_fpmath = FPMATH_SSE;
3259 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3262 error ("bad value (%s) for %sfpmath=%s %s",
3263 ix86_fpmath_string, prefix, suffix, sw);
3266 /* If the i387 is disabled, then do not return values in it. */
3268 target_flags &= ~MASK_FLOAT_RETURNS;
3270 /* Use external vectorized library in vectorizing intrinsics. */
3271 if (ix86_veclibabi_string)
3273 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3274 ix86_veclib_handler = ix86_veclibabi_svml;
3275 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3276 ix86_veclib_handler = ix86_veclibabi_acml;
3278 error ("unknown vectorization library ABI type (%s) for "
3279 "%sveclibabi=%s %s", ix86_veclibabi_string,
3280 prefix, suffix, sw);
3283 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3284 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3286 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3288 /* ??? Unwind info is not correct around the CFG unless either a frame
3289 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3290 unwind info generation to be aware of the CFG and propagating states
3292 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3293 || flag_exceptions || flag_non_call_exceptions)
3294 && flag_omit_frame_pointer
3295 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3297 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3298 warning (0, "unwind tables currently require either a frame pointer "
3299 "or %saccumulate-outgoing-args%s for correctness",
3301 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3304 /* If stack probes are required, the space used for large function
3305 arguments on the stack must also be probed, so enable
3306 -maccumulate-outgoing-args so this happens in the prologue. */
3307 if (TARGET_STACK_PROBE
3308 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3310 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3311 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3312 "for correctness", prefix, suffix);
3313 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3316 /* For sane SSE instruction set generation we need fcomi instruction.
3317 It is safe to enable all CMOVE instructions. */
3321 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3324 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3325 p = strchr (internal_label_prefix, 'X');
3326 internal_label_prefix_len = p - internal_label_prefix;
3330 /* When scheduling description is not available, disable scheduler pass
3331 so it won't slow down the compilation and make x87 code slower. */
3332 if (!TARGET_SCHEDULE)
3333 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3335 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3336 set_param_value ("simultaneous-prefetches",
3337 ix86_cost->simultaneous_prefetches);
3338 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3339 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3340 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3341 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3342 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3343 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3345 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3346 can be optimized to ap = __builtin_next_arg (0). */
3348 targetm.expand_builtin_va_start = NULL;
3352 ix86_gen_leave = gen_leave_rex64;
3353 ix86_gen_pop1 = gen_popdi1;
3354 ix86_gen_add3 = gen_adddi3;
3355 ix86_gen_sub3 = gen_subdi3;
3356 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3357 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3358 ix86_gen_monitor = gen_sse3_monitor64;
3359 ix86_gen_andsp = gen_anddi3;
3363 ix86_gen_leave = gen_leave;
3364 ix86_gen_pop1 = gen_popsi1;
3365 ix86_gen_add3 = gen_addsi3;
3366 ix86_gen_sub3 = gen_subsi3;
3367 ix86_gen_sub3_carry = gen_subsi3_carry;
3368 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3369 ix86_gen_monitor = gen_sse3_monitor;
3370 ix86_gen_andsp = gen_andsi3;
3374 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3376 target_flags |= MASK_CLD & ~target_flags_explicit;
3379 /* Save the initial options in case the user does function specific options */
3381 target_option_default_node = target_option_current_node
3382 = build_target_option_node ();
3385 /* Save the current options */
/* TARGET_OPTION_SAVE hook: snapshot the current global x86 option state
   into *PTR so it can later be re-applied by
   ix86_function_specific_restore (used for attribute((target(...)))).  */
3388 ix86_function_specific_save (struct cl_target_option *ptr)
/* The saved fields hold only 0..255 (presumably narrow storage in
   cl_target_option); assert the live values fit before copying.  */
3390 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3391 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3392 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3393 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3394 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
/* Plain field-by-field copy of the globals into the save structure.  */
3396 ptr->arch = ix86_arch;
3397 ptr->schedule = ix86_schedule;
3398 ptr->tune = ix86_tune;
3399 ptr->fpmath = ix86_fpmath;
3400 ptr->branch_cost = ix86_branch_cost;
3401 ptr->tune_defaulted = ix86_tune_defaulted;
3402 ptr->arch_specified = ix86_arch_specified;
3403 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3404 ptr->target_flags_explicit = target_flags_explicit;
3407 /* Restore the current options */
/* TARGET_OPTION_RESTORE hook: re-apply the option state previously
   captured by ix86_function_specific_save, then recompute the derived
   per-arch / per-tune feature arrays if arch or tune changed.  */
3410 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember the old arch/tune so we only rebuild the feature tables
   when they actually change.  */
3412 enum processor_type old_tune = ix86_tune;
3413 enum processor_type old_arch = ix86_arch;
3414 unsigned int ix86_arch_mask, ix86_tune_mask;
/* Copy the saved fields back into the globals, casting the narrow
   stored values back to their enum types.  */
3417 ix86_arch = (enum processor_type) ptr->arch;
3418 ix86_schedule = (enum attr_cpu) ptr->schedule;
3419 ix86_tune = (enum processor_type) ptr->tune;
3420 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3421 ix86_branch_cost = ptr->branch_cost;
3422 ix86_tune_defaulted = ptr->tune_defaulted;
3423 ix86_arch_specified = ptr->arch_specified;
3424 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3425 target_flags_explicit = ptr->target_flags_explicit;
3427 /* Recreate the arch feature tests if the arch changed */
3428 if (old_arch != ix86_arch)
/* ix86_arch_features[] is a boolean per X86_ARCH_* feature, derived by
   testing this arch's bit in each initial_ix86_arch_features mask.  */
3430 ix86_arch_mask = 1u << ix86_arch;
3431 for (i = 0; i < X86_ARCH_LAST; ++i)
3432 ix86_arch_features[i]
3433 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3436 /* Recreate the tune optimization tests */
3437 if (old_tune != ix86_tune)
/* Same scheme as above, but for the X86_TUNE_* feature array.  */
3439 ix86_tune_mask = 1u << ix86_tune;
3440 for (i = 0; i < X86_TUNE_LAST; ++i)
3441 ix86_tune_features[i]
3442 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3446 /* Print the current options */
/* TARGET_OPTION_PRINT hook: dump the contents of *PTR to FILE for
   debugging, each line indented by INDENT columns.  */
3449 ix86_function_specific_print (FILE *file, int indent,
3450 struct cl_target_option *ptr)
/* Build a human-readable option string from the saved ISA and target
   flags (freed at the bottom of the function).  */
3453 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3454 NULL, NULL, NULL, false)
/* Print arch/tune both as the raw index and, when the index is within
   the cpu_names table, as the symbolic CPU name.  */
3456 fprintf (file, "%*sarch = %d (%s)\n",
3459 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3460 ? cpu_names[ptr->arch]
3463 fprintf (file, "%*stune = %d (%s)\n",
3466 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3467 ? cpu_names[ptr->tune]
/* fpmath is a bit-set; show ", 387" and/or ", sse" as applicable.  */
3470 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3471 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3472 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3473 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3477 fprintf (file, "%*s%s\n", indent, "", target_string);
3478 free (target_string);
3483 /* Inner function to process the attribute((target(...))), take an argument and
3484 set the current options from the argument. If we have a list, recursively go
/* ARGS is either a TREE_LIST (handled by recursing over each element) or
   a STRING_CST holding comma-separated option names.  P_STRINGS collects
   the values of string-valued options (arch=/tune=/fpmath=) for the
   caller; entries are xstrdup'd here and owned by the caller.  */
3488 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry builders: { name, strlen(name), option kind, OPT_* enum,
   target_flags mask }.  ISA entries route through ix86_handle_option,
   STR entries capture a string value, YES/NO entries set or clear a
   mask in target_flags.  */
3493 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3494 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3495 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3496 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3511 enum ix86_opt_type type;
/* ISA options (map directly onto the corresponding -m<isa> switch).  */
3516 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3517 IX86_ATTR_ISA ("abm", OPT_mabm),
3518 IX86_ATTR_ISA ("aes", OPT_maes),
3519 IX86_ATTR_ISA ("avx", OPT_mavx),
3520 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3521 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3522 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3523 IX86_ATTR_ISA ("sse", OPT_msse),
3524 IX86_ATTR_ISA ("sse2", OPT_msse2),
3525 IX86_ATTR_ISA ("sse3", OPT_msse3),
3526 IX86_ATTR_ISA ("sse4", OPT_msse4),
3527 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3528 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3529 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3530 IX86_ATTR_ISA ("sse5", OPT_msse5),
3531 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3533 /* string options */
3534 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3535 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3536 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Flag options: YES sets the mask when the option is given, NO clears
   it (the table stores the inverted mask name for the NO variants).  */
3539 IX86_ATTR_YES ("cld",
3543 IX86_ATTR_NO ("fancy-math-387",
3544 OPT_mfancy_math_387,
3545 MASK_NO_FANCY_MATH_387),
3547 IX86_ATTR_NO ("fused-madd",
3549 MASK_NO_FUSED_MADD),
3551 IX86_ATTR_YES ("ieee-fp",
3555 IX86_ATTR_YES ("inline-all-stringops",
3556 OPT_minline_all_stringops,
3557 MASK_INLINE_ALL_STRINGOPS),
3559 IX86_ATTR_YES ("inline-stringops-dynamically",
3560 OPT_minline_stringops_dynamically,
3561 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3563 IX86_ATTR_NO ("align-stringops",
3564 OPT_mno_align_stringops,
3565 MASK_NO_ALIGN_STRINGOPS),
3567 IX86_ATTR_YES ("recip",
3573 /* If this is a list, recurse to get the options. */
3574 if (TREE_CODE (args) == TREE_LIST)
/* Process each list element; a failure in any element fails the whole
   attribute.  */
3578 for (; args; args = TREE_CHAIN (args))
3579 if (TREE_VALUE (args)
3580 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings)
/* Anything other than a list or a string constant is malformed.  */
3586 else if (TREE_CODE (args) != STRING_CST)
3589 /* Handle multiple arguments separated by commas. */
3590 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3592 while (next_optstr && *next_optstr != '\0')
3594 char *p = next_optstr;
3596 char *comma = strchr (next_optstr, ',');
3597 const char *opt_string;
3598 size_t len, opt_len;
3603 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the current comma-separated token; next_optstr advances
   past the comma for the next iteration.  */
3609 len = comma - next_optstr;
3610 next_optstr = comma + 1;
3618 /* Recognize no-xxx. */
3619 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3628 /* Find the option. */
/* Linear search of the attrs table.  String options ("arch=" etc.)
   match on prefix (len > opt_len); all others must match exactly.  */
3631 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3633 type = attrs[i].type;
3634 opt_len = attrs[i].len;
3635 if (ch == attrs[i].string[0]
3636 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3637 && memcmp (p, attrs[i].string, opt_len) == 0)
3640 mask = attrs[i].mask;
3641 opt_string = attrs[i].string;
3646 /* Process the option. */
3649 error ("attribute(target(\"%s\")) is unknown", orig_p);
/* ISA options are funneled through the regular -m option handler so
   dependent ISA flags are kept consistent.  */
3653 else if (type == ix86_opt_isa)
3654 ix86_handle_option (opt, p, opt_set_p);
3656 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* A NO-style entry with an explicit "no-" prefix double-negates.  */
3658 if (type == ix86_opt_no)
3659 opt_set_p = !opt_set_p;
3662 target_flags |= mask;
3664 target_flags &= ~mask;
3667 else if (type == ix86_opt_str)
/* Each string option may appear at most once per attribute.  */
3671 error ("option(\"%s\") was already specified", opt_string);
3675 p_strings[opt] = xstrdup (p + opt_len);
3685 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parse ARGS (the argument of attribute((target(...)))), temporarily
   apply the resulting options via override_options, and capture the
   outcome as a target-option tree node.  The global option strings are
   saved on entry and restored before returning.  */
3688 ix86_valid_target_attribute_tree (tree args)
/* Save current string options so they can be put back afterwards.  */
3690 const char *orig_arch_string = ix86_arch_string;
3691 const char *orig_tune_string = ix86_tune_string;
3692 const char *orig_fpmath_string = ix86_fpmath_string;
3693 int orig_tune_defaulted = ix86_tune_defaulted;
3694 int orig_arch_specified = ix86_arch_specified;
3695 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
/* Baseline to compare against: the options in effect at startup.  */
3698 struct cl_target_option *def
3699 = TREE_TARGET_OPTION (target_option_default_node);
3701 /* Process each of the options on the chain. */
3702 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3705 /* If the changed options are different from the default, rerun override_options,
3706 and then save the options away. The string options are are attribute options,
3707 and will be undone when we copy the save structure. */
3708 if (ix86_isa_flags != def->ix86_isa_flags
3709 || target_flags != def->target_flags
3710 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3711 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3712 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3714 /* If we are using the default tune= or arch=, undo the string assigned,
3715 and use the default. */
3716 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3717 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3718 else if (!orig_arch_specified)
3719 ix86_arch_string = NULL;
3721 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3722 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3723 else if (orig_tune_defaulted)
3724 ix86_tune_string = NULL;
3726 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3727 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3728 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3729 else if (!TARGET_64BIT && TARGET_SSE)
3730 ix86_fpmath_string = "sse,387";
3732 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3733 override_options (false);
3735 /* Add any builtin functions with the new isa if any. */
3736 ix86_add_new_builtins (ix86_isa_flags);
3738 /* Save the current options unless we are validating options for
3740 t = build_target_option_node ();
/* Restore the global strings that were temporarily overridden.  */
3742 ix86_arch_string = orig_arch_string;
3743 ix86_tune_string = orig_tune_string;
3744 ix86_fpmath_string = orig_fpmath_string;
3746 /* Free up memory allocated to hold the strings */
3747 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3748 if (option_strings[i])
3749 free (option_strings[i]);
3755 /* Hook to validate attribute((target("string"))). */
/* TARGET_OPTION_VALID_ATTRIBUTE_P hook for FNDECL.  Builds the
   function-specific target (and, if affected, optimization) nodes from
   ARGS and attaches them to FNDECL, restoring the global option state
   before returning.  */
3758 ix86_valid_target_attribute_p (tree fndecl,
3759 tree ARG_UNUSED (name),
3761 int ARG_UNUSED (flags))
3763 struct cl_target_option cur_target;
/* Capture the optimization node in effect before the attribute is
   applied, for later comparison.  */
3765 tree old_optimize = build_optimization_node ();
3766 tree new_target, new_optimize;
3767 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3769 /* If the function changed the optimization levels as well as setting target
3770 options, start with the optimizations specified. */
3771 if (func_optimize && func_optimize != old_optimize)
3772 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3774 /* The target attributes may also change some optimization flags, so update
3775 the optimization options if necessary. */
3776 cl_target_option_save (&cur_target);
3777 new_target = ix86_valid_target_attribute_tree (args);
3778 new_optimize = build_optimization_node ();
/* Attach the computed per-function nodes to the declaration.  */
3785 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3787 if (old_optimize != new_optimize)
3788 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary option changes made while validating.  */
3791 cl_target_option_restore (&cur_target);
3793 if (old_optimize != new_optimize)
3794 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3800 /* Hook to determine if one function can safely inline another. */
/* TARGET_CAN_INLINE_P hook: CALLEE may be inlined into CALLER only if
   its target options are compatible with the caller's, per the checks
   below.  */
3803 ix86_can_inline_p (tree caller, tree callee)
3806 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3807 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3809 /* If callee has no option attributes, then it is ok to inline. */
3813 /* If caller has no option attributes, but callee does then it is not ok to
3815 else if (!caller_tree)
3820 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3821 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3823 /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function
3824 can inline a SSE2 function but a SSE2 function can't inline a SSE5
/* Subset test: every ISA bit the callee needs must be present in the
   caller's ISA flags.  */
3826 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3827 != callee_opts->ix86_isa_flags)
3830 /* See if we have the same non-isa options. */
3831 else if (caller_opts->target_flags != callee_opts->target_flags)
3834 /* See if arch, tune, etc. are the same. */
3835 else if (caller_opts->arch != callee_opts->arch)
3838 else if (caller_opts->tune != callee_opts->tune)
3841 else if (caller_opts->fpmath != callee_opts->fpmath)
3844 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3855 /* Remember the last target of ix86_set_current_function. */
/* Cached so repeated calls for the same function are a cheap no-op.  */
3856 static GTY(()) tree ix86_previous_fndecl;
3858 /* Establish appropriate back-end context for processing the function
3859 FNDECL. The argument might be NULL to indicate processing at top
3860 level, outside of any function scope. */
3862 ix86_set_current_function (tree fndecl)
3864 /* Only change the context if the function changes. This hook is called
3865 several times in the course of compiling a function, and we don't want to
3866 slow things down too much or call target_reinit when it isn't safe. */
3867 if (fndecl && fndecl != ix86_previous_fndecl)
/* Compare the per-function target nodes of the previous and the new
   function to decide whether option state must be switched.  */
3869 tree old_tree = (ix86_previous_fndecl
3870 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3873 tree new_tree = (fndecl
3874 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3877 ix86_previous_fndecl = fndecl;
3878 if (old_tree == new_tree)
/* New function carries its own target options: apply them.  */
3883 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* Otherwise fall back to the current global target options.  */
3889 struct cl_target_option *def
3890 = TREE_TARGET_OPTION (target_option_current_node);
3892 cl_target_option_restore (def);
3899 /* Return true if this goes in large data/bss. */
/* EXP is a decl (or type-bearing node); only the medium code models
   place anything into the large data sections.  */
3902 ix86_in_large_data_p (tree exp)
3904 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3907 /* Functions are never large data. */
3908 if (TREE_CODE (exp) == FUNCTION_DECL)
/* A variable explicitly placed in .ldata/.lbss is large data by
   definition, regardless of its size.  */
3911 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3913 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3914 if (strcmp (section, ".ldata") == 0
3915 || strcmp (section, ".lbss") == 0)
/* Otherwise decide by size against the -mlarge-data-threshold value.  */
3921 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3923 /* If this is an incomplete type with size 0, then we can't put it
3924 in data because it might be too big when completed. */
3925 if (!size || size > ix86_section_threshold)
3932 /* Switch to the appropriate section for output of DECL.
3933 DECL is either a `VAR_DECL' node or a constant of some sort.
3934 RELOC indicates whether forming the initial value of DECL requires
3935 link-time relocations. */
3937 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3941 x86_64_elf_select_section (tree decl, int reloc,
3942 unsigned HOST_WIDE_INT align)
/* For medium-model large data, map the categorized section onto its
   ".l"-prefixed large-section counterpart; everything else falls
   through to the default ELF logic at the bottom.  */
3944 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3945 && ix86_in_large_data_p (decl))
3947 const char *sname = NULL;
3948 unsigned int flags = SECTION_WRITE;
3949 switch (categorize_decl_for_section (decl, reloc))
3954 case SECCAT_DATA_REL:
3955 sname = ".ldata.rel";
3957 case SECCAT_DATA_REL_LOCAL:
3958 sname = ".ldata.rel.local";
3960 case SECCAT_DATA_REL_RO:
3961 sname = ".ldata.rel.ro";
3963 case SECCAT_DATA_REL_RO_LOCAL:
3964 sname = ".ldata.rel.ro.local";
3968 flags |= SECTION_BSS;
3971 case SECCAT_RODATA_MERGE_STR:
3972 case SECCAT_RODATA_MERGE_STR_INIT:
3973 case SECCAT_RODATA_MERGE_CONST:
3977 case SECCAT_SRODATA:
3984 /* We don't split these for medium model. Place them into
3985 default sections and hope for best. */
3987 case SECCAT_EMUTLS_VAR:
3988 case SECCAT_EMUTLS_TMPL:
3993 /* We might get called with string constants, but get_named_section
3994 doesn't like them as they are not DECLs. Also, we need to set
3995 flags in that case. */
3997 return get_section (sname, flags, NULL);
3998 return get_named_section (decl, sname, reloc);
4001 return default_elf_select_section (decl, reloc, align);
4004 /* Build up a unique section name, expressed as a
4005 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4006 RELOC indicates whether the initial value of EXP requires
4007 link-time relocations. */
4009 static void ATTRIBUTE_UNUSED
4010 x86_64_elf_unique_section (tree decl, int reloc)
/* Mirror of x86_64_elf_select_section for -ffunction/-fdata-sections
   style unique naming: medium-model large data gets a ".l"-prefixed
   section prefix; otherwise defer to default_unique_section.  */
4012 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4013 && ix86_in_large_data_p (decl))
4015 const char *prefix = NULL;
4016 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4017 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4019 switch (categorize_decl_for_section (decl, reloc))
4022 case SECCAT_DATA_REL:
4023 case SECCAT_DATA_REL_LOCAL:
4024 case SECCAT_DATA_REL_RO:
4025 case SECCAT_DATA_REL_RO_LOCAL:
/* one_only variants use the short ".ld"/".lb"/".lr" linkonce
   prefixes; otherwise the full large-section names.  */
4026 prefix = one_only ? ".ld" : ".ldata";
4029 prefix = one_only ? ".lb" : ".lbss";
4032 case SECCAT_RODATA_MERGE_STR:
4033 case SECCAT_RODATA_MERGE_STR_INIT:
4034 case SECCAT_RODATA_MERGE_CONST:
4035 prefix = one_only ? ".lr" : ".lrodata";
4037 case SECCAT_SRODATA:
4044 /* We don't split these for medium model. Place them into
4045 default sections and hope for best. */
4047 case SECCAT_EMUTLS_VAR:
4048 prefix = targetm.emutls.var_section;
4050 case SECCAT_EMUTLS_TMPL:
4051 prefix = targetm.emutls.tmpl_section;
4056 const char *name, *linkonce;
/* Base the unique name on the decl's (encoding-stripped) assembler
   name.  */
4059 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4060 name = targetm.strip_name_encoding (name);
4062 /* If we're using one_only, then there needs to be a .gnu.linkonce
4063 prefix to the section name. */
4064 linkonce = one_only ? ".gnu.linkonce" : "";
4066 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4068 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4072 default_unique_section (decl, reloc);
4075 #ifdef COMMON_ASM_OP
4076 /* This says how to output assembler code to declare an
4077 uninitialized external linkage data object.
4079 For medium model x86-64 we need to use .largecomm opcode for
/* Emit a common-symbol directive for NAME: ".largecomm" when the medium
   code model puts objects above ix86_section_threshold into large data,
   otherwise the target's plain COMMON_ASM_OP.  */
4082 x86_elf_aligned_common (FILE *file,
4083 const char *name, unsigned HOST_WIDE_INT size,
4086 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4087 && size > (unsigned int)ix86_section_threshold)
4088 fprintf (file, ".largecomm\t");
4090 fprintf (file, "%s", COMMON_ASM_OP);
4091 assemble_name (file, name);
/* Alignment is printed in bytes, hence the BITS_PER_UNIT division.  */
4092 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4093 size, align / BITS_PER_UNIT);
4097 /* Utility function for targets to use in implementing
4098 ASM_OUTPUT_ALIGNED_BSS. */
/* Switch to the appropriate BSS section (.lbss for medium-model large
   objects), emit alignment and the object's label, then reserve SIZE
   bytes of zero-initialized storage.  */
4101 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4102 const char *name, unsigned HOST_WIDE_INT size,
4105 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4106 && size > (unsigned int)ix86_section_threshold)
4107 switch_to_section (get_named_section (decl, ".lbss", 0));
4109 switch_to_section (bss_section);
4110 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4111 #ifdef ASM_DECLARE_OBJECT_NAME
4112 last_assemble_variable_decl = decl;
4113 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4115 /* Standard thing is just output label for the object. */
4116 ASM_OUTPUT_LABEL (file, name);
4117 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the label names distinct storage.  */
4118 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set i386-specific optimization defaults for optimization LEVEL; runs
   before the command line can override them (see override_options).  */
4122 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4124 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4125 make the problem with not enough registers even worse. */
4126 #ifdef INSN_SCHEDULING
4128 flag_schedule_insns = 0;
4132 /* The Darwin libraries never set errno, so we might as well
4133 avoid calling them when that's the only reason we would. */
4134 flag_errno_math = 0;
4136 /* The default values of these switches depend on TARGET_64BIT,
4137 which is not known at this moment. Mark these values with 2 and
4138 let the user override them. In case there is no command line option
4139 specifying them, we will set the defaults in override_options. */
4141 flag_omit_frame_pointer = 2;
4142 flag_pcc_struct_return = 2;
4143 flag_asynchronous_unwind_tables = 2;
4144 flag_vect_cost_model = 1;
4145 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4146 SUBTARGET_OPTIMIZATION_OPTIONS;
4150 /* Decide whether we can make a sibling call to a function. DECL is the
4151 declaration of the function being targeted by the call and EXP is the
4152 CALL_EXPR representing the call. */
4155 ix86_function_ok_for_sibcall (tree decl, tree exp)
4160 /* If we are generating position-independent code, we cannot sibcall
4161 optimize any indirect call, or a direct call to a global function,
4162 as the PLT requires %ebx be live. */
4163 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Recover the callee's FUNCTION_TYPE from the CALL_EXPR operand, looking
   through the pointer type if the callee is called through a pointer.  */
4170 func = TREE_TYPE (CALL_EXPR_FN (exp));
4171 if (POINTER_TYPE_P (func))
4172 func = TREE_TYPE (func);
4175 /* Check that the return value locations are the same. Like
4176 if we are returning floats on the 80387 register stack, we cannot
4177 make a sibcall from a function that doesn't return a float to a
4178 function that does or, conversely, from a function that does return
4179 a float to a function that doesn't; the necessary stack adjustment
4180 would not be executed. This is also the place we notice
4181 differences in the return value ABI. Note that it is ok for one
4182 of the functions to have void return type as long as the return
4183 value of the other is passed in a register. */
4184 a = ix86_function_value (TREE_TYPE (exp), func, false);
4185 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
/* x87 stack-register returns must match exactly; otherwise a mismatch is
   tolerated when the current function returns void.  */
4187 if (STACK_REG_P (a) || STACK_REG_P (b))
4189 if (!rtx_equal_p (a, b))
4192 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4194 else if (!rtx_equal_p (a, b))
4197 /* If this call is indirect, we'll need to be able to use a call-clobbered
4198 register for the address of the target function. Make sure that all
4199 such registers are not used for passing parameters. */
4200 if (!decl && !TARGET_64BIT)
4204 /* We're looking at the CALL_EXPR, we need the type of the function. */
4205 type = CALL_EXPR_FN (exp); /* pointer expression */
4206 type = TREE_TYPE (type); /* pointer type */
4207 type = TREE_TYPE (type); /* function type */
/* With regparm(3), all of %eax/%edx/%ecx may carry arguments, leaving no
   call-clobbered register free to hold the target address.  */
4209 if (ix86_function_regparm (type, NULL) >= 3)
4211 /* ??? Need to count the actual number of registers to be used,
4212 not the possible number of registers. Fix later. */
4217 /* Dllimport'd functions are also called indirectly. */
4218 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4220 && decl && DECL_DLLIMPORT_P (decl)
4221 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4224 /* If we need to align the outgoing stack, then sibcalling would
4225 unalign the stack, which may break the called function. */
4226 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4229 /* Otherwise okay. That also includes certain types of indirect calls. */
4233 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4234 calling convention attributes;
4235 arguments as in struct attribute_spec.handler. */
4238 ix86_handle_cconv_attribute (tree *node, tree name,
4240 int flags ATTRIBUTE_UNUSED,
/* Reject the attribute on anything that is not a function/method type or
   a FIELD_DECL/TYPE_DECL; otherwise diagnose invalid combinations of the
   calling-convention attributes below.  */
4243 if (TREE_CODE (*node) != FUNCTION_TYPE
4244 && TREE_CODE (*node) != METHOD_TYPE
4245 && TREE_CODE (*node) != FIELD_DECL
4246 && TREE_CODE (*node) != TYPE_DECL)
4248 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4250 *no_add_attrs = true;
4254 /* Can combine regparm with all attributes but fastcall. */
4255 if (is_attribute_p ("regparm", name))
4259 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4261 error ("fastcall and regparm attributes are not compatible");
/* regparm's argument must be an integer constant no larger than
   REGPARM_MAX.  */
4264 cst = TREE_VALUE (args);
4265 if (TREE_CODE (cst) != INTEGER_CST)
4267 warning (OPT_Wattributes,
4268 "%qE attribute requires an integer constant argument",
4270 *no_add_attrs = true;
4272 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4274 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4276 *no_add_attrs = true;
4284 /* Do not warn when emulating the MS ABI. */
4285 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4286 warning (OPT_Wattributes, "%qE attribute ignored",
4288 *no_add_attrs = true;
4292 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4293 if (is_attribute_p ("fastcall", name))
4295 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4297 error ("fastcall and cdecl attributes are not compatible");
4299 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4301 error ("fastcall and stdcall attributes are not compatible");
4303 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4305 error ("fastcall and regparm attributes are not compatible");
4309 /* Can combine stdcall with fastcall (redundant), regparm and
4311 else if (is_attribute_p ("stdcall", name))
4313 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4315 error ("stdcall and cdecl attributes are not compatible");
4317 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4319 error ("stdcall and fastcall attributes are not compatible");
4323 /* Can combine cdecl with regparm and sseregparm. */
4324 else if (is_attribute_p ("cdecl", name))
4326 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4328 error ("stdcall and cdecl attributes are not compatible");
4330 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4332 error ("fastcall and cdecl attributes are not compatible");
4336 /* Can combine sseregparm with all attributes. */
4341 /* Return 0 if the attributes for two types are incompatible, 1 if they
4342 are compatible, and 2 if they are nearly compatible (which causes a
4343 warning to be generated). */
4346 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4348 /* Check for mismatch of non-default calling convention. */
4349 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only function/method types carry calling-convention attributes.  */
4351 if (TREE_CODE (type1) != FUNCTION_TYPE
4352 && TREE_CODE (type1) != METHOD_TYPE)
4355 /* Check for mismatched fastcall/regparm types. */
4356 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4357 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4358 || (ix86_function_regparm (type1, NULL)
4359 != ix86_function_regparm (type2, NULL)))
4362 /* Check for mismatched sseregparm types. */
4363 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4364 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4367 /* Check for mismatched return types (cdecl vs stdcall). */
4368 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4369 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4375 /* Return the regparm value for a function with the indicated TYPE and DECL.
4376 DECL may be NULL when calling function indirectly
4377 or considering a libcall. */
4380 ix86_function_regparm (const_tree type, const_tree decl)
/* error_issued is static so the nested-function diagnostic below is
   reported only once per compilation.  */
4385 static bool error_issued;
/* 64-bit: the register count is fixed by the function's ABI.  */
4388 return (ix86_function_type_abi (type) == SYSV_ABI
4389 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
4391 regparm = ix86_regparm;
4392 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4396 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4398 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4400 /* We can't use regparm(3) for nested functions because
4401 these pass static chain pointer in %ecx register. */
4402 if (!error_issued && regparm == 3
4403 && decl_function_context (decl)
4404 && !DECL_NO_STATIC_CHAIN (decl))
4406 error ("nested functions are limited to 2 register parameters");
4407 error_issued = true;
4415 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4418 /* Use register calling convention for local functions when possible. */
4420 && TREE_CODE (decl) == FUNCTION_DECL
4424 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4425 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4428 int local_regparm, globals = 0, regno;
4431 /* Make sure no regparm register is taken by a
4432 fixed register variable. */
4433 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4434 if (fixed_regs[local_regparm])
4437 /* We can't use regparm(3) for nested functions as these use
4438 static chain pointer in third argument. */
4439 if (local_regparm == 3
4440 && decl_function_context (decl)
4441 && !DECL_NO_STATIC_CHAIN (decl))
4444 /* If the function realigns its stackpointer, the prologue will
4445 clobber %ecx. If we've already generated code for the callee,
4446 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4447 scanning the attributes for the self-realigning property. */
4448 f = DECL_STRUCT_FUNCTION (decl);
4449 /* Since current internal arg pointer won't conflict with
4450 parameter passing regs, so no need to change stack
4451 realignment and adjust regparm number.
4453 Each fixed register usage increases register pressure,
4454 so less registers should be used for argument passing.
4455 This functionality can be overriden by an explicit
4457 for (regno = 0; regno <= DI_REG; regno++)
4458 if (fixed_regs[regno])
/* Each global (fixed) register reduces the usable regparm count.  */
4462 = globals < local_regparm ? local_regparm - globals : 0;
4464 if (local_regparm > regparm)
4465 regparm = local_regparm;
4472 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4473 DFmode (2) arguments in SSE registers for a function with the
4474 indicated TYPE and DECL. DECL may be NULL when calling function
4475 indirectly or considering a libcall. Otherwise return 0. */
4478 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* Only meaningful for the 32-bit ABI; x86-64 always uses SSE here.  */
4480 gcc_assert (!TARGET_64BIT);
4482 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4483 by the sseregparm attribute. */
4484 if (TARGET_SSEREGPARM
4485 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* WARN controls whether the missing-SSE condition is diagnosed.  */
4492 error ("Calling %qD with attribute sseregparm without "
4493 "SSE/SSE2 enabled", decl);
4495 error ("Calling %qT with attribute sseregparm without "
4496 "SSE/SSE2 enabled", type);
4504 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4505 (and DFmode for SSE2) arguments in SSE registers. */
4506 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4508 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4509 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4511 return TARGET_SSE2 ? 2 : 1;
4517 /* Return true if EAX is live at the start of the function. Used by
4518 ix86_expand_prologue to determine if we need special help before
4519 calling allocate_stack_worker. */
4522 ix86_eax_live_at_start_p (void)
4524 /* Cheat. Don't bother working forward from ix86_function_regparm
4525 to the function type to whether an actual argument is located in
4526 eax. Instead just look at cfg info, which is still close enough
4527 to correct at this point. This gives false positives for broken
4528 functions that might use uninitialized data that happens to be
4529 allocated in eax, but who cares? */
/* Hard register 0 is %eax; query the entry block's live-out set.  */
4530 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4533 /* Value is the number of bytes of arguments automatically
4534 popped when returning from a subroutine call.
4535 FUNDECL is the declaration node of the function (as a tree),
4536 FUNTYPE is the data type of the function (as a tree),
4537 or for a library call it is an identifier node for the subroutine name.
4538 SIZE is the number of bytes of arguments passed on the stack.
4540 On the 80386, the RTD insn may be used to pop them if the number
4541 of args is fixed, but if the number is variable then the caller
4542 must pop them all. RTD can't be used for library calls now
4543 because the library is compiled with the Unix compiler.
4544 Use of RTD is a selectable option, since it is incompatible with
4545 standard Unix calling sequences. If the option is not selected,
4546 the caller must always pop the args.
4548 The attribute stdcall is equivalent to RTD on a per module basis. */
4551 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4555 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies only to real functions, never to library calls (which
   arrive here with an IDENTIFIER_NODE instead of a decl).  */
4559 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4561 /* Cdecl functions override -mrtd, and never pop the stack. */
4562 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4564 /* Stdcall and fastcall functions will pop the stack if not
4566 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4567 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* Variadic functions can never use callee-pop.  */
4570 if (rtd && ! stdarg_p (funtype))
4574 /* Lose any fake structure return argument if it is passed on the stack. */
4575 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4576 && !KEEP_AGGREGATE_RETURN_POINTER)
4578 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden aggregate-return pointer occupies one word on the stack.  */
4580 return GET_MODE_SIZE (Pmode);
4586 /* Argument support functions. */
4588 /* Return true when register may be used to pass function parameters. */
4590 ix86_function_arg_regno_p (int regno)
4593 const int *parm_regs;
/* 32-bit: integer regparm registers, plus MMX/SSE registers when the
   corresponding ISA is enabled.  */
4598 return (regno < REGPARM_MAX
4599 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4601 return (regno < REGPARM_MAX
4602 || (TARGET_MMX && MMX_REGNO_P (regno)
4603 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4604 || (TARGET_SSE && SSE_REGNO_P (regno)
4605 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4610 if (SSE_REGNO_P (regno) && TARGET_SSE)
4615 if (TARGET_SSE && SSE_REGNO_P (regno)
4616 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4620 /* TODO: The function should depend on current function ABI but
4621 builtins.c would need updating then. Therefore we use the
4624 /* RAX is used as hidden argument to va_arg functions. */
4625 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* 64-bit: scan the ABI-specific integer parameter register table.  */
4628 if (ix86_abi == MS_ABI)
4629 parm_regs = x86_64_ms_abi_int_parameter_registers;
4631 parm_regs = x86_64_int_parameter_registers;
4632 for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
4633 : X86_64_REGPARM_MAX); i++)
4634 if (regno == parm_regs[i])
4639 /* Return if we do not know how to pass TYPE solely in registers. */
4642 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4644 if (must_pass_in_stack_var_size_or_pad (mode, type))
4647 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4648 The layout_type routine is crafty and tries to trick us into passing
4649 currently unsupported vector types on the stack by using TImode. */
4650 return (!TARGET_64BIT && mode == TImode
4651 && type && TREE_CODE (type) != VECTOR_TYPE);
4654 /* It returns the size, in bytes, of the area reserved for arguments passed
4655 in registers for the function represented by fndecl dependent to the used
/* FNDECL may be a FUNCTION_DECL or (otherwise) a function type; only the
   MS ABI reserves register-parameter stack space (the "shadow space").  */
4658 ix86_reg_parm_stack_space (const_tree fndecl)
4660 enum calling_abi call_abi = SYSV_ABI;
4661 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4662 call_abi = ix86_function_abi (fndecl);
4664 call_abi = ix86_function_type_abi (fndecl);
4665 if (call_abi == MS_ABI)
4670 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4673 ix86_function_type_abi (const_tree fntype)
/* The ms_abi/sysv_abi attributes can flip the default ABI per type;
   only meaningful for 64-bit targets.  */
4675 if (TARGET_64BIT && fntype != NULL)
4677 enum calling_abi abi = ix86_abi;
4678 if (abi == SYSV_ABI)
4680 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4683 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
4690 static enum calling_abi
/* Return the calling ABI of FNDECL by consulting its function type.  */
4691 ix86_function_abi (const_tree fndecl)
4695 return ix86_function_type_abi (TREE_TYPE (fndecl));
4698 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4701 ix86_cfun_abi (void)
/* Outside a function (or on 32-bit) there is no per-function ABI.  */
4703 if (! cfun || ! TARGET_64BIT)
4705 return cfun->machine->call_abi;
4709 extern void init_regs (void);
4711 /* Implementation of call abi switching target hook. Specific to FNDECL
4712 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4713 for more details. */
4715 ix86_call_abi_override (const_tree fndecl)
/* NULL means no specific function: fall back to the global default ABI.  */
4717 if (fndecl == NULL_TREE)
4718 cfun->machine->call_abi = ix86_abi;
4720 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4723 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4724 re-initialization of init_regs each time we switch function context since
4725 this is needed only during RTL expansion. */
4727 ix86_maybe_switch_abi (void)
/* %esi's call-used status differs between the two ABIs, so it serves as
   a cheap probe for whether the register sets already match.  */
4730 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4734 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4735 for a call to a function whose data type is FNTYPE.
4736 For a library call, FNTYPE is 0. */
4739 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4740 tree fntype, /* tree ptr for function decl */
4741 rtx libname, /* SYMBOL_REF of library name or 0 */
4744 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4745 memset (cum, 0, sizeof (*cum));
/* Record the callee's ABI: from the decl when available, else from the
   function type.  */
4748 cum->call_abi = ix86_function_abi (fndecl);
4750 cum->call_abi = ix86_function_type_abi (fntype);
4751 /* Set up the number of registers to use for passing arguments. */
4753 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4754 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it")
4755 cum->nregs = ix86_regparm;
/* When the callee's ABI differs from the default, use that ABI's
   register-count limits instead.  */
4758 if (cum->call_abi != ix86_abi)
4759 cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
4764 cum->sse_nregs = SSE_REGPARM_MAX;
4767 if (cum->call_abi != ix86_abi)
4768 cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4769 : X64_SSE_REGPARM_MAX;
4773 cum->mmx_nregs = MMX_REGPARM_MAX;
4774 cum->warn_avx = true;
4775 cum->warn_sse = true;
4776 cum->warn_mmx = true;
4778 /* Because type might mismatch in between caller and callee, we need to
4779 use actual type of function for local calls.
4780 FIXME: cgraph_analyze can be told to actually record if function uses
4781 va_start so for local functions maybe_vaarg can be made aggressive
4783 FIXME: once typesytem is fixed, we won't need this code anymore. */
4785 fntype = TREE_TYPE (fndecl);
4786 cum->maybe_vaarg = (fntype
4787 ? (!prototype_p (fntype) || stdarg_p (fntype))
4792 /* If there are variable arguments, then we won't pass anything
4793 in registers in 32-bit mode. */
4794 if (stdarg_p (fntype))
4805 /* Use ecx and edx registers if function has fastcall attribute,
4806 else look for regparm information. */
4809 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4815 cum->nregs = ix86_function_regparm (fntype, fndecl);
4818 /* Set up the number of SSE registers used for passing SFmode
4819 and DFmode arguments. Warn for mismatching ABI. */
4820 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4824 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4825 But in the case of vector types, it is some vector mode.
4827 When we have only some of our vector isa extensions enabled, then there
4828 are some modes for which vector_mode_supported_p is false. For these
4829 modes, the generic vector support in gcc will choose some non-vector mode
4830 in order to implement the type. By computing the natural mode, we'll
4831 select the proper ABI location for the operand and not depend on whatever
4832 the middle-end decides to do with these vector types.
4834 The midde-end can't deal with the vector types > 16 bytes. In this
4835 case, we return the original mode and warn ABI change if CUM isn't
4838 static enum machine_mode
4839 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4841 enum machine_mode mode = TYPE_MODE (type);
/* Only vector types whose TYPE_MODE is not already a vector mode need
   remapping; sizes 8/16/32 cover MMX/SSE/AVX vector widths.  */
4843 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4845 HOST_WIDE_INT size = int_size_in_bytes (type);
4846 if ((size == 8 || size == 16 || size == 32)
4847 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4848 && TYPE_VECTOR_SUBPARTS (type) > 1)
4850 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4852 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4853 mode = MIN_MODE_VECTOR_FLOAT;
4855 mode = MIN_MODE_VECTOR_INT;
4857 /* Get the mode which has this inner mode and number of units. */
4858 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4859 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4860 && GET_MODE_INNER (mode) == innermode)
/* 32-byte vectors without AVX: keep the original mode, but warn once
   about the ABI change introduced with AVX support.  */
4862 if (size == 32 && !TARGET_AVX)
4864 static bool warnedavx;
4871 warning (0, "AVX vector argument without AVX "
4872 "enabled changes the ABI");
4874 return TYPE_MODE (type);
4887 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4888 this may not agree with the mode that the type system has chosen for the
4889 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4890 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4893 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4898 if (orig_mode != BLKmode)
4899 tmp = gen_rtx_REG (orig_mode, regno)
/* BLKmode: wrap a single (REG, offset 0) entry in a PARALLEL.  */
4902 tmp = gen_rtx_REG (mode, regno);
4903 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4904 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4910 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4911 of this code is to classify each 8bytes of incoming argument by the register
4912 class and assign registers accordingly. */
4914 /* Return the union class of CLASS1 and CLASS2.
4915 See the x86-64 PS ABI for details. */
4917 static enum x86_64_reg_class
/* Implements the psABI's MERGE rules for combining the classes of two
   fields occupying the same eightbyte.  */
4918 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4920 /* Rule #1: If both classes are equal, this is the resulting class. */
4921 if (class1 == class2)
4924 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4926 if (class1 == X86_64_NO_CLASS)
4928 if (class2 == X86_64_NO_CLASS)
4931 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4932 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4933 return X86_64_MEMORY_CLASS;
4935 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4936 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4937 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4938 return X86_64_INTEGERSI_CLASS;
4939 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4940 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4941 return X86_64_INTEGER_CLASS;
4943 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4945 if (class1 == X86_64_X87_CLASS
4946 || class1 == X86_64_X87UP_CLASS
4947 || class1 == X86_64_COMPLEX_X87_CLASS
4948 || class2 == X86_64_X87_CLASS
4949 || class2 == X86_64_X87UP_CLASS
4950 || class2 == X86_64_COMPLEX_X87_CLASS)
4951 return X86_64_MEMORY_CLASS;
4953 /* Rule #6: Otherwise class SSE is used. */
4954 return X86_64_SSE_CLASS;
4957 /* Classify the argument of type TYPE and mode MODE.
4958 CLASSES will be filled by the register class used to pass each word
4959 of the operand. The number of words is returned. In case the parameter
4960 should be passed in memory, 0 is returned. As a special case for zero
4961 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4963 BIT_OFFSET is used internally for handling records and specifies offset
4964 of the offset in bits modulo 256 to avoid overflow cases.
4966 See the x86-64 PS ABI for details.
4970 classify_argument (enum machine_mode mode, const_tree type,
4971 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4973 HOST_WIDE_INT bytes =
4974 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4975 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4977 /* Variable sized entities are always passed/returned in memory. */
4981 if (mode != VOIDmode
4982 && targetm.calls.must_pass_in_stack (mode, type))
/* Aggregates: classify field-by-field, recursively, and merge.  */
4985 if (type && AGGREGATE_TYPE_P (type))
4989 enum x86_64_reg_class subclasses[MAX_CLASSES];
4991 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
4995 for (i = 0; i < words; i++)
4996 classes[i] = X86_64_NO_CLASS;
4998 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4999 signalize memory class, so handle it as special case. */
5002 classes[0] = X86_64_NO_CLASS;
5006 /* Classify each field of record and merge classes. */
5007 switch (TREE_CODE (type))
5010 /* And now merge the fields of structure. */
5011 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5013 if (TREE_CODE (field) == FIELD_DECL)
5017 if (TREE_TYPE (field) == error_mark_node)
5020 /* Bitfields are always classified as integer. Handle them
5021 early, since later code would consider them to be
5022 misaligned integers. */
5023 if (DECL_BIT_FIELD (field))
5025 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5026 i < ((int_bit_position (field) + (bit_offset % 64))
5027 + tree_low_cst (DECL_SIZE (field), 0)
5030 merge_classes (X86_64_INTEGER_CLASS,
5037 type = TREE_TYPE (field);
5039 /* Flexible array member is ignored. */
5040 if (TYPE_MODE (type) == BLKmode
5041 && TREE_CODE (type) == ARRAY_TYPE
5042 && TYPE_SIZE (type) == NULL_TREE
5043 && TYPE_DOMAIN (type) != NULL_TREE
5044 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5049 if (!warned && warn_psabi)
5052 inform (input_location,
5053 "The ABI of passing struct with"
5054 " a flexible array member has"
5055 " changed in GCC 4.4");
/* Recurse on the field's own type, then merge its sub-classes into
   the eightbytes it occupies (offsets are mod 256, see above).  */
5059 num = classify_argument (TYPE_MODE (type), type,
5061 (int_bit_position (field)
5062 + bit_offset) % 256);
5065 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5066 for (i = 0; i < num && (i + pos) < words; i++)
5068 merge_classes (subclasses[i], classes[i + pos]);
5075 /* Arrays are handled as small records. */
5078 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5079 TREE_TYPE (type), subclasses, bit_offset);
5083 /* The partial classes are now full classes. */
5084 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5085 subclasses[0] = X86_64_SSE_CLASS;
5086 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5087 && !((bit_offset % 64) == 0 && bytes == 4))
5088 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element's classification across the whole array.  */
5090 for (i = 0; i < words; i++)
5091 classes[i] = subclasses[i % num];
5096 case QUAL_UNION_TYPE:
5097 /* Unions are similar to RECORD_TYPE but offset is always 0.
5099 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5101 if (TREE_CODE (field) == FIELD_DECL)
5105 if (TREE_TYPE (field) == error_mark_node)
5108 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5109 TREE_TYPE (field), subclasses,
5113 for (i = 0; i < num; i++)
5114 classes[i] = merge_classes (subclasses[i], classes[i]);
5125 /* When size > 16 bytes, if the first one isn't
5126 X86_64_SSE_CLASS or any other ones aren't
5127 X86_64_SSEUP_CLASS, everything should be passed in
5129 if (classes[0] != X86_64_SSE_CLASS)
5132 for (i = 1; i < words; i++)
5133 if (classes[i] != X86_64_SSEUP_CLASS)
5137 /* Final merger cleanup. */
5138 for (i = 0; i < words; i++)
5140 /* If one class is MEMORY, everything should be passed in
5142 if (classes[i] == X86_64_MEMORY_CLASS)
5145 /* The X86_64_SSEUP_CLASS should be always preceded by
5146 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5147 if (classes[i] == X86_64_SSEUP_CLASS
5148 && classes[i - 1] != X86_64_SSE_CLASS
5149 && classes[i - 1] != X86_64_SSEUP_CLASS)
5151 /* The first one should never be X86_64_SSEUP_CLASS. */
5152 gcc_assert (i != 0);
5153 classes[i] = X86_64_SSE_CLASS;
5156 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5157 everything should be passed in memory. */
5158 if (classes[i] == X86_64_X87UP_CLASS
5159 && (classes[i - 1] != X86_64_X87_CLASS))
5163 /* The first one should never be X86_64_X87UP_CLASS. */
5164 gcc_assert (i != 0);
5165 if (!warned && warn_psabi)
5168 inform (input_location,
5169 "The ABI of passing union with long double"
5170 " has changed in GCC 4.4");
5178 /* Compute alignment needed. We align all types to natural boundaries with
5179 exception of XFmode that is aligned to 64bits. */
5180 if (mode != VOIDmode && mode != BLKmode)
5182 int mode_alignment = GET_MODE_BITSIZE (mode);
5185 mode_alignment = 128;
5186 else if (mode == XCmode)
5187 mode_alignment = 256;
5188 if (COMPLEX_MODE_P (mode))
5189 mode_alignment /= 2;
5190 /* Misaligned fields are always returned in memory. */
5191 if (bit_offset % mode_alignment)
5195 /* for V1xx modes, just use the base mode */
5196 if (VECTOR_MODE_P (mode) && mode != V1DImode
5197 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5198 mode = GET_MODE_INNER (mode);
5200 /* Classification of atomic types. */
5205 classes[0] = X86_64_SSE_CLASS;
5208 classes[0] = X86_64_SSE_CLASS;
5209 classes[1] = X86_64_SSEUP_CLASS;
/* Scalar integer modes: pick INTEGER/INTEGERSI per the eightbyte and
   32-bit-sub-part positions the value occupies.  */
5219 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5223 classes[0] = X86_64_INTEGERSI_CLASS;
5226 else if (size <= 64)
5228 classes[0] = X86_64_INTEGER_CLASS;
5231 else if (size <= 64+32)
5233 classes[0] = X86_64_INTEGER_CLASS;
5234 classes[1] = X86_64_INTEGERSI_CLASS;
5237 else if (size <= 64+64)
5239 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5247 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5251 /* OImode shouldn't be used directly. */
/* Floating-point scalar and complex modes.  */
5256 if (!(bit_offset % 64))
5257 classes[0] = X86_64_SSESF_CLASS;
5259 classes[0] = X86_64_SSE_CLASS;
5262 classes[0] = X86_64_SSEDF_CLASS;
5265 classes[0] = X86_64_X87_CLASS;
5266 classes[1] = X86_64_X87UP_CLASS;
5269 classes[0] = X86_64_SSE_CLASS;
5270 classes[1] = X86_64_SSEUP_CLASS;
5273 classes[0] = X86_64_SSE_CLASS;
5274 if (!(bit_offset % 64))
5280 if (!warned && warn_psabi)
5283 inform (input_location,
5284 "The ABI of passing structure with complex float"
5285 " member has changed in GCC 4.4");
5287 classes[1] = X86_64_SSESF_CLASS;
5291 classes[0] = X86_64_SSEDF_CLASS;
5292 classes[1] = X86_64_SSEDF_CLASS;
5295 classes[0] = X86_64_COMPLEX_X87_CLASS;
5298 /* This modes is larger than 16 bytes. */
/* Vector modes: 32-byte AVX vectors use SSE + three SSEUP eightbytes;
   16-byte vectors use SSE + SSEUP; 8-byte vectors use a single class.  */
5306 classes[0] = X86_64_SSE_CLASS;
5307 classes[1] = X86_64_SSEUP_CLASS;
5308 classes[2] = X86_64_SSEUP_CLASS;
5309 classes[3] = X86_64_SSEUP_CLASS;
5317 classes[0] = X86_64_SSE_CLASS;
5318 classes[1] = X86_64_SSEUP_CLASS;
5325 classes[0] = X86_64_SSE_CLASS;
5331 gcc_assert (VECTOR_MODE_P (mode));
5336 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5338 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5339 classes[0] = X86_64_INTEGERSI_CLASS;
5341 classes[0] = X86_64_INTEGER_CLASS;
5342 classes[1] = X86_64_INTEGER_CLASS;
5343 return 1 + (bytes > 8);
5347 /* Examine the argument and return set number of register required in each
5348 class.  Return 0 iff parameter should be passed in memory.  */
/* MODE/TYPE describe the argument; IN_RETURN is nonzero when examining a
   return value rather than a parameter.  On success the register counts are
   stored through INT_NREGS and SSE_NREGS.
   NOTE(review): interior lines are elided in this extract (the per-class
   count increments and the memory-class return are missing); the surviving
   lines only show the class dispatch skeleton.  */
5350 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5351 int *int_nregs, int *sse_nregs)
5353 enum x86_64_reg_class regclass[MAX_CLASSES];
/* classify_argument fills REGCLASS and returns the number of eightbytes;
   a return of 0 means "pass in memory".  */
5354 int n = classify_argument (mode, type, regclass, 0);
/* Walk the classes backwards and tally how many integer / SSE registers
   each class consumes (tally statements elided here).  */
5360 for (n--; n >= 0; n--)
5361 switch (regclass[n])
5363 case X86_64_INTEGER_CLASS:
5364 case X86_64_INTEGERSI_CLASS:
5367 case X86_64_SSE_CLASS:
5368 case X86_64_SSESF_CLASS:
5369 case X86_64_SSEDF_CLASS:
5372 case X86_64_NO_CLASS:
5373 case X86_64_SSEUP_CLASS:
5375 case X86_64_X87_CLASS:
5376 case X86_64_X87UP_CLASS:
/* x87 classes are only usable for return values (two registers); as an
   argument class they force memory passing.  */
5380 case X86_64_COMPLEX_X87_CLASS:
5381 return in_return ? 2 : 0;
5382 case X86_64_MEMORY_CLASS:
5388 /* Construct container for the argument used by GCC interface.  See
5389 FUNCTION_ARG for the detailed description.  */
/* Builds the rtx (REG or PARALLEL of EXPR_LISTs) describing where an
   argument or return value of MODE/TYPE lives, given the remaining register
   budget (NINTREGS integer regs starting at INTREG, NSSEREGS SSE regs
   starting at SSE_REGNO).  IN_RETURN is nonzero for return values.
   NOTE(review): many interior lines are elided in this extract.  */
5392 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5393 const_tree type, int in_return, int nintregs, int nsseregs,
5394 const int *intreg, int sse_regno)
5396 /* The following variables hold the static issued_error state.  */
/* Static flags so each diagnostic below is emitted at most once per
   compilation.  */
5397 static bool issued_sse_arg_error;
5398 static bool issued_sse_ret_error;
5399 static bool issued_x87_ret_error;
5401 enum machine_mode tmpmode;
5403 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5404 enum x86_64_reg_class regclass[MAX_CLASSES];
5408 int needed_sseregs, needed_intregs;
5409 rtx exp[MAX_CLASSES];
5412 n = classify_argument (mode, type, regclass, 0);
/* Memory class, or not enough registers left: caller must pass on the
   stack (return path elided here).  */
5415 if (!examine_argument (mode, type, in_return, &needed_intregs,
5418 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5421 /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
5422 some less clueful developer tries to use floating-point anyway.  */
5423 if (needed_sseregs && !TARGET_SSE)
5427 if (!issued_sse_ret_error)
5429 error ("SSE register return with SSE disabled");
5430 issued_sse_ret_error = true;
5433 else if (!issued_sse_arg_error)
5435 error ("SSE register argument with SSE disabled");
5436 issued_sse_arg_error = true;
5441 /* Likewise, error if the ABI requires us to return values in the
5442 x87 registers and the user specified -mno-80387.  */
5443 if (!TARGET_80387 && in_return)
5444 for (i = 0; i < n; i++)
5445 if (regclass[i] == X86_64_X87_CLASS
5446 || regclass[i] == X86_64_X87UP_CLASS
5447 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5449 if (!issued_x87_ret_error)
5451 error ("x87 register return with x87 disabled");
5452 issued_x87_ret_error = true;
5457 /* First construct simple cases.  Avoid SCmode, since we want to use
5458 single register to pass this type.  */
5459 if (n == 1 && mode != SCmode)
5460 switch (regclass[0])
5462 case X86_64_INTEGER_CLASS:
5463 case X86_64_INTEGERSI_CLASS:
5464 return gen_rtx_REG (mode, intreg[0]);
5465 case X86_64_SSE_CLASS:
5466 case X86_64_SSESF_CLASS:
5467 case X86_64_SSEDF_CLASS:
5468 if (mode != BLKmode)
5469 return gen_reg_or_parallel (mode, orig_mode,
5470 SSE_REGNO (sse_regno))
5472 case X86_64_X87_CLASS:
5473 case X86_64_COMPLEX_X87_CLASS:
5474 return gen_rtx_REG (mode, FIRST_STACK_REG);
5475 case X86_64_NO_CLASS:
5476 /* Zero sized array, struct or class.  */
/* 16-byte SSE pair (e.g. __m128) goes in a single XMM register.  */
5481 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5482 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5483 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
/* 32-byte SSE quad (AVX __m256) likewise in one register (leading
   condition elided).  */
5485 && regclass[0] == X86_64_SSE_CLASS
5486 && regclass[1] == X86_64_SSEUP_CLASS
5487 && regclass[2] == X86_64_SSEUP_CLASS
5488 && regclass[3] == X86_64_SSEUP_CLASS
5490 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
/* long double (X87 + X87UP) returned in %st(0).  */
5493 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5494 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* A 16-byte integer pair in two consecutive GPRs can be a single REG.  */
5495 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5496 && regclass[1] == X86_64_INTEGER_CLASS
5497 && (mode == CDImode || mode == TImode || mode == TFmode)
5498 && intreg[0] + 1 == intreg[1])
5499 return gen_rtx_REG (mode, intreg[0]);
5501 /* Otherwise figure out the entries of the PARALLEL.  */
5502 for (i = 0; i < n; i++)
5506 switch (regclass[i])
5508 case X86_64_NO_CLASS:
5510 case X86_64_INTEGER_CLASS:
5511 case X86_64_INTEGERSI_CLASS:
5512 /* Merge TImodes on aligned occasions here too.  */
5513 if (i * 8 + 8 > bytes)
5514 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5515 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5519 /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
5520 if (tmpmode == BLKmode)
5522 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5523 gen_rtx_REG (tmpmode, *intreg),
5527 case X86_64_SSESF_CLASS:
5528 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5529 gen_rtx_REG (SFmode,
5530 SSE_REGNO (sse_regno)),
5534 case X86_64_SSEDF_CLASS:
5535 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5536 gen_rtx_REG (DFmode,
5537 SSE_REGNO (sse_regno)),
5541 case X86_64_SSE_CLASS:
/* SSEUP followers mean a wider (TImode/OImode) chunk starting here
   (mode-selection lines elided).  */
5549 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5559 && regclass[1] == X86_64_SSEUP_CLASS
5560 && regclass[2] == X86_64_SSEUP_CLASS
5561 && regclass[3] == X86_64_SSEUP_CLASS);
5568 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5569 gen_rtx_REG (tmpmode,
5570 SSE_REGNO (sse_regno)),
5579 /* Empty aligned struct, union or class.  */
/* Wrap the collected EXPR_LISTs in a PARALLEL and return it.  */
5583 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5584 for (i = 0; i < nexps; i++)
5585 XVECEXP (ret, 0, i) = exp [i];
5589 /* Update the data in CUM to advance over an argument of mode MODE
5590 and data type TYPE.  (TYPE is null for libcalls where that information
5591 may not be available.)  */
/* 32-bit variant: consumes BYTES/WORDS worth of the integer-register pool,
   or one SSE/MMX register for vector modes.  NOTE(review): the mode switch
   and several branches are elided in this extract.  */
5594 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5595 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-like argument: charge WORDS registers against the GPR budget.  */
5611 cum->words += words;
5612 cum->nregs -= words;
5613 cum->regno += words;
/* Out of GPRs: remaining arguments go on the stack (reset elided).  */
5615 if (cum->nregs <= 0)
5623 /* OImode shouldn't be used directly.  */
5627 if (cum->float_in_sse < 2)
5630 if (cum->float_in_sse < 1)
/* SSE vector argument (non-aggregate only): consume one XMM register.  */
5647 if (!type || !AGGREGATE_TYPE_P (type))
5649 cum->sse_words += words;
5650 cum->sse_nregs -= 1;
5651 cum->sse_regno += 1;
5652 if (cum->sse_nregs <= 0)
/* MMX vector argument (non-aggregate only): consume one MMX register.  */
5665 if (!type || !AGGREGATE_TYPE_P (type))
5667 cum->mmx_words += words;
5668 cum->mmx_nregs -= 1;
5669 cum->mmx_regno += 1;
5670 if (cum->mmx_nregs <= 0)
/* SysV x86-64 variant of argument advance: use examine_argument to find how
   many integer/SSE registers the argument needs and charge them, or fall
   back to the stack (WORDS) when it must be passed in memory.  */
5681 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5682 tree type, HOST_WIDE_INT words, int named)
5684 int int_nregs, sse_nregs;
5686 /* Unnamed 256bit vector mode parameters are passed on stack.  */
5687 if (!named && VALID_AVX256_REG_MODE (mode))
/* Memory-class argument: only the stack offset advances.  */
5690 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5691 cum->words += words;
5692 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5694 cum->nregs -= int_nregs;
5695 cum->sse_nregs -= sse_nregs;
5696 cum->regno += int_nregs;
5697 cum->sse_regno += sse_nregs;
/* Not enough registers of some kind left: the whole argument goes on
   the stack.  */
5700 cum->words += words;
/* Win64 variant of argument advance.  Every slot is one register/stack
   word; anything not of size 1/2/4/8 is passed indirectly (by reference),
   hence the assert.  NOTE(review): the register-decrement lines appear
   elided in this extract.  */
5704 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5705 HOST_WIDE_INT words)
5707 /* Otherwise, this should be passed indirect.  */
5708 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5710 cum->words += words;
/* Top-level FUNCTION_ARG_ADVANCE dispatcher: compute the argument's size in
   bytes and words, then defer to the ABI-specific helper (Win64, SysV
   x86-64, or 32-bit).  */
5719 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5720 tree type, int named)
5722 HOST_WIDE_INT bytes, words;
/* BLKmode arguments get their size from the type; otherwise from MODE.  */
5724 if (mode == BLKmode)
5725 bytes = int_size_in_bytes (type);
5727 bytes = GET_MODE_SIZE (mode);
5728 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Re-derive the natural mode so vectors are advanced consistently with
   how function_arg will assign them (guard condition elided).  */
5731 mode = type_natural_mode (type, NULL);
5733 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5734 function_arg_advance_ms_64 (cum, bytes, words);
5735 else if (TARGET_64BIT)
5736 function_arg_advance_64 (cum, mode, type, words, named);
5738 function_arg_advance_32 (cum, mode, type, bytes, words);
5741 /* Define where to put the arguments to a function.
5742 Value is zero to push the argument on the stack,
5743 or a hard register in which to store the argument.
5745 MODE is the argument's machine mode.
5746 TYPE is the data type of the argument (as a tree).
5747 This is null for libcalls where that information may
5749 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5750 the preceding args and about the function being called.
5751 NAMED is nonzero if this argument is a named parameter
5752 (otherwise it is an extra parameter matching an ellipsis).  */
/* 32-bit register assignment (regparm/fastcall GPRs, SSE and MMX vector
   registers).  NOTE(review): the mode switch and several branches are
   elided in this extract.  */
5755 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5756 enum machine_mode orig_mode, tree type,
5757 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* One-shot warning flags for vector args used without SSE/MMX enabled.  */
5759 static bool warnedsse, warnedmmx;
5761 /* Avoid the AL settings for the Unix64 ABI.  */
5762 if (mode == VOIDmode)
/* Integer argument that still fits in the remaining regparm GPRs.  */
5778 if (words <= cum->nregs)
5780 int regno = cum->regno;
5782 /* Fastcall allocates the first two DWORD (SImode) or
5783 smaller arguments to ECX and EDX if it isn't an
5789 || (type && AGGREGATE_TYPE_P (type)))
5792 /* ECX not EAX is the first allocated register.  */
5793 if (regno == AX_REG)
5796 return gen_rtx_REG (mode, regno);
5801 if (cum->float_in_sse < 2)
5804 if (cum->float_in_sse < 1)
5808 /* In 32bit, we pass TImode in xmm registers.  */
5815 if (!type || !AGGREGATE_TYPE_P (type))
5817 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5820 warning (0, "SSE vector argument without SSE enabled "
5824 return gen_reg_or_parallel (mode, orig_mode,
5825 cum->sse_regno + FIRST_SSE_REG);
5830 /* OImode shouldn't be used directly.  */
/* AVX 256-bit vectors also go in the SSE register file.  */
5839 if (!type || !AGGREGATE_TYPE_P (type))
5842 return gen_reg_or_parallel (mode, orig_mode,
5843 cum->sse_regno + FIRST_SSE_REG);
/* 8-byte vectors go in MMX registers (with a one-time warning if MMX is
   disabled).  */
5852 if (!type || !AGGREGATE_TYPE_P (type))
5854 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5857 warning (0, "MMX vector argument without MMX enabled "
5861 return gen_reg_or_parallel (mode, orig_mode,
5862 cum->mmx_regno + FIRST_MMX_REG);
/* SysV x86-64 register assignment: delegate to construct_container with the
   remaining integer/SSE register budget from CUM.  */
5871 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5872 enum machine_mode orig_mode, tree type, int named)
5874 /* Handle a hidden AL argument containing number of registers
5875 for varargs x86-64 functions.  */
5876 if (mode == VOIDmode)
5877 return GEN_INT (cum->maybe_vaarg
5878 ? (cum->sse_nregs < 0
5879 ? (cum->call_abi == ix86_abi
5881 : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5882 : X64_SSE_REGPARM_MAX))
5897 /* Unnamed 256bit vector mode parameters are passed on stack.  */
/* Normal case: build the REG/PARALLEL container (some arguments elided
   from this extract).  */
5903 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5905 &x86_64_int_parameter_registers [cum->regno],
/* Win64 register assignment: the first four slots map one-to-one onto
   integer registers (or XMM registers for SF/DFmode); aggregates of size
   1/2/4/8 are passed in an integer register of matching width.  */
5910 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5911 enum machine_mode orig_mode, int named,
5912 HOST_WIDE_INT bytes)
5916 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5917 We use value of -2 to specify that current function call is MSABI.  */
5918 if (mode == VOIDmode)
5919 return GEN_INT (-2);
5921 /* If we've run out of registers, it goes on the stack.  */
5922 if (cum->nregs == 0)
5925 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5927 /* Only floating point modes are passed in anything but integer regs.  */
5928 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5931 regno = cum->regno + FIRST_SSE_REG;
/* Unnamed (varargs) float case: the value is made available in both the
   SSE register and the shadowing integer register via a PARALLEL.  */
5936 /* Unnamed floating parameters are passed in both the
5937 SSE and integer registers.  */
5938 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5939 t2 = gen_rtx_REG (mode, regno);
5940 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5941 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5942 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5945 /* Handle aggregated types passed in register.  */
5946 if (orig_mode == BLKmode)
5948 if (bytes > 0 && bytes <= 8)
5949 mode = (bytes > 4 ? DImode : SImode);
5950 if (mode == BLKmode)
5954 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG dispatcher: compute size, normalize vector types
   to a vector mode, then defer to the ABI-specific helper.  */
5958 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5959 tree type, int named)
5961 enum machine_mode mode = omode;
5962 HOST_WIDE_INT bytes, words;
5964 if (mode == BLKmode)
5965 bytes = int_size_in_bytes (type);
5967 bytes = GET_MODE_SIZE (mode);
5968 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5970 /* To simplify the code below, represent vector types with a vector mode
5971 even if MMX/SSE are not active.  */
5972 if (type && TREE_CODE (type) == VECTOR_TYPE)
5973 mode = type_natural_mode (type, cum);
5975 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5976 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5977 else if (TARGET_64BIT)
5978 return function_arg_64 (cum, mode, omode, type, named);
5980 return function_arg_32 (cum, mode, omode, type, bytes, words);
5983 /* A C expression that indicates when an argument must be passed by
5984 reference.  If nonzero for an argument, a copy of that argument is
5985 made in memory and a pointer to the argument is passed instead of
5986 the argument itself.  The pointer is passed in whatever way is
5987 appropriate for passing a pointer to that type.  */
5990 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5991 enum machine_mode mode ATTRIBUTE_UNUSED,
5992 const_tree type, bool named ATTRIBUTE_UNUSED)
5994 /* See Windows x64 Software Convention.  */
5995 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5997 int msize = (int) GET_MODE_SIZE (mode);
6000 /* Arrays are passed by reference.  */
6001 if (TREE_CODE (type) == ARRAY_TYPE)
6004 if (AGGREGATE_TYPE_P (type))
6006 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6007 are passed by reference.  */
6008 msize = int_size_in_bytes (type);
/* Size switch: 1/2/4/8-byte values pass by value, everything else by
   reference (return statements elided in this extract).  */
6012 /* __m128 is passed by reference.  */
6014 case 1: case 2: case 4: case 8:
/* SysV x86-64: variable-size types (int_size_in_bytes == -1) must be
   passed by reference.  */
6020 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6026 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* (tail of the comment elided).  Recurses through aggregate members looking
   for any field that requires 128-bit alignment (e.g. SSE modes).  */
6029 contains_aligned_value_p (tree type)
6031 enum machine_mode mode = TYPE_MODE (type);
/* Scalar case: an SSE-register mode (or similar, condition partly elided)
   that is not under-aligned by user attribute.  */
6032 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6036 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6038 if (TYPE_ALIGN (type) < 128)
6041 if (AGGREGATE_TYPE_P (type))
6043 /* Walk the aggregates recursively.  */
6044 switch (TREE_CODE (type))
6048 case QUAL_UNION_TYPE:
6052 /* Walk all the structure fields.  */
6053 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6055 if (TREE_CODE (field) == FIELD_DECL
6056 && contains_aligned_value_p (TREE_TYPE (field)))
6063 /* Just for use if some languages passes arrays by value.  */
6064 if (contains_aligned_value_p (TREE_TYPE (type)))
6075 /* Gives the alignment boundary, in bits, of an argument with the
6076 specified mode and type.  */
6079 ix86_function_arg_boundary (enum machine_mode mode, tree type)
/* Start from the type's (canonicalized) alignment, or the mode's when no
   type is available; then clamp per ABI rules below.  */
6084 /* Since canonical type is used for call, we convert it to
6085 canonical type if needed.  */
6086 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6087 type = TYPE_CANONICAL (type);
6088 align = TYPE_ALIGN (type);
6091 align = GET_MODE_ALIGNMENT (mode);
6092 if (align < PARM_BOUNDARY)
6093 align = PARM_BOUNDARY;
6094 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6095 natural boundaries.  */
6096 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6098 /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
6099 make an exception for SSE modes since these require 128bit
6102 The handling here differs from field_alignment.  ICC aligns MMX
6103 arguments to 4 byte boundaries, while structure fields are aligned
6104 to 8 byte boundaries.  */
6107 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6108 align = PARM_BOUNDARY;
/* Typed case: only keep the raised alignment when the type actually
   contains a 128-bit-aligned value.  */
6112 if (!contains_aligned_value_p (type))
6113 align = PARM_BOUNDARY;
6116 if (align > BIGGEST_ALIGNMENT)
6117 align = BIGGEST_ALIGNMENT;
6121 /* Return true if N is a possible register number of function value.  */
/* NOTE(review): the switch head and several cases are elided in this
   extract; only the x87 and (apparently) MMX cases survive.  */
6124 ix86_function_value_regno_p (int regno)
6131 case FIRST_FLOAT_REG:
6132 /* TODO: The function should depend on current function ABI but
6133 builtins.c would need updating then.  Therefore we use the
/* Win64 does not return values in x87 registers.  */
6135 if (TARGET_64BIT && ix86_abi == MS_ABI)
6137 return TARGET_FLOAT_RETURNS_IN_80387;
6143 if (TARGET_MACHO || TARGET_64BIT)
6151 /* Define how to find the value returned by a function.
6152 VALTYPE is the data type of the value (as a tree).
6153 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6154 otherwise, FUNC is 0.  */
/* 32-bit return-register selection: MM0 for 8-byte vectors, XMM0 for
   16/32-byte vectors and TImode, %st(0) for x87 floats, %eax otherwise.  */
6157 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6158 const_tree fntype, const_tree fn)
6162 /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
6163 we normally prevent this case when mmx is not available.  However
6164 some ABIs may require the result to be returned like DImode.  */
6165 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6166 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6168 /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
6169 we prevent this case when sse is not available.  However some ABIs
6170 may require the result to be returned like integer TImode.  */
6171 else if (mode == TImode
6172 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6173 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6175 /* 32-byte vector modes in %ymm0.   */
6176 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6177 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6179 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
6180 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6181 regno = FIRST_FLOAT_REG;
6183 /* Most things go in %eax.  */
6186 /* Override FP return register with %xmm0 for local functions when
6187 SSE math is enabled or for functions with sseregparm attribute.  */
6188 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6190 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6191 if ((sse_level >= 1 && mode == SFmode)
6192 || (sse_level == 2 && mode == DFmode))
6193 regno = FIRST_SSE_REG;
6196 /* OImode shouldn't be used directly.  */
6197 gcc_assert (mode != OImode);
6199 return gen_rtx_REG (orig_mode, regno);
/* SysV x86-64 return-value location: libcalls get a register picked by mode
   (cases elided in this extract); typed values go through
   construct_container with the full return-register budget.  */
6203 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6208 /* Handle libcalls, which don't provide a type node.  */
6209 if (valtype == NULL)
6221 return gen_rtx_REG (mode, FIRST_SSE_REG);
6224 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6228 return gen_rtx_REG (mode, AX_REG);
6232 ret = construct_container (mode, orig_mode, valtype, 1,
6233 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6234 x86_64_int_return_registers, 0);
6236 /* For zero sized structures, construct_container returns NULL, but we
6237 need to keep rest of compiler happy by returning meaningful value.  */
6239 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Win64 return-value location: %rax by default, %xmm0 for 16-byte
   scalar-int/vector values and for SF/DFmode scalars.  */
6245 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6247 unsigned int regno = AX_REG;
/* TARGET_SSE guard for the XMM cases appears elided in this extract.  */
6251 switch (GET_MODE_SIZE (mode))
6254 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6255 && !COMPLEX_MODE_P (mode))
6256 regno = FIRST_SSE_REG;
6260 if (mode == SFmode || mode == DFmode)
6261 regno = FIRST_SSE_REG;
6267 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value / ix86_libcall_value: resolve the
   FUNCTION_DECL vs FUNCTION_TYPE argument, then dispatch by ABI.  */
6271 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6272 enum machine_mode orig_mode, enum machine_mode mode)
6274 const_tree fn, fntype;
6277 if (fntype_or_decl && DECL_P (fntype_or_decl))
6278 fn = fntype_or_decl;
6279 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6281 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6282 return function_value_ms_64 (orig_mode, mode);
6283 else if (TARGET_64BIT)
6284 return function_value_64 (orig_mode, mode, valtype);
6286 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: normalize the type's mode and delegate to the
   common worker.  */
6290 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6291 bool outgoing ATTRIBUTE_UNUSED)
6293 enum machine_mode mode, orig_mode;
6295 orig_mode = TYPE_MODE (valtype);
6296 mode = type_natural_mode (valtype, NULL);
6297 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* Libcall variant: no type node, so MODE serves as both natural and
   original mode.  */
6301 ix86_libcall_value (enum machine_mode mode)
6303 return ix86_function_value_1 (NULL, NULL, mode, mode);
6306 /* Return true iff type is returned in memory.  */
/* 32-bit rules: small MS-style aggregates and vector values that fit a
   register file stay in registers; otherwise memory.  NOTE(review):
   several size checks are elided in this extract.  */
6308 static int ATTRIBUTE_UNUSED
6309 return_in_memory_32 (const_tree type, enum machine_mode mode)
6313 if (mode == BLKmode)
6316 size = int_size_in_bytes (type);
6318 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6321 if (VECTOR_MODE_P (mode) || mode == TImode)
6323 /* User-created vectors small enough to fit in EAX.  */
6327 /* MMX/3dNow values are returned in MM0,
6328 except when it doesn't exits.  */
6330 return (TARGET_MMX ? 0 : 1);
6332 /* SSE values are returned in XMM0, except when it doesn't exist.  */
6334 return (TARGET_SSE ? 0 : 1);
6336 /* AVX values are returned in YMM0, except when it doesn't exist.  */
6338 return TARGET_AVX ? 0 : 1;
6347 /* OImode shouldn't be used directly.  */
6348 gcc_assert (mode != OImode);
/* SysV x86-64: a value is returned in memory exactly when
   examine_argument classifies it as MEMORY (returns 0).  */
6353 static int ATTRIBUTE_UNUSED
6354 return_in_memory_64 (const_tree type, enum machine_mode mode)
6356 int needed_intregs, needed_sseregs;
6357 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Win64: only 1/2/4/8-byte values and 16-byte non-complex int/vector
   values (__m128) are returned in registers; everything else in memory.  */
6360 static int ATTRIBUTE_UNUSED
6361 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6363 HOST_WIDE_INT size = int_size_in_bytes (type);
6365 /* __m128 is returned in xmm0.  */
6366 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6367 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6370 /* Otherwise, the size must be exactly in [1248].  */
6371 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: subtarget override first, then dispatch to
   the ABI-specific predicate.  */
6375 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6377 #ifdef SUBTARGET_RETURN_IN_MEMORY
6378 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6380 const enum machine_mode mode = type_natural_mode (type, NULL);
6384 if (ix86_function_type_abi (fntype) == MS_ABI)
6385 return return_in_memory_ms_64 (type, mode);
6387 return return_in_memory_64 (type, mode);
6390 return return_in_memory_32 (type, mode);
6394 /* Return false iff TYPE is returned in memory.  This version is used
6395 on Solaris 10.  It is similar to the generic ix86_return_in_memory,
6396 but differs notably in that when MMX is available, 8-byte vectors
6397 are returned in memory, rather than in MMX registers.  */
/* NOTE(review): several return statements and branch bodies are elided in
   this extract.  */
6400 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6403 enum machine_mode mode = type_natural_mode (type, NULL);
6406 return return_in_memory_64 (type, mode);
6408 if (mode == BLKmode)
6411 size = int_size_in_bytes (type);
6413 if (VECTOR_MODE_P (mode))
6415 /* Return in memory only if MMX registers *are* available.  This
6416 seems backwards, but it is consistent with the existing
6423 else if (mode == TImode)
6425 else if (mode == XFmode)
6431 /* When returning SSE vector types, we have a choice of either
6432 (1) being abi incompatible with a -march switch, or
6433 (2) generating an error.
6434 Given no good solution, I think the safest thing is one warning.
6435 The user won't be able to use -Werror, but....
6437 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6438 called in response to actually generating a caller or callee that
6439 uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
6440 via aggregate_value_p for general type probing from tree-ssa.  */
6443 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* One-shot warning flags, mirroring the ones in function_arg_32.  */
6445 static bool warnedsse, warnedmmx;
6447 if (!TARGET_64BIT && type)
6449 /* Look at the return type of the function, not the function type.  */
6450 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6452 if (!TARGET_SSE && !warnedsse)
6455 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6458 warning (0, "SSE vector return without SSE enabled "
6463 if (!TARGET_MMX && !warnedmmx)
6465 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6468 warning (0, "MMX vector return without MMX enabled "
6478 /* Create the va_list data type.  */
6480 /* Returns the calling convention specific va_list date type.
6481 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
/* i386 and Win64 use a plain char*; SysV x86-64 builds the four-field
   __va_list_tag record (gp_offset, fp_offset, overflow_arg_area,
   reg_save_area) and returns a one-element array of it.  */
6484 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6486 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6488 /* For i386 we use plain pointer to argument area.  */
6489 if (!TARGET_64BIT || abi == MS_ABI)
6490 return build_pointer_type (char_type_node);
6492 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6493 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6495 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6496 unsigned_type_node);
6497 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6498 unsigned_type_node);
6499 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6501 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so va_list_{gpr,fpr}_size optimizations can
   find them later.  */
6504 va_list_gpr_counter_field = f_gpr;
6505 va_list_fpr_counter_field = f_fpr;
6507 DECL_FIELD_CONTEXT (f_gpr) = record;
6508 DECL_FIELD_CONTEXT (f_fpr) = record;
6509 DECL_FIELD_CONTEXT (f_ovf) = record;
6510 DECL_FIELD_CONTEXT (f_sav) = record;
6512 TREE_CHAIN (record) = type_decl;
6513 TYPE_NAME (record) = type_decl;
6514 TYPE_FIELDS (record) = f_gpr;
6515 TREE_CHAIN (f_gpr) = f_fpr;
6516 TREE_CHAIN (f_fpr) = f_ovf;
6517 TREE_CHAIN (f_ovf) = f_sav;
6519 layout_type (record);
6521 /* The correct type is an array type of one element.  */
6522 return build_array_type (record, build_index_type (size_zero_node));
6525 /* Setup the builtin va_list data type and for 64-bit the additional
6526 calling convention specific va_list data types.  */
/* NOTE(review): the TARGET_64BIT guard and some else-branches are elided
   in this extract; the surviving code caches ms/sysv variants in
   ms_va_list_type_node / sysv_va_list_type_node.  */
6529 ix86_build_builtin_va_list (void)
6531 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6533 /* Initialize abi specific va_list builtin types.  */
6537 if (ix86_abi == MS_ABI)
6539 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
/* build_pointer_type may return a shared node; copy so the va_list
   variant gets its own type node.  */
6540 if (TREE_CODE (t) != RECORD_TYPE)
6541 t = build_variant_type_copy (t);
6542 sysv_va_list_type_node = t;
6547 if (TREE_CODE (t) != RECORD_TYPE)
6548 t = build_variant_type_copy (t);
6549 sysv_va_list_type_node = t;
6551 if (ix86_abi != MS_ABI)
6553 t = ix86_build_builtin_va_list_abi (MS_ABI);
6554 if (TREE_CODE (t) != RECORD_TYPE)
6555 t = build_variant_type_copy (t);
6556 ms_va_list_type_node = t;
6561 if (TREE_CODE (t) != RECORD_TYPE)
6562 t = build_variant_type_copy (t);
6563 ms_va_list_type_node = t;
6570 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
/* SysV x86-64: spill the unconsumed integer parameter registers into the
   register save area, then emit the computed-jump SSE save sequence that
   stores only the XMM registers actually used (count arrives in %al).  */
6573 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6582 int regparm = ix86_regparm;
6584 if (cum->call_abi != ix86_abi)
6585 regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6587 /* GPR size of varargs save area.  */
6588 if (cfun->va_list_gpr_size)
6589 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6591 ix86_varargs_gpr_size = 0;
6593 /* FPR size of varargs save area.  We don't need it if we don't pass
6594 anything in SSE registers.  */
6595 if (cum->sse_nregs && cfun->va_list_fpr_size)
6596 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6598 ix86_varargs_fpr_size = 0;
/* Nothing to save: neither area is live.  */
6600 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6603 save_area = frame_pointer_rtx;
6604 set = get_varargs_alias_set ();
/* Store each remaining named-register GPR into its save-area slot.  */
6606 for (i = cum->regno;
6608 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6611 mem = gen_rtx_MEM (Pmode,
6612 plus_constant (save_area, i * UNITS_PER_WORD));
6613 MEM_NOTRAP_P (mem) = 1;
6614 set_mem_alias_set (mem, set);
6615 emit_move_insn (mem, gen_rtx_REG (Pmode,
6616 x86_64_int_parameter_registers[i]));
6619 if (ix86_varargs_fpr_size)
6621 /* Now emit code to save SSE registers.  The AX parameter contains number
6622 of SSE parameter registers used to call this function.  We use
6623 sse_prologue_save insn template that produces computed jump across
6624 SSE saves.  We need some preparation work to get this working.  */
6626 label = gen_label_rtx ();
6627 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6629 /* Compute address to jump to :
6630 label - eax*4 + nnamed_sse_arguments*4 Or
6631 label - eax*5 + nnamed_sse_arguments*5 for AVX.  */
6632 tmp_reg = gen_reg_rtx (Pmode);
6633 nsse_reg = gen_reg_rtx (Pmode);
6634 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6635 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6636 gen_rtx_MULT (Pmode, nsse_reg,
6639 /* vmovaps is one byte longer than movaps.  */
6641 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6642 gen_rtx_PLUS (Pmode, tmp_reg,
6648 gen_rtx_CONST (DImode,
6649 gen_rtx_PLUS (DImode,
6651 GEN_INT (cum->sse_regno
6652 * (TARGET_AVX ? 5 : 4)))));
6654 emit_move_insn (nsse_reg, label_ref);
6655 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6657 /* Compute address of memory block we save into.  We always use pointer
6658 pointing 127 bytes after first byte to store - this is needed to keep
6659 instruction size limited by 4 bytes (5 bytes for AVX) with one
6660 byte displacement.  */
6661 tmp_reg = gen_reg_rtx (Pmode);
6662 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6663 plus_constant (save_area,
6664 ix86_varargs_gpr_size + 127)));
6665 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6666 MEM_NOTRAP_P (mem) = 1;
6667 set_mem_alias_set (mem, set);
6668 set_mem_align (mem, BITS_PER_WORD);
6670 /* And finally do the dirty job!  */
6671 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6672 GEN_INT (cum->sse_regno), label));
/* Win64 varargs setup: spill each remaining parameter register into its
   home slot in the caller-allocated shadow/argument area so va_arg can walk
   them as ordinary stack words.  */
6677 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6679 alias_set_type set = get_varargs_alias_set ();
6682 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6686 mem = gen_rtx_MEM (Pmode,
6687 plus_constant (virtual_incoming_args_rtx,
6688 i * UNITS_PER_WORD));
6689 MEM_NOTRAP_P (mem) = 1;
6690 set_mem_alias_set (mem, set);
6692 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6693 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: advance a copy of CUM past the last
   named argument, then defer to the ABI-specific worker.
   NOTE(review): the next_cum initialization and 64-bit guard lines are
   elided in this extract.  */
6698 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6699 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6702 CUMULATIVE_ARGS next_cum;
6705 /* This argument doesn't appear to be used anymore.  Which is good,
6706 because the old code here didn't suppress rtl generation.  */
6707 gcc_assert (!no_rtl);
6712 fntype = TREE_TYPE (current_function_decl);
6714 /* For varargs, we do not want to skip the dummy va_dcl argument.
6715 For stdargs, we do want to skip the last named argument.  */
6717 if (stdarg_p (fntype))
6718 function_arg_advance (&next_cum, mode, type, 1);
6720 if (cum->call_abi == MS_ABI)
6721 setup_incoming_varargs_ms_64 (&next_cum);
6723 setup_incoming_varargs_64 (&next_cum);
6726 /* Checks if TYPE is of kind va_list char *.  */
/* True when TYPE canonicalizes to the pointer-style (char*) va_list —
   always the case for 32-bit, and for the MS variant on 64-bit.  */
6729 is_va_list_char_pointer (tree type)
6733 /* For 32-bit it is always true.  */
6736 canonic = ix86_canonical_va_list_type (type);
6737 return (canonic == ms_va_list_type_node
6738 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6741 /* Implement va_start.  Initializes the four sysv_va_list fields
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area) from the
   counts of named registers already consumed.  */
6744 ix86_va_start (tree valist, rtx nextarg)
6746   HOST_WIDE_INT words, n_gpr, n_fpr;
6747   tree f_gpr, f_fpr, f_ovf, f_sav;
6748   tree gpr, fpr, ovf, sav, t;
6751   /* Only 64bit target needs something special.  */
6752   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6754       std_expand_builtin_va_start (valist, nextarg);
/* Locate the four fields of the SysV va_list record in declaration
   order: gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
6758   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6759   f_fpr = TREE_CHAIN (f_gpr);
6760   f_ovf = TREE_CHAIN (f_fpr);
6761   f_sav = TREE_CHAIN (f_ovf);
6763   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6764   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6765   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6766   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6767   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6769   /* Count number of gp and fp argument registers used.  */
6770   words = crtl->args.info.words;
6771   n_gpr = crtl->args.info.regno;
6772   n_fpr = crtl->args.info.sse_regno;
6774   if (cfun->va_list_gpr_size)
/* gp_offset = n_gpr * 8: each GP register slot is 8 bytes.  */
6776       type = TREE_TYPE (gpr);
6777       t = build2 (MODIFY_EXPR, type,
6778 		  gpr, build_int_cst (type, n_gpr * 8));
6779       TREE_SIDE_EFFECTS (t) = 1;
6780       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6783   if (TARGET_SSE && cfun->va_list_fpr_size)
/* fp_offset: SSE slots are 16 bytes each and follow all GP slots.  */
6785       type = TREE_TYPE (fpr);
6786       t = build2 (MODIFY_EXPR, type, fpr,
6787 		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6788       TREE_SIDE_EFFECTS (t) = 1;
6789       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6792   /* Find the overflow area.  */
6793   type = TREE_TYPE (ovf);
6794   t = make_tree (type, crtl->args.internal_arg_pointer);
6796   t = build2 (POINTER_PLUS_EXPR, type, t,
6797 	      size_int (words * UNITS_PER_WORD));
6798   t = build2 (MODIFY_EXPR, type, ovf, t);
6799   TREE_SIDE_EFFECTS (t) = 1;
6800   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6802   if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6804       /* Find the register save area.
6805 	 The prologue of the function saves it right above the stack frame.  */
6806       type = TREE_TYPE (sav);
6807       t = make_tree (type, frame_pointer_rtx);
6808       if (!ix86_varargs_gpr_size)
6809 	t = build2 (POINTER_PLUS_EXPR, type, t,
6810 		    size_int (-8 * X86_64_REGPARM_MAX));
6811       t = build2 (MODIFY_EXPR, type, sav, t);
6812       TREE_SIDE_EFFECTS (t) = 1;
6813       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6817 /* Implement va_arg.  Emits GIMPLE into *PRE_P that fetches the next
   argument of TYPE: either out of the register save area (with a
   runtime gp_offset/fp_offset bounds check and fall-through to
   lab_false) or out of the stack overflow area.  Returns a
   dereferenceable expression for the fetched value.  */
6820 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6823   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6824   tree f_gpr, f_fpr, f_ovf, f_sav;
6825   tree gpr, fpr, ovf, sav, t;
6827   tree lab_false, lab_over = NULL_TREE;
6832   enum machine_mode nat_mode;
6835   /* Only 64bit target needs something special.  */
6836   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6837     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Locate the four sysv_va_list fields, as in ix86_va_start.  */
6839   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6840   f_fpr = TREE_CHAIN (f_gpr);
6841   f_ovf = TREE_CHAIN (f_fpr);
6842   f_sav = TREE_CHAIN (f_ovf);
6844   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6845 		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6846   valist = build_va_arg_indirect_ref (valist);
6847   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6848   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6849   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference are fetched as a pointer and
   dereferenced at the end.  */
6851   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6853     type = build_pointer_type (type);
6854   size = int_size_in_bytes (type);
6855   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6857   nat_mode = type_natural_mode (type, NULL);
6866       /* Unnamed 256bit vector mode parameters are passed on stack.  */
6867       if (ix86_cfun_abi () == SYSV_ABI)
/* CONTAINER describes how this type would be passed in registers;
   NULL (not visible here) means it goes on the stack.  */
6874   container = construct_container (nat_mode, TYPE_MODE (type),
6875 				   type, 0, X86_64_REGPARM_MAX,
6876 				   X86_64_SSE_REGPARM_MAX, intreg,
6881   /* Pull the value out of the saved registers.  */
6883   addr = create_tmp_var (ptr_type_node, "addr");
6884   DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6888       int needed_intregs, needed_sseregs;
6890       tree int_addr, sse_addr;
6892       lab_false = create_artificial_label ();
6893       lab_over = create_artificial_label ();
6895       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary copy is needed when the pieces in the save area are not
   a usable contiguous block (over-aligned aggregates).  */
6897       need_temp = (!REG_P (container)
6898 		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
6899 		       || TYPE_ALIGN (type) > 128));
6901       /* In case we are passing structure, verify that it is consecutive block
6902          on the register save area.  If not we need to do moves.  */
6903       if (!need_temp && !REG_P (container))
6905 	  /* Verify that all registers are strictly consecutive  */
6906 	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
/* SSE pieces: each must be the next SSE reg at offset i*16.  */
6910 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6912 		  rtx slot = XVECEXP (container, 0, i);
6913 		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6914 		      || INTVAL (XEXP (slot, 1)) != i * 16)
/* GP pieces: each must be the next GP reg at offset i*8.  */
6922 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6924 		  rtx slot = XVECEXP (container, 0, i);
6925 		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6926 		      || INTVAL (XEXP (slot, 1)) != i * 8)
6938       int_addr = create_tmp_var (ptr_type_node, "int_addr");
6939       DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6940       sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6941       DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6944       /* First ensure that we fit completely in registers.  */
/* if (gp_offset >= limit) goto lab_false;  -- not enough GP regs left.  */
6947 	  t = build_int_cst (TREE_TYPE (gpr),
6948 			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6949 	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6950 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6951 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6952 	  gimplify_and_add (t, pre_p);
/* Same check for SSE registers (fp_offset).  */
6956 	  t = build_int_cst (TREE_TYPE (fpr),
6957 			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6958 			     + X86_64_REGPARM_MAX * 8);
6959 	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6960 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6961 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6962 	  gimplify_and_add (t, pre_p);
6965       /* Compute index to start of area used for integer regs.  */
6968 	  /* int_addr = gpr + sav; */
6969 	  t = fold_convert (sizetype, gpr);
6970 	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6971 	  gimplify_assign (int_addr, t, pre_p);
6975 	  /* sse_addr = fpr + sav; */
6976 	  t = fold_convert (sizetype, fpr);
6977 	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6978 	  gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: copy the value piecewise into a stack temporary
   and return its address.  */
6983 	  tree temp = create_tmp_var (type, "va_arg_tmp");
6986 	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6987 	  gimplify_assign (addr, t, pre_p);
6989 	  for (i = 0; i < XVECLEN (container, 0); i++)
6991 	      rtx slot = XVECEXP (container, 0, i);
6992 	      rtx reg = XEXP (slot, 0);
6993 	      enum machine_mode mode = GET_MODE (reg);
6994 	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6995 	      tree addr_type = build_pointer_type (piece_type);
6996 	      tree daddr_type = build_pointer_type_for_mode (piece_type,
7000 	      tree dest_addr, dest;
7002 	      if (SSE_REGNO_P (REGNO (reg)))
7004 		  src_addr = sse_addr;
7005 		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7009 		  src_addr = int_addr;
7010 		  src_offset = REGNO (reg) * 8;
7012 	      src_addr = fold_convert (addr_type, src_addr);
7013 	      src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7014 				      size_int (src_offset));
7015 	      src = build_va_arg_indirect_ref (src_addr);
7017 	      dest_addr = fold_convert (daddr_type, addr);
7018 	      dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7019 				      size_int (INTVAL (XEXP (slot, 1))));
7020 	      dest = build_va_arg_indirect_ref (dest_addr);
7022 	      gimplify_assign (dest, src, pre_p);
/* Bump gp_offset/fp_offset past the registers just consumed.  */
7028 	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7029 		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7030 	  gimplify_assign (gpr, t, pre_p);
7035 	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7036 		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7037 	  gimplify_assign (fpr, t, pre_p);
7040       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7042       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7045   /* ... otherwise out of the overflow area.  */
7047   /* When we align parameter on stack for caller, if the parameter
7048      alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7049      aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
7050      here with caller.  */
7051   arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7052   if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7053     arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7055   /* Care for on-stack alignment if needed.  */
7056   if (arg_boundary <= 64
7057       || integer_zerop (TYPE_SIZE (type)))
/* Round ovf up to the argument's alignment: ovf = (ovf+align-1) & -align.  */
7061       HOST_WIDE_INT align = arg_boundary / 8;
7062       t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7063 		  size_int (align - 1));
7064       t = fold_convert (sizetype, t);
7065       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7067       t = fold_convert (TREE_TYPE (ovf), t);
7069   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7070   gimplify_assign (addr, t, pre_p);
/* Advance ovf past the argument (rounded up to whole words).  */
7072   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7073 	      size_int (rsize * UNITS_PER_WORD));
7074   gimplify_assign (unshare_expr (ovf), t, pre_p);
7077     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7079   ptrtype = build_pointer_type (type);
7080   addr = fold_convert (ptrtype, addr);
/* Extra dereference for pass-by-reference arguments.  */
7083     addr = build_va_arg_indirect_ref (addr);
7084   return build_va_arg_indirect_ref (addr);
7087 /* Return nonzero if OPNUM's MEM should be matched
7088    in movabs* patterns.  Rejects volatile MEMs unless volatile_ok.  */
7091 ix86_check_movabs (rtx insn, int opnum)
7095   set = PATTERN (insn);
7096   if (GET_CODE (set) == PARALLEL)
7097     set = XVECEXP (set, 0, 0);
7098   gcc_assert (GET_CODE (set) == SET);
7099   mem = XEXP (set, opnum);
/* Strip any SUBREG wrappers to reach the underlying MEM.  */
7100   while (GET_CODE (mem) == SUBREG)
7101     mem = SUBREG_REG (mem);
7102   gcc_assert (MEM_P (mem));
7103   return (volatile_ok || !MEM_VOLATILE_P (mem));
7106 /* Initialize the table of extra 80387 mathematical constants
   (one per fldlg2/fldln2/fldl2e/fldl2t/fldpi instruction), rounding
   each decimal literal to XFmode precision.  Idempotent via the
   ext_80387_constants_init flag.  */
7109 init_ext_80387_constants (void)
7111   static const char * cst[5] =
7113     "0.3010299956639811952256464283594894482", /* 0: fldlg2  */
7114     "0.6931471805599453094286904741849753009", /* 1: fldln2  */
7115     "1.4426950408889634073876517827983434472", /* 2: fldl2e  */
7116     "3.3219280948873623478083405569094566090", /* 3: fldl2t  */
7117     "3.1415926535897932385128089594061862044", /* 4: fldpi  */
7121   for (i = 0; i < 5; i++)
7123       real_from_string (&ext_80387_constants_table[i], cst[i]);
7124       /* Ensure each constant is rounded to XFmode precision.  */
7125       real_convert (&ext_80387_constants_table[i],
7126 		    XFmode, &ext_80387_constants_table[i]);
7129   ext_80387_constants_init = 1;
7132 /* Return true if the constant is something that can be loaded with
7133    a special instruction.  (The return appears to be an index code
   rather than a plain boolean -- standard_80387_constant_opcode and
   standard_80387_constant_rtx switch on it; confirm against callers.)  */
7136 standard_80387_constant_p (rtx x)
7138   enum machine_mode mode = GET_MODE (x);
7142   if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7145   if (x == CONST0_RTX (mode))
7147   if (x == CONST1_RTX (mode))
7150   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7152   /* For XFmode constants, try to find a special 80387 instruction when
7153      optimizing for size or on those CPUs that benefit from them.  */
7155       && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7159       if (! ext_80387_constants_init)
7160 	init_ext_80387_constants ();
7162       for (i = 0; i < 5; i++)
7163 	if (real_identical (&r, &ext_80387_constants_table[i]))
7167   /* Load of the constant -0.0 or -1.0 will be split as
7168      fldz;fchs or fld1;fchs sequence.  */
7169   if (real_isnegzero (&r))
7171   if (real_identical (&r, &dconstm1))
7177 /* Return the opcode of the special instruction to be used to load
   the constant X (which must satisfy standard_80387_constant_p).  */
7181 standard_80387_constant_opcode (rtx x)
7183   switch (standard_80387_constant_p (x))
7207 /* Return the CONST_DOUBLE representing the 80387 constant that is
7208    loaded by the specified special instruction.  The argument IDX
7209    matches the return value from standard_80387_constant_p.  */
7212 standard_80387_constant_rtx (int idx)
/* Lazily build the extended-constant table before indexing it.  */
7216   if (! ext_80387_constants_init)
7217     init_ext_80387_constants ();
7233   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7237 /* Return 1 if mode is a valid mode for sse.  */
7239 standard_sse_mode_p (enum machine_mode mode)
7256 /* Return 1 if X is all 0s.  For all 1s, return 2 if X is in 128bit
7257    SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7258    modes and AVX is enabled.  Negative values (-2/-3) indicate the
   all-ones pattern exists but the required ISA is not enabled.  */
7261 standard_sse_constant_p (rtx x)
7263   enum machine_mode mode = GET_MODE (x);
7265   if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7267   if (vector_all_ones_operand (x, mode))
7269       if (standard_sse_mode_p (mode))
7270 	return TARGET_SSE2 ? 2 : -2;
7271       else if (VALID_AVX256_REG_MODE (mode))
7272 	return TARGET_AVX ? 3 : -3;
7278 /* Return the opcode of the special instruction to be used to load
   the SSE constant X: an xor/vxor idiom for all-zeros, pcmpeqd for
   all-ones.  The template is chosen from INSN's mode attribute.  */
7282 standard_sse_constant_opcode (rtx insn, rtx x)
7284   switch (standard_sse_constant_p (x))
/* All-zeros: clear the destination with a same-reg xor.  */
7287       switch (get_attr_mode (insn))
7290 	  return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7292 	  return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7294 	  return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7296 	  return "vxorps\t%x0, %x0, %x0";
7298 	  return "vxorpd\t%x0, %x0, %x0";
7300 	  return "vpxor\t%x0, %x0, %x0";
/* All-ones: compare a register with itself for equality.  */
7306       switch (get_attr_mode (insn))
7311 	  return "vpcmpeqd\t%0, %0, %0";
7317 	  return "pcmpeqd\t%0, %0";
7322 /* Returns 1 if OP contains a symbol reference (SYMBOL_REF or
   LABEL_REF anywhere in the expression), searching recursively
   through all 'e' and 'E' operands of the RTX.  */
7325 symbolic_reference_mentioned_p (rtx op)
7330   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7333   fmt = GET_RTX_FORMAT (GET_CODE (op));
7334   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
/* 'E' operands are rtx vectors; recurse into each element.  */
7340 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7341 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7345       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7352 /* Return 1 if it is appropriate to emit `ret' instructions in the
7353    body of a function.  Do this only if the epilogue is simple, needing a
7354    couple of insns.  Prior to reloading, we can't tell how many registers
7355    must be saved, so return 0 then.  Return 0 if there is no frame
7356    marker to de-allocate.  */
7359 ix86_can_use_return_insn_p (void)
7361   struct ix86_frame frame;
7363   if (! reload_completed || frame_pointer_needed)
7366   /* Don't allow more than 32 pop, since that's all we can do
7367      with one instruction.  */
7368   if (crtl->args.pops_args
7369       && crtl->args.size >= 32768)
/* A bare `ret' only works when nothing needs deallocating or restoring.  */
7372   ix86_compute_frame_layout (&frame);
7373   return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7376 /* Value should be nonzero if functions must have frame pointers.
7377    Zero means the frame pointer need not be set up (and parms may
7378    be accessed via the stack pointer) in functions that seem suitable.  */
7381 ix86_frame_pointer_required (void)
7383   /* If we accessed previous frames, then the generated code expects
7384      to be able to access the saved ebp value in our frame.  */
7385   if (cfun->machine->accesses_prev_frame)
7388   /* Several x86 os'es need a frame pointer for other reasons,
7389      usually pertaining to setjmp.  */
7390   if (SUBTARGET_FRAME_POINTER_REQUIRED)
7393   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7394      the frame pointer by default.  Turn it back on now if we've not
7395      got a leaf function.  */
7396   if (TARGET_OMIT_LEAF_FRAME_POINTER
7397       && (!current_function_is_leaf
7398 	  || ix86_current_function_calls_tls_descriptor))
7407 /* Record that the current function accesses previous call frames
   (e.g. via __builtin_frame_address); forces a frame pointer through
   ix86_frame_pointer_required.  */
7410 ix86_setup_frame_addresses (void)
7412   cfun->machine->accesses_prev_frame = 1;
7415 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7416 # define USE_HIDDEN_LINKONCE 1
7418 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a PC thunk has been requested;
   consumed by ix86_file_end to emit the thunk bodies.  */
7421 static int pic_labels_used;
7423 /* Fills in the label name that should be used for a pc thunk for
7424    the given register.  32-bit only: 64-bit code uses RIP-relative
   addressing instead of PC thunks.  */
7427 get_pc_thunk_name (char name[32], unsigned int regno)
7429   gcc_assert (!TARGET_64BIT);
7431   if (USE_HIDDEN_LINKONCE)
7432     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7434     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7438 /* This function generates code for -fpic that loads %ebx with
7439    the return address of the caller and then returns.  Emits one
   get_pc_thunk per register bit set in pic_labels_used, then the
   executable-stack marker if required.  */
7442 ix86_file_end (void)
7447   for (regno = 0; regno < 8; ++regno)
7451       if (! ((pic_labels_used >> regno) & 1))
7454       get_pc_thunk_name (name, regno);
/* Mach-O: weak, coalesced, private-extern definition.  */
7459 	  switch_to_section (darwin_sections[text_coal_section]);
7460 	  fputs ("\t.weak_definition\t", asm_out_file);
7461 	  assemble_name (asm_out_file, name);
7462 	  fputs ("\n\t.private_extern\t", asm_out_file);
7463 	  assemble_name (asm_out_file, name);
7464 	  fputs ("\n", asm_out_file);
7465 	  ASM_OUTPUT_LABEL (asm_out_file, name);
7469       if (USE_HIDDEN_LINKONCE)
/* ELF: emit a hidden COMDAT (one-only) function so identical thunks
   from different objects are merged by the linker.  */
7473 	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
7475 	  TREE_PUBLIC (decl) = 1;
7476 	  TREE_STATIC (decl) = 1;
7477 	  DECL_ONE_ONLY (decl) = 1;
7479 	  (*targetm.asm_out.unique_section) (decl, 0);
7480 	  switch_to_section (get_named_section (decl, NULL, 0));
7482 	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
7483 	  fputs ("\t.hidden\t", asm_out_file);
7484 	  assemble_name (asm_out_file, name);
7485 	  fputc ('\n', asm_out_file);
7486 	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7490 	  switch_to_section (text_section);
7491 	  ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (at the top of the stack)
   into the target register and return.  */
7494       xops[0] = gen_rtx_REG (Pmode, regno);
7495       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7496       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7497       output_asm_insn ("ret", xops);
7500   if (NEED_INDICATE_EXEC_STACK)
7501     file_end_indicate_exec_stack ();
7504 /* Emit code for the SET_GOT patterns: materialize the GOT address in
   DEST, via a call/pop pair or a pc-thunk call plus an add of
   _GLOBAL_OFFSET_TABLE_.  VxWorks RTP loads it from GOTT_BASE instead.  */
7507 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7513   if (TARGET_VXWORKS_RTP && flag_pic)
7515       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
7516       xops[2] = gen_rtx_MEM (Pmode,
7517 			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7518       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7520       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7521 	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7522 	 an unadorned address.  */
7523       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7524       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7525       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7529   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7531   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
/* call/pop idiom: the call pushes the next PC, which the pop
   retrieves into DEST.  */
7533       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7536 	output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7538 	output_asm_insn ("call\t%a2", xops);
7541       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7542          is what will be referenced by the Mach-O PIC subsystem.  */
7544 	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7547       (*targetm.asm_out.internal_label) (asm_out_file, "L",
7548 				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7551 	output_asm_insn ("pop%z0\t%0", xops);
/* Deep-branch-prediction path: call a shared pc thunk instead, which
   keeps the CPU's return-address predictor stack balanced.  */
7556       get_pc_thunk_name (name, REGNO (dest));
7557       pic_labels_used |= 1 << REGNO (dest);
7559       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7560       xops[2] = gen_rtx_MEM (QImode, xops[2]);
7561       output_asm_insn ("call\t%X2", xops);
7562       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7563          is what will be referenced by the Mach-O PIC subsystem.  */
7566 	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7568 	targetm.asm_out.internal_label (asm_out_file, "L",
7569 					CODE_LABEL_NUMBER (label));
/* Finally add the GOT symbol (offset from the retrieved PC) to DEST.  */
7576   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7577     output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7579     output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7584 /* Generate a "push" pattern for input ARG: a SET whose destination is
   a MEM at the pre-decremented stack pointer.  */
7589   return gen_rtx_SET (VOIDmode,
7591 				      gen_rtx_PRE_DEC (Pmode,
7592 						       stack_pointer_rtx)),
7596 /* Return >= 0 if there is an unused call-clobbered register available
7597    for the entire function.  Scans %ecx, %edx, %eax (regnos 2..0) in a
   leaf, non-profiled, non-TLS-descriptor function; returns
   INVALID_REGNUM when nothing qualifies.  */
7600 ix86_select_alt_pic_regnum (void)
7602   if (current_function_is_leaf && !crtl->profile
7603       && !ix86_current_function_calls_tls_descriptor)
7606       /* Can't use the same register for both PIC and DRAP.  */
7608 	drap = REGNO (crtl->drap_reg);
7611       for (i = 2; i >= 0; --i)
7612         if (i != drap && !df_regs_ever_live_p (i))
7616   return INVALID_REGNUM;
7619 /* Return 1 if we need to save REGNO in the current function's prologue.
   MAYBE_EH_RETURN also counts the EH return data registers.  */
7621 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved if it is live and no alternate
   call-clobbered register can hold the GOT pointer instead.  */
7623   if (pic_offset_table_rtx
7624       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7625       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7627 	  || crtl->calls_eh_return
7628 	  || crtl->uses_const_pool))
7630       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7635   if (crtl->calls_eh_return && maybe_eh_return)
7640 	  unsigned test = EH_RETURN_DATA_REGNO (i);
7641 	  if (test == INVALID_REGNUM)
/* The DRAP register is saved by the frame-realignment machinery.  */
7649       && regno == REGNO (crtl->drap_reg))
/* Default rule: save call-saved, non-fixed registers that are ever
   live (the hard frame pointer is handled separately when needed).  */
7652   return (df_regs_ever_live_p (regno)
7653 	  && !call_used_regs[regno]
7654 	  && !fixed_regs[regno]
7655 	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7658 /* Return number of saved general purpose registers.  */
7661 ix86_nsaved_regs (void)
7666   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7667     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7672 /* Return number of saved SSE registers.  Only the MS ABI makes any
   SSE registers call-saved, so other ABIs return zero immediately.  */
7675 ix86_nsaved_sseregs (void)
7680   if (ix86_cfun_abi () != MS_ABI)
7682   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7683     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7688 /* Given FROM and TO register numbers, say whether this elimination is
7689    allowed.  If stack alignment is needed, we can only replace argument
7690    pointer with hard frame pointer, or replace frame pointer with stack
7691    pointer.  Otherwise, frame pointer elimination is automatically
7692    handled and all other eliminations are valid.  */
7695 ix86_can_eliminate (int from, int to)
7697   if (stack_realign_fp)
7698     return ((from == ARG_POINTER_REGNUM
7699              && to == HARD_FRAME_POINTER_REGNUM)
7700             || (from == FRAME_POINTER_REGNUM
7701                 && to == STACK_POINTER_REGNUM));
7703     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7706 /* Return the offset between two registers, one to be eliminated, and the other
7707    its replacement, at the start of a routine.  All offsets come from
   the frame layout computed by ix86_compute_frame_layout.  */
7710 ix86_initial_elimination_offset (int from, int to)
7712   struct ix86_frame frame;
7713   ix86_compute_frame_layout (&frame);
7715   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7716     return frame.hard_frame_pointer_offset;
7717   else if (from == FRAME_POINTER_REGNUM
7718 	   && to == HARD_FRAME_POINTER_REGNUM)
7719     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases eliminate to the stack pointer.  */
7722       gcc_assert (to == STACK_POINTER_REGNUM);
7724       if (from == ARG_POINTER_REGNUM)
7725 	return frame.stack_pointer_offset;
7727       gcc_assert (from == FRAME_POINTER_REGNUM);
7728       return frame.stack_pointer_offset - frame.frame_pointer_offset;
7732 /* In a dynamically-aligned function, we can't know the offset from
7733    stack pointer to frame pointer, so we must ensure that setjmp
7734    eliminates fp against the hard fp (%ebp) rather than trying to
7735    index from %esp up to the top of the frame across a gap that is
7736    of unknown (at compile-time) size.  */
7738 ix86_builtin_setjmp_frame_value (void)
7740   return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7743 /* Fill structure ix86_frame about frame of currently computed function.
   Computes saved-register counts, padding, the register/SSE/va_arg save
   areas, and the frame-pointer/stack-pointer offsets; also decides
   whether to save registers with moves and how much fits in the red
   zone.  Debug fprintf dump at the end (presumably behind an #ifdef
   elided from this view -- confirm before editing).  */
7746 ix86_compute_frame_layout (struct ix86_frame *frame)
7748   HOST_WIDE_INT total_size;
7749   unsigned int stack_alignment_needed;
7750   HOST_WIDE_INT offset;
7751   unsigned int preferred_alignment;
7752   HOST_WIDE_INT size = get_frame_size ();
7754   frame->nregs = ix86_nsaved_regs ();
7755   frame->nsseregs = ix86_nsaved_sseregs ();
7758   stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7759   preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7761   /* The MS ABI seems to require stack alignment to be always 16 except for
7763   if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7765       preferred_alignment = 16;
7766       stack_alignment_needed = 16;
7767       crtl->preferred_stack_boundary = 128;
7768       crtl->stack_alignment_needed = 128;
7771   gcc_assert (!size || stack_alignment_needed);
7772   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7773   gcc_assert (preferred_alignment <= stack_alignment_needed);
7775   /* During reload iteration the amount of registers saved can change.
7776      Recompute the value as needed.  Do not recompute when amount of registers
7777      didn't change as reload does multiple calls to the function and does not
7778      expect the decision to change within single iteration.  */
7779   if (!optimize_function_for_size_p (cfun)
7780       && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7782       int count = frame->nregs;
7784       cfun->machine->use_fast_prologue_epilogue_nregs = count;
7785       /* The fast prologue uses move instead of push to save registers.  This
7786          is significantly longer, but also executes faster as modern hardware
7787          can execute the moves in parallel, but can't do that for push/pop.
7789 	 Be careful about choosing what prologue to emit:  When function takes
7790 	 many instructions to execute we may use slow version as well as in
7791 	 case function is known to be outside hot spot (this is known with
7792 	 feedback only).  Weight the size of function by number of registers
7793 	 to save as it is cheap to use one or two push instructions but very
7794 	 slow to use many of them.  */
7796 	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7797       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7798 	  || (flag_branch_probabilities
7799 	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7800         cfun->machine->use_fast_prologue_epilogue = false;
7802         cfun->machine->use_fast_prologue_epilogue
7803 	   = !expensive_function_p (count);
7805   if (TARGET_PROLOGUE_USING_MOVE
7806       && cfun->machine->use_fast_prologue_epilogue)
7807     frame->save_regs_using_mov = true;
7809     frame->save_regs_using_mov = false;
7812   /* Skip return address and saved base pointer.  */
7813   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7815   frame->hard_frame_pointer_offset = offset;
7817   /* Set offset to aligned because the realigned frame starts from
7819   if (stack_realign_fp)
7820     offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7822   /* Register save area */
7823   offset += frame->nregs * UNITS_PER_WORD;
7825   /* Align SSE reg save area.  */
7826   if (frame->nsseregs)
7827     frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7829     frame->padding0 = 0;
7831   /* SSE register save area.  */
7832   offset += frame->padding0 + frame->nsseregs * 16;
/* Space for the varargs register save area, if any.  */
7835   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7836   offset += frame->va_arg_size;
7838   /* Align start of frame for local function.  */
7839   frame->padding1 = ((offset + stack_alignment_needed - 1)
7840 		     & -stack_alignment_needed) - offset;
7842   offset += frame->padding1;
7844   /* Frame pointer points here.  */
7845   frame->frame_pointer_offset = offset;
7849   /* Add outgoing arguments area.  Can be skipped if we eliminated
7850      all the function calls as dead code.
7851      Skipping is however impossible when function calls alloca.  Alloca
7852      expander assumes that last crtl->outgoing_args_size
7853      of stack frame are unused.  */
7854   if (ACCUMULATE_OUTGOING_ARGS
7855       && (!current_function_is_leaf || cfun->calls_alloca
7856 	  || ix86_current_function_calls_tls_descriptor))
7858       offset += crtl->outgoing_args_size;
7859       frame->outgoing_arguments_size = crtl->outgoing_args_size;
7862     frame->outgoing_arguments_size = 0;
7864   /* Align stack boundary.  Only needed if we're calling another function
7866   if (!current_function_is_leaf || cfun->calls_alloca
7867       || ix86_current_function_calls_tls_descriptor)
7868     frame->padding2 = ((offset + preferred_alignment - 1)
7869 		       & -preferred_alignment) - offset;
7871     frame->padding2 = 0;
7873   offset += frame->padding2;
7875   /* We've reached end of stack frame.  */
7876   frame->stack_pointer_offset = offset;
7878   /* Size prologue needs to allocate.  */
7879   frame->to_allocate =
7880     (size + frame->padding1 + frame->padding2
7881      + frame->outgoing_arguments_size + frame->va_arg_size);
/* Small frames are cheaper with pushes; huge 64-bit frames cannot use
   32-bit displacements in the mov form.  */
7883   if ((!frame->to_allocate && frame->nregs <= 1)
7884       || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7885     frame->save_regs_using_mov = false;
/* Use the red zone (area below %rsp) where the ABI guarantees it.  */
7887   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7888       && current_function_is_leaf
7889       && !ix86_current_function_calls_tls_descriptor)
7891       frame->red_zone_size = frame->to_allocate;
7892       if (frame->save_regs_using_mov)
7893 	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7894       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7895 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7898     frame->red_zone_size = 0;
7899   frame->to_allocate -= frame->red_zone_size;
7900   frame->stack_pointer_offset -= frame->red_zone_size;
7902   fprintf (stderr, "\n");
7903   fprintf (stderr, "size: %ld\n", (long)size);
7904   fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7905   fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7906   fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7907   fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7908   fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7909   fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7910   fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7911   fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7912   fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7913   fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7914   fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7915 	   (long)frame->hard_frame_pointer_offset);
7916   fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7917   fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7918   fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7919   fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7923 /* Emit code to save registers in the prologue, using push instructions
   (one per saved general-purpose register, highest regno first).  */
7926 ix86_emit_save_regs (void)
7931   for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7932     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7934 	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
/* Mark each push frame-related so dwarf2 CFI is generated for it.  */
7935 	RTX_FRAME_RELATED_P (insn) = 1;
7939 /* Emit code to save registers using MOV insns.  The first register
7940    is saved at POINTER + OFFSET; each subsequent one a word higher.  */
7942 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7947   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7948     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7950 	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7952 			       gen_rtx_REG (Pmode, regno));
7953 	RTX_FRAME_RELATED_P (insn) = 1;
7954 	offset += UNITS_PER_WORD;
7958 /* Emit code to save SSE registers using MOV insns.  The first register
7959    is saved at POINTER + OFFSET in a 16-byte-aligned TImode slot.  */
7961 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7967   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7968     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7970 	mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7971 	set_mem_align (mem, 128);
7972 	insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7973 	RTX_FRAME_RELATED_P (insn) = 1;
7978 /* Expand prologue or epilogue stack adjustment.
7979    The pattern exist to put a dependency on all ebp-based memory accesses.
7980    STYLE should be negative if instructions should be marked as frame related,
7981    zero if %r11 register is live and cannot be freely used and positive
7985 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7990     insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7991   else if (x86_64_immediate_operand (offset, DImode))
7992     insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7996       /* r11 is used by indirect sibcall return as well, set before the
7997 	 epilogue and used after the epilogue.  ATM indirect sibcall
7998 	 shouldn't be used together with huge frame sizes in one
7999 	 function because of the frame_size check in sibcall.c.  */
/* Offset doesn't fit a 32-bit immediate: stage it through %r11.  */
8001       r11 = gen_rtx_REG (DImode, R11_REG);
8002       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8004 	RTX_FRAME_RELATED_P (insn) = 1;
8005       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8009     RTX_FRAME_RELATED_P (insn) = 1;
8012 /* Find an available register to be used as dynamic realign argument
8013    pointer register.  Such a register will be written in prologue and
8014    used in begin of body, so it must not be
8015 	1. parameter passing register.
8017    We reuse static-chain register if it is available.  Otherwise, we
8018    use DI for i386 and R13 for x86-64.  We chose R13 since it has
8021    Return: the regno of chosen register.  */
/* Picks the hard register to hold the DRAP (dynamic realign argument
   pointer).  The choice must survive from prologue to first body use,
   so functions that may sibcall (crtl->tail_call_emit) cannot use a
   caller-saved candidate and fall back to a call-preserved one.  */
8024 find_drap_reg (void)
8026   tree decl = cfun->decl;
8030       /* Use R13 for nested function or function need static chain.
8031 	 Since function with tail call may use any caller-saved
8032 	 registers in epilogue, DRAP must not use caller-saved
8033 	 register in such case.  */
8034       if ((decl_function_context (decl)
8035 	   && !DECL_NO_STATIC_CHAIN (decl))
8036 	  || crtl->tail_call_emit)
8043       /* Use DI for nested function or function need static chain.
8044 	 Since function with tail call may use any caller-saved
8045 	 registers in epilogue, DRAP must not use caller-saved
8046 	 register in such case.  */
8047       if ((decl_function_context (decl)
8048 	   && !DECL_NO_STATIC_CHAIN (decl))
8049 	  || crtl->tail_call_emit)
8052       /* Reuse static chain register if it isn't used for parameter
/* regparm <= 2 leaves %ecx free of argument passing; fastcall would
   claim %ecx, so it must be ruled out explicitly.  */
8054       if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8055 	  && !lookup_attribute ("fastcall",
8056     				TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8063 /* Update incoming stack boundary and estimated stack alignment.  */
/* Recomputes ix86_incoming_stack_boundary for the current function:
   start from the user/default value, then lower it for the
   force_align_arg_pointer attribute, raise it to the parameter stack
   boundary, and lower it for main() whose entry stack the runtime
   aligns.  Finally bumps the estimated alignment for the 64-bit
   varargs register-save area (needs 16-byte alignment).  */
8066 ix86_update_stack_boundary (void)
8068   /* Prefer the one specified at command line.  */
8069   ix86_incoming_stack_boundary
8070     = (ix86_user_incoming_stack_boundary
8071        ? ix86_user_incoming_stack_boundary
8072        : ix86_default_incoming_stack_boundary);
8074   /* Incoming stack alignment can be changed on individual functions
8075      via force_align_arg_pointer attribute.  We use the smallest
8076      incoming stack boundary.  */
8077   if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8078       && lookup_attribute (ix86_force_align_arg_pointer_string,
8079 			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8080     ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8082   /* The incoming stack frame has to be aligned at least at
8083      parm_stack_boundary.  */
8084   if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8085     ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8087   /* Stack at entrance of main is aligned by runtime.  We use the
8088      smallest incoming stack boundary.  */
8089   if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8090       && DECL_NAME (current_function_decl)
8091       && MAIN_NAME_P (DECL_NAME (current_function_decl))
8092       && DECL_FILE_SCOPE_P (current_function_decl))
8093     ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8095   /* x86_64 vararg needs 16byte stack alignment for register save
/* 128 bits = 16 bytes, the SSE register-save-area requirement.  */
8099       && crtl->stack_alignment_estimated < 128)
8100     crtl->stack_alignment_estimated = 128;
8103 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8104    needed or an rtx for DRAP otherwise.  */
/* When stack realignment via DRAP is active, selects the DRAP hard
   register (find_drap_reg), records it in crtl->drap_reg, and emits a
   copy into a virtual register right after the function entry so the
   body can address incoming arguments through it.  */
8107 ix86_get_drap_rtx (void)
/* -mforce-drap, or outgoing args pushed/popped per call, forces DRAP.  */
8109   if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8110     crtl->need_drap = true;
8112   if (stack_realign_drap)
8114       /* Assign DRAP to vDRAP and returns vDRAP */
8115       unsigned int regno = find_drap_reg ();
8120       arg_ptr = gen_rtx_REG (Pmode, regno);
8121       crtl->drap_reg = arg_ptr;
8124       drap_vreg = copy_to_reg (arg_ptr);
/* Insert the copy at the very start of the function body.  */
8128       insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8129       RTX_FRAME_RELATED_P (insn) = 1;
8136 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
/* The incoming-argument base is always the virtual incoming-args rtx;
   any DRAP substitution happens later, during reload/vregs.  */
8139 ix86_internal_arg_pointer (void)
8141   return virtual_incoming_args_rtx;
8144 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8145    This is called from dwarf2out.c to emit call frame instructions
8146    for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
/* Translates the two frame-related UNSPECs this backend emits into
   explicit CFI: UNSPEC_REG_SAVE -> register-save note, UNSPEC_DEF_CFA
   -> CFA definition at the given register/offset.  */
8148 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8150   rtx unspec = SET_SRC (pattern);
8151   gcc_assert (GET_CODE (unspec) == UNSPEC);
8155     case UNSPEC_REG_SAVE:
8156       dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8157 			      SET_DEST (pattern));
8159     case UNSPEC_DEF_CFA:
8160       dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8161 			 INTVAL (XVECEXP (unspec, 0, 0)));
8168 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8169    to be generated in correct form.  */
/* Decides, once per function, whether runtime stack realignment is
   actually required: the effective incoming boundary is the max of the
   parameter boundary and the incoming boundary, and realignment is
   needed when it is below the alignment the body demands (max used
   slot alignment for leaf functions, stack_alignment_needed otherwise).
   The result is latched; a second call may only confirm it.  */
8171 ix86_finalize_stack_realign_flags (void)
8173   /* Check if stack realign is really needed after reload, and
8174      stores result in cfun */
8175   unsigned int incoming_stack_boundary
8176     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8177        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8178   unsigned int stack_realign = (incoming_stack_boundary
8179 				< (current_function_is_leaf
8180 				   ? crtl->max_used_stack_slot_alignment
8181 				   : crtl->stack_alignment_needed));
8183   if (crtl->stack_realign_finalized)
8185       /* After stack_realign_needed is finalized, we can't no longer
/* Re-deciding differently here would desynchronize prologue/epilogue.  */
8187       gcc_assert (crtl->stack_realign_needed == stack_realign);
8191   crtl->stack_realign_needed = stack_realign;
8192   crtl->stack_realign_finalized = true;
8196 /* Expand the prologue into a bunch of separate insns.  */
/* Main prologue expander.  Order of business:
     1. finalize the realignment decision and compute the frame layout;
     2. set up DRAP and realign the stack when stack_realign_drap;
     3. establish the frame pointer if needed (and realign via FP);
     4. save integer registers (push or MOV form), allocate the frame
        (directly, or via the stack-probing worker on large frames);
     5. save SSE registers with aligned MOVs;
     6. load the PIC register, emit DRAP setup for the no-realign case,
        a memory blockage protecting red-zone accesses, and CLD.
   NOTE(review): this listing elides many lines of the original body.  */
8199 ix86_expand_prologue (void)
8203   struct ix86_frame frame;
8204   HOST_WIDE_INT allocate;
8206   ix86_finalize_stack_realign_flags ();
8208   /* DRAP should not coexist with stack_realign_fp */
8209   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8211   ix86_compute_frame_layout (&frame);
8213   /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8214      of DRAP is needed and stack realignment is really needed after reload */
8215   if (crtl->drap_reg && crtl->stack_realign_needed)
8218       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8219       int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8220 			      ? 0 : UNITS_PER_WORD);
8222       gcc_assert (stack_realign_drap);
8224       /* Grab the argument pointer.  */
/* Skip the return address and, for a call-saved DRAP, its save slot.  */
8225       x = plus_constant (stack_pointer_rtx,
8226                          (UNITS_PER_WORD + param_ptr_offset));
8229       /* Only need to push parameter pointer reg if it is caller
8231       if (!call_used_regs[REGNO (crtl->drap_reg)])
8233 	  /* Push arg pointer reg */
8234 	  insn = emit_insn (gen_push (y));
8235 	  RTX_FRAME_RELATED_P (insn) = 1;
8238       insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8239       RTX_FRAME_RELATED_P (insn) = 1;
8241       /* Align the stack.  */
8242       insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8244 					   GEN_INT (-align_bytes)));
8245       RTX_FRAME_RELATED_P (insn) = 1;
8247       /* Replicate the return address on the stack so that return
8248 	 address can be reached via (argp - 1) slot.  This is needed
8249 	 to implement macro RETURN_ADDR_RTX and intrinsic function
8250 	 expand_builtin_return_addr etc.  */
8252       x = gen_frame_mem (Pmode,
8253                          plus_constant (x, -UNITS_PER_WORD));
8254       insn = emit_insn (gen_push (x));
8255       RTX_FRAME_RELATED_P (insn) = 1;
8258   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8259      slower on all targets.  Also sdb doesn't like it.  */
8261   if (frame_pointer_needed)
8263       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8264       RTX_FRAME_RELATED_P (insn) = 1;
8266       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8267       RTX_FRAME_RELATED_P (insn) = 1;
8270   if (stack_realign_fp)
8272       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8273       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8275       /* Align the stack.  */
8276       insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8278 					   GEN_INT (-align_bytes)));
8279       RTX_FRAME_RELATED_P (insn) = 1;
/* Bytes to allocate beyond the integer-register saves: locals plus the
   SSE save area (16 bytes per reg) plus its alignment padding.  */
8282   allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8284   if (!frame.save_regs_using_mov)
8285     ix86_emit_save_regs ();
8287     allocate += frame.nregs * UNITS_PER_WORD;
8289   /* When using red zone we may start register saving before allocating
8290      the stack frame saving one cycle of the prologue.  However I will
8291      avoid doing this if I am going to have to probe the stack since
8292      at least on x86_64 the stack probe can turn into a call that clobbers
8293      a red zone location */
8294   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8295       && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8296     ix86_emit_save_regs_using_mov ((frame_pointer_needed
8297 				     && !crtl->stack_realign_needed)
8298                                    ? hard_frame_pointer_rtx
8299 				   : stack_pointer_rtx,
8300 				   -frame.nregs * UNITS_PER_WORD);
8304   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8305     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8306 			       GEN_INT (-allocate), -1);
8309       /* Only valid for Win32.  */
8310       rtx eax = gen_rtx_REG (Pmode, AX_REG);
8314       gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8316       if (cfun->machine->call_abi == MS_ABI)
/* %eax may carry an incoming value (e.g. struct return); preserve it
   around the allocation worker, which clobbers it.  */
8319 	eax_live = ix86_eax_live_at_start_p ();
8323 	  emit_insn (gen_push (eax));
8324 	  allocate -= UNITS_PER_WORD;
8327       emit_move_insn (eax, GEN_INT (allocate));
8330 	insn = gen_allocate_stack_worker_64 (eax, eax);
8332 	insn = gen_allocate_stack_worker_32 (eax, eax);
8333       insn = emit_insn (insn);
8334       RTX_FRAME_RELATED_P (insn) = 1;
/* The worker hides the SP adjustment; attach an explicit note so the
   unwinder still sees sp = sp - allocate.  */
8335       t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8336       t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8337       add_reg_note (insn, REG_FRAME_RELATED_EXPR, t);
8341 	  if (frame_pointer_needed)
8342 	    t = plus_constant (hard_frame_pointer_rtx,
8345 			       - frame.nregs * UNITS_PER_WORD);
8347 	    t = plus_constant (stack_pointer_rtx, allocate);
8348 	  emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* MOV-based register saving that could not be done before allocation
   (no red zone, or a probing allocation was required).  */
8352   if (frame.save_regs_using_mov
8353       && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8354          && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8356       if (!frame_pointer_needed
8357 	  || !frame.to_allocate
8358 	  || crtl->stack_realign_needed)
8359 	ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8361 				       + frame.nsseregs * 16 + frame.padding0);
8363 	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8364 				       -frame.nregs * UNITS_PER_WORD);
8366   if (!frame_pointer_needed
8367       || !frame.to_allocate
8368       || crtl->stack_realign_needed)
8369     ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8372     ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8373 				       - frame.nregs * UNITS_PER_WORD
8374 				       - frame.nsseregs * 16
/* PIC register setup.  */
8377   pic_reg_used = false;
8378   if (pic_offset_table_rtx
8379       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8382       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8384       if (alt_pic_reg_used != INVALID_REGNUM)
8385 	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8387       pic_reg_used = true;
8394 	  if (ix86_cmodel == CM_LARGE_PIC)
8396 	      rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8397 	      rtx label = gen_label_rtx ();
8399 	      LABEL_PRESERVE_P (label) = 1;
8400 	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8401 	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8402 	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8403 	      insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8404 			      		    pic_offset_table_rtx, tmp_reg));
8407 	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8410 	insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8413   /* In the pic_reg_used case, make sure that the got load isn't deleted
8414      when mcount needs it.  Blockage to avoid call movement across mcount
8415      call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8417   if (crtl->profile && pic_reg_used)
8418     emit_insn (gen_prologue_use (pic_offset_table_rtx));
8420   if (crtl->drap_reg && !crtl->stack_realign_needed)
8422       /* vDRAP is setup but after reload it turns out stack realign
8423          isn't necessary, here we will emit prologue to setup DRAP
8424          without stack realign adjustment */
/* DRAP = saved %ebp slot + 2 words (skips saved FP and return addr).  */
8425       int drap_bp_offset = UNITS_PER_WORD * 2;
8426       rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8427       insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8430   /* Prevent instructions from being scheduled into register save push
8431      sequence when access to the redzone area is done through frame pointer.
8432      The offset between the frame pointer and the stack pointer is calculated
8433      relative to the value of the stack pointer at the end of the function
8434      prologue, and moving instructions that access redzone area via frame
8435      pointer inside push sequence violates this assumption.  */
8436   if (frame_pointer_needed && frame.red_zone_size)
8437     emit_insn (gen_memory_blockage ());
8439   /* Emit cld instruction if stringops are used in the function.  */
8440   if (TARGET_CLD && ix86_current_function_needs_cld)
8441     emit_insn (gen_cld ());
8444 /* Emit code to restore saved registers using MOV insns.  First register
8445    is restored from POINTER + OFFSET.  */
/* Loads each saved integer register back from consecutive word slots
   at POINTER + OFFSET.  MAYBE_EH_RETURN is passed through to
   ix86_save_reg so the eh_return scratch regs are included when
   restoring along the exception path.  */
8447 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8448 				  int maybe_eh_return)
8451   rtx base_address = gen_rtx_MEM (Pmode, pointer);
8453   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8454     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8456 	/* Ensure that adjust_address won't be forced to produce pointer
8457 	   out of range allowed by x86-64 instruction set.  */
/* Offsets outside the signed-32-bit range cannot be encoded; fold the
   offset into %r11 and address relative to it instead.  */
8458 	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8462 	    r11 = gen_rtx_REG (DImode, R11_REG);
8463 	    emit_move_insn (r11, GEN_INT (offset));
8464 	    emit_insn (gen_adddi3 (r11, r11, pointer));
8465 	    base_address = gen_rtx_MEM (Pmode, r11);
8468 	emit_move_insn (gen_rtx_REG (Pmode, regno),
8469 			adjust_address (base_address, Pmode, offset));
8470 	offset += UNITS_PER_WORD;
8474 /* Emit code to restore saved registers using MOV insns.  First register
8475    is restored from POINTER + OFFSET.  */
/* SSE counterpart of ix86_emit_restore_regs_using_mov: reloads each
   saved SSE register as TImode from 16-byte, 128-bit-aligned slots at
   POINTER + OFFSET, with the same %r11 fallback for offsets that do
   not fit a sign-extended 32-bit displacement.  */
8477 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8478 				      int maybe_eh_return)
8481   rtx base_address = gen_rtx_MEM (TImode, pointer);
8484   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8485     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8487 	/* Ensure that adjust_address won't be forced to produce pointer
8488 	   out of range allowed by x86-64 instruction set.  */
8489 	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8493 	    r11 = gen_rtx_REG (DImode, R11_REG);
8494 	    emit_move_insn (r11, GEN_INT (offset));
8495 	    emit_insn (gen_adddi3 (r11, r11, pointer));
8496 	    base_address = gen_rtx_MEM (TImode, r11);
8499 	mem = adjust_address (base_address, TImode, offset);
/* Prologue guaranteed 16-byte alignment of the SSE save area.  */
8500 	set_mem_align (mem, 128);
8501 	emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8506 /* Restore function stack, frame, and registers.  */
/* Epilogue expander.  STYLE: 1 = normal return, 0 = sibcall epilogue
   (no return insn), 2 = eh_return path.  Chooses between a MOV-based
   restore (registers reloaded from memory, then SP recovered in one
   step / LEAVE) and a pop-based restore (frame deallocated first,
   registers popped), mirroring the save strategy chosen by the
   prologue.  Finally undoes DRAP alignment and emits the return,
   including the >64K pops_args workaround.
   NOTE(review): this listing elides many lines of the original body.  */
8509 ix86_expand_epilogue (int style)
8513   struct ix86_frame frame;
8514   HOST_WIDE_INT offset;
8516   ix86_finalize_stack_realign_flags ();
8518  /* When stack is realigned, SP must be valid.  */
8519   sp_valid = (!frame_pointer_needed
8520 	      || current_function_sp_is_unchanging
8521 	      || stack_realign_fp);
8523   ix86_compute_frame_layout (&frame);
8525   /* See the comment about red zone and frame
8526      pointer usage in ix86_expand_prologue.  */
8527   if (frame_pointer_needed && frame.red_zone_size)
8528     emit_insn (gen_memory_blockage ());
8530   /* Calculate start of saved registers relative to ebp.  Special care
8531      must be taken for the normal return case of a function using
8532      eh_return: the eax and edx registers are marked as saved, but not
8533      restored along this path.  */
8534   offset = frame.nregs;
8535   if (crtl->calls_eh_return && style != 2)
/* Negative: the save area lies below the frame pointer.  */
8537   offset *= -UNITS_PER_WORD;
8538   offset -= frame.nsseregs * 16 + frame.padding0;
8540   /* If we're only restoring one register and sp is not valid then
8541      using a move instruction to restore the register since it's
8542      less work than reloading sp and popping the register.
8544      The default code result in stack adjustment using add/lea instruction,
8545      while this code results in LEAVE instruction (or discrete equivalent),
8546      so it is profitable in some other cases as well.  Especially when there
8547      are no registers to restore.  We also use this code when TARGET_USE_LEAVE
8548      and there is exactly one register to pop.  This heuristic may need some
8549      tuning in future.  */
8550   if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8551       || (TARGET_EPILOGUE_USING_MOVE
8552 	  && cfun->machine->use_fast_prologue_epilogue
8553 	  && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8554       || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8555       || (frame_pointer_needed && TARGET_USE_LEAVE
8556 	  && cfun->machine->use_fast_prologue_epilogue
8557 	  && (frame.nregs + frame.nsseregs) == 1)
8558       || crtl->calls_eh_return)
8560       /* Restore registers.  We can use ebp or esp to address the memory
8561 	 locations.  If both are available, default to ebp, since offsets
8562 	 are known to be small.  Only exception is esp pointing directly
8563 	 to the end of block of saved registers, where we may simplify
8566 	 If we are realigning stack with bp and sp, regs restore can't
8567 	 be addressed by bp. sp must be used instead.  */
8569       if (!frame_pointer_needed
8570 	  || (sp_valid && !frame.to_allocate)
8571 	  || stack_realign_fp)
8573 	  ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8574 						frame.to_allocate, style == 2);
8575 	  ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8577 					    + frame.nsseregs * 16
8578 					    + frame.padding0, style == 2);
8582 	  ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8583 						offset, style == 2);
8584 	  ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8586 					    + frame.nsseregs * 16
8587 					    + frame.padding0, style == 2);
8590       /* eh_return epilogues need %ecx added to the stack pointer.  */
8593 	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8595 	  /* Stack align doesn't work with eh_return.  */
8596 	  gcc_assert (!crtl->stack_realign_needed);
8598 	  if (frame_pointer_needed)
/* SP = FP + stackadj + 1 word, then reload the saved frame pointer
   from the old FP location before the adjustment takes effect.  */
8600 	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8601 	      tmp = plus_constant (tmp, UNITS_PER_WORD);
8602 	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8604 	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8605 	      emit_move_insn (hard_frame_pointer_rtx, tmp);
8607 	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8612 	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8613 	      tmp = plus_constant (tmp, (frame.to_allocate
8614 					 + frame.nregs * UNITS_PER_WORD
8615 					 + frame.nsseregs * 16
8617 	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8620       else if (!frame_pointer_needed)
8621 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8622 				   GEN_INT (frame.to_allocate
8623 					    + frame.nregs * UNITS_PER_WORD
8624 					    + frame.nsseregs * 16
8627       /* If not an i386, mov & pop is faster than "leave".  */
8628       else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8629 	       || !cfun->machine->use_fast_prologue_epilogue)
8630 	emit_insn ((*ix86_gen_leave) ());
8633 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
8634 				     hard_frame_pointer_rtx,
8637 	  emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Pop-based restore path.  */
8642       /* First step is to deallocate the stack frame so that we can
8645 	 If we realign stack with frame pointer, then stack pointer
8646          won't be able to recover via lea $offset(%bp), %sp, because
8647 	 there is a padding area between bp and sp for realign.
8648          "add $to_allocate, %sp" must be used instead.  */
8651 	  gcc_assert (frame_pointer_needed);
8652 	  gcc_assert (!stack_realign_fp);
8653 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
8654 				     hard_frame_pointer_rtx,
8655 				     GEN_INT (offset), style);
8656           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8657 						frame.to_allocate, style == 2);
8658 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8659 				     GEN_INT (frame.nsseregs * 16), style);
8661       else if (frame.to_allocate || frame.nsseregs)
8663 	  ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8666 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8667 				     GEN_INT (frame.to_allocate
8668 				     	      + frame.nsseregs * 16
8669 					      + frame.padding0), style);
8672       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8673 	if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8674 	  emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8675       if (frame_pointer_needed)
8677 	  /* Leave results in shorter dependency chains on CPUs that are
8678 	     able to grok it fast.  */
8679 	  if (TARGET_USE_LEAVE)
8680 	    emit_insn ((*ix86_gen_leave) ());
8683 	      /* For stack realigned really happens, recover stack
8684 		 pointer to hard frame pointer is a must, if not using
8686 	      if (stack_realign_fp)
8687 		pro_epilogue_adjust_stack (stack_pointer_rtx,
8688 					   hard_frame_pointer_rtx,
8690 	      emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Undo the DRAP prologue: step SP over the replicated return address
   (and the DRAP register's save slot when it was call-saved).  */
8695   if (crtl->drap_reg && crtl->stack_realign_needed)
8697       int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8698 			      ? 0 : UNITS_PER_WORD);
8699       gcc_assert (stack_realign_drap);
8700       emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8702 				   GEN_INT (-(UNITS_PER_WORD
8703 					      + param_ptr_offset))));
8704       if (!call_used_regs[REGNO (crtl->drap_reg)])
8705 	emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8709   /* Sibcall epilogues don't want a return instruction.  */
8713   if (crtl->args.pops_args && crtl->args.size)
8715       rtx popc = GEN_INT (crtl->args.pops_args);
8717       /* i386 can only pop 64K bytes.  If asked to pop more, pop
8718 	 return address, do explicit add, and jump indirectly to the
8721       if (crtl->args.pops_args >= 65536)
8723 	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
8725 	  /* There is no "pascal" calling convention in any 64bit ABI.  */
8726 	  gcc_assert (!TARGET_64BIT);
8728 	  emit_insn (gen_popsi1 (ecx));
8729 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8730 	  emit_jump_insn (gen_return_indirect_internal (ecx));
8733 	emit_jump_insn (gen_return_pop_internal (popc));
8736     emit_jump_insn (gen_return_internal ());
8739 /* Reset from the function's potential modifications.  */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: restores the PIC register rtx to
   its canonical hard regno (the prologue may have retargeted it), and
   on Mach-O emits a trailing NOP when the function would otherwise end
   in a label, which Mach-O does not allow at end of object.  */
8742 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8743 			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8745   if (pic_offset_table_rtx)
8746     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8748   /* Mach-O doesn't support labels at the end of objects, so if
8749      it looks like we might want one, insert a NOP.  */
8751       rtx insn = get_last_insn ();
8754 	     && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
/* Walk back over non-label notes to find the last real insn.  */
8755 	insn = PREV_INSN (insn);
8759 		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8760 	fputs ("\tnop\n", file);
8766 /* Extract the parts of an RTL expression that is a valid memory address
8767    for an instruction.  Return 0 if the structure of the address is
8768    grossly off.  Return -1 if the address contains ASHIFT, so it is not
8769    strictly valid, but still used for computing length of lea instruction.  */
/* Splits ADDR into the x86 addressing-mode components
   base + index*scale + disp (+ segment override), storing them in
   *OUT.  Also canonicalizes several encodings: swaps base/index when
   the index must serve as base, rewrites reg*2 as reg+reg, and rejects
   forms the hardware cannot encode (scaled index without base/disp,
   ebp-as-base without displacement).
   NOTE(review): lines are elided in this listing.  */
8772 ix86_decompose_address (rtx addr, struct ix86_address *out)
8774   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8775   rtx base_reg, index_reg;
8776   HOST_WIDE_INT scale = 1;
8777   rtx scale_rtx = NULL_RTX;
8779   enum ix86_address_seg seg = SEG_DEFAULT;
8781   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8783   else if (GET_CODE (addr) == PLUS)
/* Flatten the PLUS tree into an addend list, then classify each.  */
8793 	  addends[n++] = XEXP (op, 1);
8796       while (GET_CODE (op) == PLUS);
8801       for (i = n; i >= 0; --i)
8804 	  switch (GET_CODE (op))
8809 	      index = XEXP (op, 0);
8810 	      scale_rtx = XEXP (op, 1);
/* A %fs/%gs-relative TLS reference becomes a segment override.  */
8814 	      if (XINT (op, 1) == UNSPEC_TP
8815 	          && TARGET_TLS_DIRECT_SEG_REFS
8816 	          && seg == SEG_DEFAULT)
8817 		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8846   else if (GET_CODE (addr) == MULT)
8848       index = XEXP (addr, 0);	/* index*scale */
8849       scale_rtx = XEXP (addr, 1);
8851   else if (GET_CODE (addr) == ASHIFT)
8855       /* We're called for lea too, which implements ashift on occasion.  */
8856       index = XEXP (addr, 0);
8857       tmp = XEXP (addr, 1);
8858       if (!CONST_INT_P (tmp))
/* Shift count doubles as log2 of the scale; only 1/2/4/8 encodable.  */
8860       scale = INTVAL (tmp);
8861       if ((unsigned HOST_WIDE_INT) scale > 3)
8867     disp = addr;			/* displacement */
8869   /* Extract the integral value of scale.  */
8872       if (!CONST_INT_P (scale_rtx))
8874       scale = INTVAL (scale_rtx);
8877   base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8878   index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8880   /* Allow arg pointer and stack pointer as index if there is not scaling.  */
8881   if (base_reg && index_reg && scale == 1
8882       && (index_reg == arg_pointer_rtx
8883 	  || index_reg == frame_pointer_rtx
8884 	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* %esp can never be an index; swap it into the base position.  */
8887       tmp = base, base = index, index = tmp;
8888       tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8891   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
8892   if ((base_reg == hard_frame_pointer_rtx
8893        || base_reg == frame_pointer_rtx
8894        || base_reg == arg_pointer_rtx) && !disp)
8897   /* Special case: on K6, [%esi] makes the instruction vector decoded.
8898      Avoid this by transforming to [%esi+0].
8899      Reload calls address legitimization without cfun defined, so we need
8900      to test cfun for being non-NULL. */
8901   if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8902       && base_reg && !index_reg && !disp
8904       && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8907   /* Special case: encode reg+reg instead of reg*2.  */
8908   if (!base && index && scale && scale == 2)
8909     base = index, base_reg = index_reg, scale = 1;
8911   /* Special case: scaling cannot be encoded without base or displacement.  */
8912   if (!base && !disp && index && scale != 1)
8924 /* Return cost of the memory address x.
8925    For i386, it is better to use a complex address than let gcc copy
8926    the address into a reg and make a new pseudo.  But not if the address
8927    requires two regs - that would mean more pseudos with longer
/* TARGET_ADDRESS_COST hook.  Cheaper addresses consume fewer hard
   registers; pseudo (not-yet-allocated) regs are treated as free.
   Also penalizes K6 addressing modes that force vector decoding.  */
8930 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8932   struct ix86_address parts;
8934   int ok = ix86_decompose_address (x, &parts);
/* Look through SUBREGs so register-class tests see the real reg.  */
8938   if (parts.base && GET_CODE (parts.base) == SUBREG)
8939     parts.base = SUBREG_REG (parts.base);
8940   if (parts.index && GET_CODE (parts.index) == SUBREG)
8941     parts.index = SUBREG_REG (parts.index);
8943   /* Attempt to minimize number of registers in the address.  */
8945        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8947 	  && (!REG_P (parts.index)
8948 	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8952       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8954       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8955       && parts.base != parts.index)
8958   /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
8959      since its predecode logic can't detect the length of instructions
8960      and it degenerates to vector decoded.  Increase cost of such
8961      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
8962      to split such addresses or even refuse such addresses at all.
8964      Following addressing modes are affected:
8969      The first and last case may be avoidable by explicitly coding the zero in
8970      memory address, but I don't have AMD-K6 machine handy to check this
8974       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8975 	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8976 	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8982 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8983    this is used to form addresses to local data when -fPIC is in
/* True iff DISP is the Mach-O pic-base-relative offset unspec.  */
8987 darwin_local_data_pic (rtx disp)
8989   return (GET_CODE (disp) == UNSPEC
8990 	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8993 /* Determine if a given RTX is a valid constant.  We already know this
8994    satisfies CONSTANT_P.  */
/* Rejects constants the move patterns cannot materialize directly:
   TLS-model symbols, dllimport symbols, most UNSPECs, and (further
   down, elided here) certain CONST_DOUBLE/vector cases.  Everything
   else is accepted and handled by the move expanders.  */
8997 legitimate_constant_p (rtx x)
8999   switch (GET_CODE (x))
9004       if (GET_CODE (x) == PLUS)
/* symbol + non-integer offset cannot be a link-time constant.  */
9006 	  if (!CONST_INT_P (XEXP (x, 1)))
9011       if (TARGET_MACHO && darwin_local_data_pic (x))
9014       /* Only some unspecs are valid as "constants".  */
9015       if (GET_CODE (x) == UNSPEC)
9016 	switch (XINT (x, 1))
9021 	    return TARGET_64BIT;
9024 	    x = XVECEXP (x, 0, 0);
9025 	    return (GET_CODE (x) == SYMBOL_REF
9026 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9028 	    x = XVECEXP (x, 0, 0);
9029 	    return (GET_CODE (x) == SYMBOL_REF
9030 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9035       /* We must have drilled down to a symbol.  */
9036       if (GET_CODE (x) == LABEL_REF)
9038       if (GET_CODE (x) != SYMBOL_REF)
9043       /* TLS symbols are never valid.  */
9044       if (SYMBOL_REF_TLS_MODEL (x))
9047       /* DLLIMPORT symbols are never valid.  */
9048       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9049 	  && SYMBOL_REF_DLLIMPORT_P (x))
9054       if (GET_MODE (x) == TImode
9055 	  && x != CONST0_RTX (TImode)
9061       if (!standard_sse_constant_p (x))
9068   /* Otherwise we handle everything else in the move patterns.  */
9072 /* Determine if it's legal to put X into the constant pool.  This
9073    is not possible for the address of thread-local symbols, which
9074    is checked above.  */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: plain numeric/vector constants
   may always be spilled to the pool; anything else defers to
   legitimate_constant_p (inverted).  */
9077 ix86_cannot_force_const_mem (rtx x)
9079   /* We can always put integral constants and vectors in memory.  */
9080   switch (GET_CODE (x))
9090   return !legitimate_constant_p (x);
9094 /* Nonzero if the constant value X is a legitimate general operand
9095    when generating PIC code.  It is given that flag_pic is on and
9096    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
/* Under PIC, bare symbolic constants are generally not legitimate
   operands; they must be expressed as PIC displacements.  Certain
   UNSPEC-wrapped TLS forms are allowed directly.  */
9099 legitimate_pic_operand_p (rtx x)
9103   switch (GET_CODE (x))
9106       inner = XEXP (x, 0);
/* Strip an integer offset so the UNSPEC/symbol test sees the core.  */
9107       if (GET_CODE (inner) == PLUS
9108 	  && CONST_INT_P (XEXP (inner, 1)))
9109 	inner = XEXP (inner, 0);
9111       /* Only some unspecs are valid as "constants".  */
9112       if (GET_CODE (inner) == UNSPEC)
9113 	switch (XINT (inner, 1))
9118 	    return TARGET_64BIT;
9120 	    x = XVECEXP (inner, 0, 0);
9121 	    return (GET_CODE (x) == SYMBOL_REF
9122 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9123 	  case UNSPEC_MACHOPIC_OFFSET:
9124 	    return legitimate_pic_address_disp_p (x);
9132       return legitimate_pic_address_disp_p (x);
9139 /* Determine if a given CONST RTX is a valid memory displacement
/* Validates DISP as the displacement part of a PIC address.  In
   64-bit mode, non-dynamic local symbols (optionally +/- a <16MB
   offset) may be addressed directly; otherwise only specific
   GOT/TLS-related UNSPECs are accepted, each checked against the
   TLS model of the wrapped symbol.
   NOTE(review): lines are elided in this listing.  */
9143 legitimate_pic_address_disp_p (rtx disp)
9147   /* In 64bit mode we can allow direct addresses of symbols and labels
9148      when they are not dynamic symbols.  */
9151       rtx op0 = disp, op1;
9153       switch (GET_CODE (disp))
9159 	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
9161 	  op0 = XEXP (XEXP (disp, 0), 0);
9162 	  op1 = XEXP (XEXP (disp, 0), 1);
/* Offset must fit well inside the +/-2GB RIP-relative reach;
   16MB is the conservative bound used here.  */
9163 	  if (!CONST_INT_P (op1)
9164 	      || INTVAL (op1) >= 16*1024*1024
9165 	      || INTVAL (op1) < -16*1024*1024)
9167 	  if (GET_CODE (op0) == LABEL_REF)
9169 	  if (GET_CODE (op0) != SYMBOL_REF)
9174 	  /* TLS references should always be enclosed in UNSPEC.  */
9175 	  if (SYMBOL_REF_TLS_MODEL (op0))
9177 	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9178 	      && ix86_cmodel != CM_LARGE_PIC)
9186   if (GET_CODE (disp) != CONST)
9188   disp = XEXP (disp, 0);
9192       /* It is unsafe to allow PLUS expressions.  This limit allowed distance
9193          of GOT tables.  We should not need these anyway.  */
9194       if (GET_CODE (disp) != UNSPEC
9195 	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
9196 	      && XINT (disp, 1) != UNSPEC_GOTOFF
9197 	      && XINT (disp, 1) != UNSPEC_PLTOFF))
9200       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9201 	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9207   if (GET_CODE (disp) == PLUS)
9209       if (!CONST_INT_P (XEXP (disp, 1)))
9211       disp = XEXP (disp, 0);
9215   if (TARGET_MACHO && darwin_local_data_pic (disp))
9218   if (GET_CODE (disp) != UNSPEC)
9221   switch (XINT (disp, 1))
9226       /* We need to check for both symbols and labels because VxWorks loads
9227 	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
9229       return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9230 	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9232       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9233 	 While ABI specify also 32bit relocation but we don't produce it in
9234 	 small PIC model at all.  */
9235       if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9236 	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9238 	return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9240     case UNSPEC_GOTTPOFF:
9241     case UNSPEC_GOTNTPOFF:
9242     case UNSPEC_INDNTPOFF:
/* Each TLS unspec is only valid for the matching TLS access model.  */
9245       disp = XVECEXP (disp, 0, 0);
9246       return (GET_CODE (disp) == SYMBOL_REF
9247 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9249       disp = XVECEXP (disp, 0, 0);
9250       return (GET_CODE (disp) == SYMBOL_REF
9251 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9253       disp = XVECEXP (disp, 0, 0);
9254       return (GET_CODE (disp) == SYMBOL_REF
9255 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
/* NOTE(review): this extraction is elided -- the embedded original line
   numbers jump (9261, 9262, 9263, 9265, ...), so interior lines of this
   function (braces, gotos to the shared error/report label, returns) are
   missing from view.  Code below is left byte-identical; only comments
   are added.  Confirm against the full i386.c before editing.  */
/* Overall shape (grounded in the visible lines): decompose ADDR into
   base/index/disp/scale via ix86_decompose_address, then validate each
   part in turn, setting `reason'/`reason_rtx' on failure.  */
9261 /* Recognizes RTL expressions that are valid memory addresses for an
9262 instruction. The MODE argument is the machine mode for the MEM
9263 expression that wants to use this address.
9265 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9266 convert common non-canonical forms to canonical form so that they will
9270 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9271 rtx addr, bool strict)
9273 struct ix86_address parts;
9274 rtx base, index, disp;
9275 HOST_WIDE_INT scale;
9276 const char *reason = NULL;
9277 rtx reason_rtx = NULL_RTX;
9279 if (ix86_decompose_address (addr, &parts) <= 0)
9281 reason = "decomposition failed";
9286 index = parts.index;
9288 scale = parts.scale;
9290 /* Validate base register.
9292 Don't allow SUBREG's that span more than a word here. It can lead to spill
9293 failures when the base is one word out of a two word structure, which is
9294 represented internally as a DImode int. */
9303 else if (GET_CODE (base) == SUBREG
9304 && REG_P (SUBREG_REG (base))
9305 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9307 reg = SUBREG_REG (base);
9310 reason = "base is not a register";
9314 if (GET_MODE (base) != Pmode)
9316 reason = "base is not in Pmode";
9320 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9321 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9323 reason = "base is not valid";
9328 /* Validate index register.
9330 Don't allow SUBREG's that span more than a word here -- same as above. */
9339 else if (GET_CODE (index) == SUBREG
9340 && REG_P (SUBREG_REG (index))
9341 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9343 reg = SUBREG_REG (index);
9346 reason = "index is not a register";
9350 if (GET_MODE (index) != Pmode)
9352 reason = "index is not in Pmode";
9356 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9357 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9359 reason = "index is not valid";
9364 /* Validate scale factor. */
/* x86 addressing only supports scale 1/2/4/8 on the index.  */
9367 reason_rtx = GEN_INT (scale);
9370 reason = "scale without index";
9374 if (scale != 2 && scale != 4 && scale != 8)
9376 reason = "scale is not a valid multiplier";
9381 /* Validate displacement. */
9386 if (GET_CODE (disp) == CONST
9387 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9388 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9389 switch (XINT (XEXP (disp, 0), 1))
9391 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9392 used. While ABI specify also 32bit relocations, we don't produce
9393 them at all and use IP relative instead. */
9396 gcc_assert (flag_pic);
9398 goto is_legitimate_pic;
9399 reason = "64bit address unspec";
9402 case UNSPEC_GOTPCREL:
9403 gcc_assert (flag_pic);
9404 goto is_legitimate_pic;
9406 case UNSPEC_GOTTPOFF:
9407 case UNSPEC_GOTNTPOFF:
9408 case UNSPEC_INDNTPOFF:
9414 reason = "invalid address unspec";
9418 else if (SYMBOLIC_CONST (disp)
9422 && MACHOPIC_INDIRECT
9423 && !machopic_operand_p (disp)
9429 if (TARGET_64BIT && (index || base))
9431 /* foo@dtpoff(%rX) is ok. */
9432 if (GET_CODE (disp) != CONST
9433 || GET_CODE (XEXP (disp, 0)) != PLUS
9434 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9435 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9436 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9437 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9439 reason = "non-constant pic memory reference";
9443 else if (! legitimate_pic_address_disp_p (disp))
9445 reason = "displacement is an invalid pic construct";
9449 /* This code used to verify that a symbolic pic displacement
9450 includes the pic_offset_table_rtx register.
9452 While this is good idea, unfortunately these constructs may
9453 be created by "adds using lea" optimization for incorrect
9462 This code is nonsensical, but results in addressing
9463 GOT table with pic_offset_table_rtx base. We can't
9464 just refuse it easily, since it gets matched by
9465 "addsi3" pattern, that later gets split to lea in the
9466 case output register differs from input. While this
9467 can be handled by separate addsi pattern for this case
9468 that never results in lea, this seems to be easier and
9469 correct fix for crash to disable this test. */
9471 else if (GET_CODE (disp) != LABEL_REF
9472 && !CONST_INT_P (disp)
9473 && (GET_CODE (disp) != CONST
9474 || !legitimate_constant_p (disp))
9475 && (GET_CODE (disp) != SYMBOL_REF
9476 || !legitimate_constant_p (disp)))
9478 reason = "displacement is not constant";
9481 else if (TARGET_64BIT
9482 && !x86_64_immediate_operand (disp, VOIDmode))
9484 reason = "displacement is out of range";
9489 /* Everything looks valid. */
/* NOTE(review): elided extraction -- braces/return type lines are missing
   (embedded line numbers jump 9496 -> 9499 -> 9501).  Comments only.  */
9496 /* Determine if a given RTX is a valid constant address. */
9499 constant_address_p (rtx x)
/* A constant address is any CONSTANT_P rtx that also passes the strict
   (third argument == 1) legitimate-address check in Pmode.  */
9501 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
/* NOTE(review): elided extraction -- the guard around new_alias_set and
   the return statement are missing from view.  Comments only.  */
9504 /* Return a unique alias set for the GOT. */
9506 static alias_set_type
9507 ix86_GOT_alias_set (void)
/* -1 acts as the "not yet allocated" sentinel; the set is created once
   and cached across calls (function-local static).  */
9509 static alias_set_type set = -1;
9511 set = new_alias_set ();
/* NOTE(review): elided extraction -- many interior lines of this function
   (variable declarations, braces, returns) are missing; embedded line
   numbers jump.  Code left byte-identical; only comments added.  */
9515 /* Return a legitimate reference for ORIG (an address) using the
9516 register REG. If REG is 0, a new pseudo is generated.
9518 There are two types of references that must be handled:
9520 1. Global data references must load the address from the GOT, via
9521 the PIC reg. An insn is emitted to do this load, and the reg is
9524 2. Static data references, constant pool addresses, and code labels
9525 compute the address as an offset from the GOT, whose base is in
9526 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9527 differentiate them from global data objects. The returned
9528 address is the PIC reg + an unspec constant.
9530 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
9531 reg also appears in the address. */
9534 legitimize_pic_address (rtx orig, rtx reg)
9541 if (TARGET_MACHO && !TARGET_64BIT)
9544 reg = gen_reg_rtx (Pmode);
9545 /* Use the generic Mach-O PIC machinery. */
9546 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9550 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9552 else if (TARGET_64BIT
9553 && ix86_cmodel != CM_SMALL_PIC
9554 && gotoff_operand (addr, Pmode))
9557 /* This symbol may be referenced via a displacement from the PIC
9558 base address (@GOTOFF). */
/* During reload, note that the PIC register is live so prologue/epilogue
   code sets it up.  (Same idiom recurs below and in the TLS path.)  */
9560 if (reload_in_progress)
9561 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9562 if (GET_CODE (addr) == CONST)
9563 addr = XEXP (addr, 0);
9564 if (GET_CODE (addr) == PLUS)
9566 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9568 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9571 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9572 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9574 tmpreg = gen_reg_rtx (Pmode);
9577 emit_move_insn (tmpreg, new_rtx);
9581 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9582 tmpreg, 1, OPTAB_DIRECT);
9585 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9587 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9589 /* This symbol may be referenced via a displacement from the PIC
9590 base address (@GOTOFF). */
9592 if (reload_in_progress)
9593 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9594 if (GET_CODE (addr) == CONST)
9595 addr = XEXP (addr, 0);
9596 if (GET_CODE (addr) == PLUS)
9598 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9600 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9603 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9604 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9605 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9609 emit_move_insn (reg, new_rtx);
9613 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9614 /* We can't use @GOTOFF for text labels on VxWorks;
9615 see gotoff_operand. */
9616 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9618 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9620 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9621 return legitimize_dllimport_symbol (addr, true);
9622 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9623 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9624 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9626 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9627 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9631 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9633 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9634 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9635 new_rtx = gen_const_mem (Pmode, new_rtx);
9636 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9639 reg = gen_reg_rtx (Pmode);
9640 /* Use directly gen_movsi, otherwise the address is loaded
9641 into register for CSE. We don't want to CSE this addresses,
9642 instead we CSE addresses from the GOT table, so skip this. */
9643 emit_insn (gen_movsi (reg, new_rtx));
9648 /* This symbol must be referenced via a load from the
9649 Global Offset Table (@GOT). */
9651 if (reload_in_progress)
9652 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9653 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9654 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9656 new_rtx = force_reg (Pmode, new_rtx);
9657 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9658 new_rtx = gen_const_mem (Pmode, new_rtx);
9659 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9662 reg = gen_reg_rtx (Pmode);
9663 emit_move_insn (reg, new_rtx);
9669 if (CONST_INT_P (addr)
9670 && !x86_64_immediate_operand (addr, VOIDmode))
9674 emit_move_insn (reg, addr);
9678 new_rtx = force_reg (Pmode, addr);
9680 else if (GET_CODE (addr) == CONST)
9682 addr = XEXP (addr, 0);
9684 /* We must match stuff we generate before. Assume the only
9685 unspecs that can get here are ours. Not that we could do
9686 anything with them anyway.... */
9687 if (GET_CODE (addr) == UNSPEC
9688 || (GET_CODE (addr) == PLUS
9689 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9691 gcc_assert (GET_CODE (addr) == PLUS);
9693 if (GET_CODE (addr) == PLUS)
9695 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9697 /* Check first to see if this is a constant offset from a @GOTOFF
9698 symbol reference. */
9699 if (gotoff_operand (op0, Pmode)
9700 && CONST_INT_P (op1))
9704 if (reload_in_progress)
9705 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9706 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9708 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9709 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9710 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9714 emit_move_insn (reg, new_rtx);
/* +/-16MB window: offsets outside the 32-bit signed-displacement
   range usable by x86-64 small-model addressing get forced to regs.  */
9720 if (INTVAL (op1) < -16*1024*1024
9721 || INTVAL (op1) >= 16*1024*1024)
9723 if (!x86_64_immediate_operand (op1, Pmode))
9724 op1 = force_reg (Pmode, op1);
9725 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
9731 base = legitimize_pic_address (XEXP (addr, 0), reg);
9732 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9733 base == reg ? NULL_RTX : reg);
9735 if (CONST_INT_P (new_rtx))
9736 new_rtx = plus_constant (base, INTVAL (new_rtx));
9739 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9741 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9742 new_rtx = XEXP (new_rtx, 1);
9744 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
/* NOTE(review): elided extraction -- the to_reg early-return and the
   final return are missing from view.  Comments only.  */
9752 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9755 get_thread_pointer (int to_reg)
/* The thread pointer is modeled as (unspec [const0] UNSPEC_TP).  */
9759 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9763 reg = gen_reg_rtx (Pmode);
9764 insn = gen_rtx_SET (VOIDmode, reg, tp);
9765 insn = emit_insn (insn);
/* NOTE(review): elided extraction -- switch braces, start_sequence/
   end_sequence calls, and several returns are missing from view.
   Code left byte-identical; only comments added.  One case arm per
   TLS model: GD, LD, IE, LE.  */
9770 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
9771 false if we expect this to be used for a memory address and true if
9772 we expect to load the address into a register. */
9775 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9777 rtx dest, base, off, pic, tp;
9782 case TLS_MODEL_GLOBAL_DYNAMIC:
9783 dest = gen_reg_rtx (Pmode);
9784 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9786 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
/* 64-bit GD: call __tls_get_addr; result arrives in %rax.  */
9788 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9791 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9792 insns = get_insns ();
9795 RTL_CONST_CALL_P (insns) = 1;
9796 emit_libcall_block (insns, dest, rax, x);
9798 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9799 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9801 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9803 if (TARGET_GNU2_TLS)
9805 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9807 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9811 case TLS_MODEL_LOCAL_DYNAMIC:
9812 base = gen_reg_rtx (Pmode);
9813 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9815 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9817 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9820 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9821 insns = get_insns ();
9824 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9825 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9826 RTL_CONST_CALL_P (insns) = 1;
9827 emit_libcall_block (insns, base, rax, note);
9829 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9830 emit_insn (gen_tls_local_dynamic_base_64 (base));
9832 emit_insn (gen_tls_local_dynamic_base_32 (base));
9834 if (TARGET_GNU2_TLS)
9836 rtx x = ix86_tls_module_base ();
9838 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9839 gen_rtx_MINUS (Pmode, x, tp));
/* LD: symbol offset is module-relative (@DTPOFF) added to the base.  */
9842 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9843 off = gen_rtx_CONST (Pmode, off);
9845 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9847 if (TARGET_GNU2_TLS)
9849 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9851 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9856 case TLS_MODEL_INITIAL_EXEC:
9860 type = UNSPEC_GOTNTPOFF;
9864 if (reload_in_progress)
9865 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9866 pic = pic_offset_table_rtx;
9867 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9869 else if (!TARGET_ANY_GNU_TLS)
9871 pic = gen_reg_rtx (Pmode);
9872 emit_insn (gen_set_got (pic));
9873 type = UNSPEC_GOTTPOFF;
9878 type = UNSPEC_INDNTPOFF;
/* IE: load the TP offset from the GOT, then add the thread pointer.  */
9881 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9882 off = gen_rtx_CONST (Pmode, off);
9884 off = gen_rtx_PLUS (Pmode, pic, off);
9885 off = gen_const_mem (Pmode, off);
9886 set_mem_alias_set (off, ix86_GOT_alias_set ());
9888 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9890 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9891 off = force_reg (Pmode, off);
9892 return gen_rtx_PLUS (Pmode, base, off);
9896 base = get_thread_pointer (true);
9897 dest = gen_reg_rtx (Pmode);
9898 emit_insn (gen_subsi3 (dest, base, off));
9902 case TLS_MODEL_LOCAL_EXEC:
9903 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9904 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9905 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9906 off = gen_rtx_CONST (Pmode, off);
9908 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9910 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9911 return gen_rtx_PLUS (Pmode, base, off);
9915 base = get_thread_pointer (true);
9916 dest = gen_reg_rtx (Pmode);
9917 emit_insn (gen_subsi3 (dest, base, off));
/* NOTE(review): elided extraction -- braces, some declarations and the
   final return are missing from view.  Comments only.  */
9928 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* GC-aware hash table mapping original decls to their __imp_ VAR_DECLs;
   created lazily on first use below.  */
9931 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9932 htab_t dllimport_map;
9935 get_dllimport_decl (tree decl)
9937 struct tree_map *h, in;
9941 size_t namelen, prefixlen;
9947 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9949 in.hash = htab_hash_pointer (decl);
9950 in.base.from = decl;
9951 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9952 h = (struct tree_map *) *loc;
/* Cache miss: build a fresh external, read-only pointer VAR_DECL whose
   RTL is a load through the "__imp_"-prefixed symbol.  */
9956 *loc = h = GGC_NEW (struct tree_map);
9958 h->base.from = decl;
9959 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9960 DECL_ARTIFICIAL (to) = 1;
9961 DECL_IGNORED_P (to) = 1;
9962 DECL_EXTERNAL (to) = 1;
9963 TREE_READONLY (to) = 1;
9965 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9966 name = targetm.strip_name_encoding (name);
9967 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9968 ? "*__imp_" : "*__imp__";
9969 namelen = strlen (name);
9970 prefixlen = strlen (prefix);
9971 imp_name = (char *) alloca (namelen + prefixlen + 1);
9972 memcpy (imp_name, prefix, prefixlen);
9973 memcpy (imp_name + prefixlen, name, namelen + 1);
9975 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9976 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9977 SET_SYMBOL_REF_DECL (rtl, to);
9978 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9980 rtl = gen_const_mem (Pmode, rtl);
9981 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9983 SET_DECL_RTL (to, rtl);
9984 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
/* NOTE(review): elided extraction -- declarations, braces and the return
   are missing from view.  Comments only.  */
9989 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9990 true if we require the result be a register. */
9993 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
9998 gcc_assert (SYMBOL_REF_DECL (symbol));
9999 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10001 x = DECL_RTL (imp_decl);
10003 x = force_reg (Pmode, x);
/* NOTE(review): elided extraction -- `changed' bookkeeping, braces, and
   the final return are missing from view (embedded numbers jump).
   Code left byte-identical; only comments added.  */
10007 /* Try machine-dependent ways of modifying an illegitimate address
10008 to be legitimate. If we find one, return the new, valid address.
10009 This macro is used in only one place: `memory_address' in explow.c.
10011 OLDX is the address as it was before break_out_memory_refs was called.
10012 In some cases it is useful to look at this to decide what needs to be done.
10014 It is always safe for this macro to do nothing. It exists to recognize
10015 opportunities to optimize the output.
10017 For the 80386, we handle X+REG by loading X into a register R and
10018 using R+REG. R will go in a general reg and indexing will be used.
10019 However, if REG is a broken-out memory address or multiplication,
10020 nothing needs to be done because REG can certainly go in a general reg.
10022 When -fpic is used, special handling is needed for symbolic references.
10023 See comments by legitimize_pic_address in i386.c for details. */
10026 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10027 enum machine_mode mode)
/* TLS symbols are handed to legitimize_tls_address; `log' doubles as
   the TLS model here.  */
10032 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10034 return legitimize_tls_address (x, (enum tls_model) log, false);
10035 if (GET_CODE (x) == CONST
10036 && GET_CODE (XEXP (x, 0)) == PLUS
10037 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10038 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10040 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10041 (enum tls_model) log, false);
10042 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10045 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10047 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10048 return legitimize_dllimport_symbol (x, true);
10049 if (GET_CODE (x) == CONST
10050 && GET_CODE (XEXP (x, 0)) == PLUS
10051 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10052 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10054 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10055 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10059 if (flag_pic && SYMBOLIC_CONST (x))
10060 return legitimize_pic_address (x, 0);
10062 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10063 if (GET_CODE (x) == ASHIFT
10064 && CONST_INT_P (XEXP (x, 1))
10065 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10068 log = INTVAL (XEXP (x, 1));
10069 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10070 GEN_INT (1 << log));
10073 if (GET_CODE (x) == PLUS)
10075 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10077 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10078 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10079 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10082 log = INTVAL (XEXP (XEXP (x, 0), 1));
10083 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10084 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10085 GEN_INT (1 << log));
10088 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10089 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10090 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10093 log = INTVAL (XEXP (XEXP (x, 1), 1));
10094 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10095 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10096 GEN_INT (1 << log));
10099 /* Put multiply first if it isn't already. */
10100 if (GET_CODE (XEXP (x, 1)) == MULT)
10102 rtx tmp = XEXP (x, 0);
10103 XEXP (x, 0) = XEXP (x, 1);
10108 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10109 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10110 created by virtual register instantiation, register elimination, and
10111 similar optimizations. */
10112 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10115 x = gen_rtx_PLUS (Pmode,
10116 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10117 XEXP (XEXP (x, 1), 0)),
10118 XEXP (XEXP (x, 1), 1));
10122 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10123 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10124 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10125 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10126 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10127 && CONSTANT_P (XEXP (x, 1)))
10130 rtx other = NULL_RTX;
10132 if (CONST_INT_P (XEXP (x, 1)))
10134 constant = XEXP (x, 1);
10135 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10137 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10139 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10140 other = XEXP (x, 1);
10148 x = gen_rtx_PLUS (Pmode,
10149 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10150 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10151 plus_constant (other, INTVAL (constant)));
10155 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10158 if (GET_CODE (XEXP (x, 0)) == MULT)
10161 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10164 if (GET_CODE (XEXP (x, 1)) == MULT)
10167 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10171 && REG_P (XEXP (x, 1))
10172 && REG_P (XEXP (x, 0)))
10175 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10178 x = legitimize_pic_address (x, 0);
10181 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10184 if (REG_P (XEXP (x, 0)))
10186 rtx temp = gen_reg_rtx (Pmode);
10187 rtx val = force_operand (XEXP (x, 1), temp);
10189 emit_move_insn (temp, val);
10191 XEXP (x, 1) = temp;
10195 else if (REG_P (XEXP (x, 1)))
10197 rtx temp = gen_reg_rtx (Pmode);
10198 rtx val = force_operand (XEXP (x, 0), temp);
10200 emit_move_insn (temp, val);
10202 XEXP (x, 0) = temp;
/* NOTE(review): elided extraction -- several case labels, braces and
   break statements are missing from view.  Code byte-identical;
   comments only.  */
10210 /* Print an integer constant expression in assembler syntax. Addition
10211 and subtraction are the only arithmetic that may appear in these
10212 expressions. FILE is the stdio stream to write to, X is the rtx, and
10213 CODE is the operand print code from the output string. */
10216 output_pic_addr_const (FILE *file, rtx x, int code)
10220 switch (GET_CODE (x))
10223 gcc_assert (flag_pic);
10228 if (! TARGET_MACHO || TARGET_64BIT)
10229 output_addr_const (file, x);
10232 const char *name = XSTR (x, 0);
10234 /* Mark the decl as referenced so that cgraph will
10235 output the function. */
10236 if (SYMBOL_REF_DECL (x))
10237 mark_decl_referenced (SYMBOL_REF_DECL (x));
10240 if (MACHOPIC_INDIRECT
10241 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10242 name = machopic_indirection_name (x, /*stub_p=*/true);
10244 assemble_name (file, name);
10246 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10247 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10248 fputs ("@PLT", file);
10255 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10256 assemble_name (asm_out_file, buf);
10260 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10264 /* This used to output parentheses around the expression,
10265 but that does not work on the 386 (either ATT or BSD assembler). */
10266 output_pic_addr_const (file, XEXP (x, 0), code);
10270 if (GET_MODE (x) == VOIDmode)
10272 /* We can use %d if the number is <32 bits and positive. */
10273 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10274 fprintf (file, "0x%lx%08lx",
10275 (unsigned long) CONST_DOUBLE_HIGH (x),
10276 (unsigned long) CONST_DOUBLE_LOW (x));
10278 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10281 /* We can't handle floating point constants;
10282 PRINT_OPERAND must handle them. */
10283 output_operand_lossage ("floating constant misused");
10287 /* Some assemblers need integer constants to appear first. */
10288 if (CONST_INT_P (XEXP (x, 0)))
10290 output_pic_addr_const (file, XEXP (x, 0), code);
10292 output_pic_addr_const (file, XEXP (x, 1), code);
10296 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10297 output_pic_addr_const (file, XEXP (x, 1), code);
10299 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: Intel dialect brackets with (), AT&T with [].  */
10305 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10306 output_pic_addr_const (file, XEXP (x, 0), code);
10308 output_pic_addr_const (file, XEXP (x, 1), code);
10310 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10314 gcc_assert (XVECLEN (x, 0) == 1);
10315 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10316 switch (XINT (x, 1))
10319 fputs ("@GOT", file);
10321 case UNSPEC_GOTOFF:
10322 fputs ("@GOTOFF", file);
10324 case UNSPEC_PLTOFF:
10325 fputs ("@PLTOFF", file);
10327 case UNSPEC_GOTPCREL:
10328 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10329 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10331 case UNSPEC_GOTTPOFF:
10332 /* FIXME: This might be @TPOFF in Sun ld too. */
10333 fputs ("@GOTTPOFF", file);
10336 fputs ("@TPOFF", file);
10338 case UNSPEC_NTPOFF:
10340 fputs ("@TPOFF", file);
10342 fputs ("@NTPOFF", file);
10344 case UNSPEC_DTPOFF:
10345 fputs ("@DTPOFF", file);
10347 case UNSPEC_GOTNTPOFF:
10349 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10350 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10352 fputs ("@GOTNTPOFF", file);
10354 case UNSPEC_INDNTPOFF:
10355 fputs ("@INDNTPOFF", file);
10358 case UNSPEC_MACHOPIC_OFFSET:
10360 machopic_output_function_base_name (file);
10364 output_operand_lossage ("invalid UNSPEC as operand");
10370 output_operand_lossage ("invalid expression as operand");
/* NOTE(review): elided extraction -- the switch on SIZE and some case
   labels are missing from view.  Comments only.  */
10374 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10375 We need to emit DTP-relative relocations. */
10377 static void ATTRIBUTE_UNUSED
10378 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10380 fputs (ASM_LONG, file);
10381 output_addr_const (file, x);
10382 fputs ("@DTPOFF", file);
10388 fputs (", 0", file);
10391 gcc_unreachable ();
/* NOTE(review): elided extraction -- function braces are missing from
   view.  Comments only.  */
10395 /* Return true if X is a representation of the PIC register. This copes
10396 with calls from ix86_find_base_term, where the register might have
10397 been replaced by a cselib value. */
10400 ix86_pic_register_p (rtx x)
/* A cselib VALUE stands in for a register during alias analysis;
   compare it against the PIC register via rtx_equal_for_cselib_p.  */
10402 if (GET_CODE (x) == VALUE)
10403 return (pic_offset_table_rtx
10404 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10406 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
/* NOTE(review): elided extraction -- several statements (assignments to
   `x', early returns of orig_x, braces) are missing from view.  Code
   byte-identical; comments only.  */
10409 /* In the name of slightly smaller debug output, and to cater to
10410 general assembler lossage, recognize PIC+GOTOFF and turn it back
10411 into a direct symbol reference.
10413 On Darwin, this is necessary to avoid a crash, because Darwin
10414 has a different PIC label for each routine but the DWARF debugging
10415 information is not associated with any particular routine, so it's
10416 necessary to remove references to the PIC label from RTL stored by
10417 the DWARF output code. */
10420 ix86_delegitimize_address (rtx orig_x)
10423 /* reg_addend is NULL or a multiple of some register. */
10424 rtx reg_addend = NULL_RTX;
10425 /* const_addend is NULL or a const_int. */
10426 rtx const_addend = NULL_RTX;
10427 /* This is the result, or NULL. */
10428 rtx result = NULL_RTX;
10435 if (GET_CODE (x) != CONST
10436 || GET_CODE (XEXP (x, 0)) != UNSPEC
10437 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10438 || !MEM_P (orig_x))
10440 return XVECEXP (XEXP (x, 0), 0, 0);
10443 if (GET_CODE (x) != PLUS
10444 || GET_CODE (XEXP (x, 1)) != CONST)
10447 if (ix86_pic_register_p (XEXP (x, 0)))
10448 /* %ebx + GOT/GOTOFF */
10450 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10452 /* %ebx + %reg * scale + GOT/GOTOFF */
10453 reg_addend = XEXP (x, 0);
10454 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10455 reg_addend = XEXP (reg_addend, 1);
10456 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10457 reg_addend = XEXP (reg_addend, 0);
10460 if (!REG_P (reg_addend)
10461 && GET_CODE (reg_addend) != MULT
10462 && GET_CODE (reg_addend) != ASHIFT)
10468 x = XEXP (XEXP (x, 1), 0);
10469 if (GET_CODE (x) == PLUS
10470 && CONST_INT_P (XEXP (x, 1)))
10472 const_addend = XEXP (x, 1);
10476 if (GET_CODE (x) == UNSPEC
10477 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10478 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10479 result = XVECEXP (x, 0, 0);
10481 if (TARGET_MACHO && darwin_local_data_pic (x)
10482 && !MEM_P (orig_x))
10483 result = XVECEXP (x, 0, 0);
/* Re-attach any stripped constant and register addends.  */
10489 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10491 result = gen_rtx_PLUS (Pmode, reg_addend, result);
/* NOTE(review): elided extraction -- the TARGET_64BIT guard, braces and
   a return are missing from view.  Comments only.  */
10495 /* If X is a machine specific address (i.e. a symbol or label being
10496 referenced as a displacement from the GOT implemented using an
10497 UNSPEC), then return the base term. Otherwise return X. */
10500 ix86_find_base_term (rtx x)
10506 if (GET_CODE (x) != CONST)
10508 term = XEXP (x, 0);
/* Strip a constant offset (CONST_INT or CONST_DOUBLE) before looking
   for the GOTPCREL unspec.  */
10509 if (GET_CODE (term) == PLUS
10510 && (CONST_INT_P (XEXP (term, 1))
10511 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10512 term = XEXP (term, 0);
10513 if (GET_CODE (term) != UNSPEC
10514 || XINT (term, 1) != UNSPEC_GOTPCREL)
10517 return XVECEXP (term, 0, 0);
10520 return ix86_delegitimize_address (x);
/* NOTE(review): elided extraction -- most of the case labels of the big
   switch on CODE are missing from view, so the suffix table below is
   fragmentary.  Code byte-identical; comments only.  */
/* Emit the condition-code suffix (e.g. "e", "a", "nb") for CODE in MODE
   to FILE.  REVERSE inverts the condition; FP selects the fcmov-friendly
   spellings.  */
10524 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10525 int fp, FILE *file)
10527 const char *suffix;
10529 if (mode == CCFPmode || mode == CCFPUmode)
10531 enum rtx_code second_code, bypass_code;
10532 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10533 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10534 code = ix86_fp_compare_code_to_integer (code);
10538 code = reverse_condition (code);
10589 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10593 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10594 Those same assemblers have the same but opposite lossage on cmov. */
10595 if (mode == CCmode)
10596 suffix = fp ? "nbe" : "a";
10597 else if (mode == CCCmode)
10600 gcc_unreachable ();
10616 gcc_unreachable ();
10620 gcc_assert (mode == CCmode || mode == CCCmode);
10637 gcc_unreachable ();
10641 /* ??? As above. */
10642 gcc_assert (mode == CCmode || mode == CCCmode);
10643 suffix = fp ? "nb" : "ae";
10646 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10650 /* ??? As above. */
10651 if (mode == CCmode)
10653 else if (mode == CCCmode)
10654 suffix = fp ? "nb" : "ae";
10656 gcc_unreachable ();
10659 suffix = fp ? "u" : "p";
10662 suffix = fp ? "nu" : "np";
10665 gcc_unreachable ();
10667 fputs (suffix, file);
/* NOTE(review): elided extraction -- the switch on the size `code' and
   several case labels/braces are missing from view.  Code byte-identical;
   comments only.  */
10670 /* Print the name of register X to FILE based on its machine mode and number.
10671 If CODE is 'w', pretend the mode is HImode.
10672 If CODE is 'b', pretend the mode is QImode.
10673 If CODE is 'k', pretend the mode is SImode.
10674 If CODE is 'q', pretend the mode is DImode.
10675 If CODE is 'x', pretend the mode is V4SFmode.
10676 If CODE is 't', pretend the mode is V8SFmode.
10677 If CODE is 'h', pretend the reg is the 'high' byte register.
10678 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10679 If CODE is 'd', duplicate the operand for AVX instruction.
10683 print_reg (rtx x, int code, FILE *file)
10686 bool duplicated = code == 'd' && TARGET_AVX;
10688 gcc_assert (x == pc_rtx
10689 || (REGNO (x) != ARG_POINTER_REGNUM
10690 && REGNO (x) != FRAME_POINTER_REGNUM
10691 && REGNO (x) != FLAGS_REG
10692 && REGNO (x) != FPSR_REG
10693 && REGNO (x) != FPCR_REG));
10695 if (ASSEMBLER_DIALECT == ASM_ATT)
10700 gcc_assert (TARGET_64BIT);
10701 fputs ("rip", file);
/* Translate the letter code into an operand-size number; after this,
   `code' holds a byte width (GET_MODE_SIZE below).  */
10705 if (code == 'w' || MMX_REG_P (x))
10707 else if (code == 'b')
10709 else if (code == 'k')
10711 else if (code == 'q')
10713 else if (code == 'y')
10715 else if (code == 'h')
10717 else if (code == 'x')
10719 else if (code == 't')
10722 code = GET_MODE_SIZE (GET_MODE (x));
10724 /* Irritatingly, AMD extended registers use different naming convention
10725 from the normal registers. */
10726 if (REX_INT_REG_P (x))
10728 gcc_assert (TARGET_64BIT);
10732 error ("extended registers have no high halves");
10735 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10738 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10741 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10744 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10747 error ("unsupported operand size for extended register");
10757 if (STACK_TOP_P (x))
10766 if (! ANY_FP_REG_P (x))
10767 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10772 reg = hi_reg_name[REGNO (x)];
10775 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10777 reg = qi_reg_name[REGNO (x)];
10780 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10782 reg = qi_high_reg_name[REGNO (x)];
10787 gcc_assert (!duplicated);
10789 fputs (hi_reg_name[REGNO (x)] + 1, file);
10794 gcc_unreachable ();
/* AVX 'd': emit the register a second time, comma-separated.  */
10800 if (ASSEMBLER_DIALECT == ASM_ATT)
10801 fprintf (file, ", %%%s", reg);
10803 fprintf (file, ", %s", reg);
10807 /* Locate some local-dynamic symbol still in use by this function
10808 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: when *PX is a SYMBOL_REF with local-dynamic
   TLS model, cache its name in cfun->machine->some_ld_name (the
   nonzero return that stops the walk is outside this extract).  */
10812 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10816 if (GET_CODE (x) == SYMBOL_REF
10817 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10819 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return the name of some local-dynamic TLS symbol used in the current
   function, caching the answer.  Scans every insn pattern; asserts if
   none is found (callers only ask when one must exist).  */
10826 static const char *
10827 get_some_local_dynamic_name (void)
10831 if (cfun->machine->some_ld_name)
10832 return cfun->machine->some_ld_name;
10834 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10836 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10837 return cfun->machine->some_ld_name;
10839 gcc_unreachable ();
10842 /* Meaning of CODE:
10843 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10844 C -- print opcode suffix for set/cmov insn.
10845 c -- like C, but print reversed condition
10846 E,e -- likewise, but for compare-and-branch fused insn.
10847 F,f -- likewise, but for floating-point.
10848 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10850 R -- print the prefix for register names.
10851 z -- print the opcode suffix for the size of the current operand.
10852 Z -- likewise, with special suffixes for x87 instructions.
10853 * -- print a star (in certain assembler syntax)
10854 A -- print an absolute memory reference.
10855 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10856 s -- print a shift double count, followed by the assemblers argument
10858 b -- print the QImode name of the register for the indicated operand.
10859 %b0 would print %al if operands[0] is reg 0.
10860 w -- likewise, print the HImode name of the register.
10861 k -- likewise, print the SImode name of the register.
10862 q -- likewise, print the DImode name of the register.
10863 x -- likewise, print the V4SFmode name of the register.
10864 t -- likewise, print the V8SFmode name of the register.
10865 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10866 y -- print "st(0)" instead of "st" as a register.
10867 d -- print duplicated register operand for AVX instruction.
10868 D -- print condition for SSE cmp instruction.
10869 P -- if PIC, print an @PLT suffix.
10870 X -- don't print any sort of PIC '@' suffix for a symbol.
10871 & -- print some in-use local-dynamic symbol name.
10872 H -- print a memory address offset by 8; used for sse high-parts
10873 Y -- print condition for SSE5 com* instruction.
10874 + -- print a branch hint as 'cs' or 'ds' prefix
10875 ; -- print a semicolon (after prefixes due to bug in older gas).
/* Main operand-printing hook (PRINT_OPERAND).  X is the operand rtx,
   CODE the modifier letter documented above (0 for none).
   NOTE(review): sparse extract -- case labels and many else-arms are
   missing; read alongside the full source.  */
10879 print_operand (FILE *file, rtx x, int code)
10886 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': emit a cached local-dynamic TLS symbol name.  */
10891 assemble_name (file, get_some_local_dynamic_name ());
10895 switch (ASSEMBLER_DIALECT)
10902 /* Intel syntax. For absolute addresses, registers should not
10903 be surrounded by braces. */
10907 PRINT_OPERAND (file, x, 0);
10914 gcc_unreachable ();
10917 PRINT_OPERAND (file, x, 0);
/* Explicit size-suffix letters (L/W/B/Q/S/T) are AT&T-only.  */
10922 if (ASSEMBLER_DIALECT == ASM_ATT)
10927 if (ASSEMBLER_DIALECT == ASM_ATT)
10932 if (ASSEMBLER_DIALECT == ASM_ATT)
10937 if (ASSEMBLER_DIALECT == ASM_ATT)
10942 if (ASSEMBLER_DIALECT == ASM_ATT)
10947 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z': derive the suffix from the operand's own mode.  */
10952 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10954 /* Opcodes don't get size suffixes if using Intel opcodes. */
10955 if (ASSEMBLER_DIALECT == ASM_INTEL)
10958 switch (GET_MODE_SIZE (GET_MODE (x)))
10977 output_operand_lossage
10978 ("invalid operand size for operand code '%c'", code);
10983 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10985 (0, "non-integer operand used with operand code '%c'", code);
10989 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
10990 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* 'Z': x87 fild/fist-style suffixes; quad integers need assembler
   support probed at configure time (HAVE_AS_IX86_FILDQ etc.).  */
10993 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10995 switch (GET_MODE_SIZE (GET_MODE (x)))
10998 #ifdef HAVE_AS_IX86_FILDS
11008 #ifdef HAVE_AS_IX86_FILDQ
11011 fputs ("ll", file);
11019 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11021 /* 387 opcodes don't get size suffixes
11022 if the operands are registers. */
11023 if (STACK_REG_P (x))
11026 switch (GET_MODE_SIZE (GET_MODE (x)))
11047 output_operand_lossage
11048 ("invalid operand type used with operand code '%c'", code);
11052 output_operand_lossage
11053 ("invalid operand size for operand code '%c'", code);
/* 's': shift-double count; constants are printed inline, followed by
   a comma, otherwise the count register is implied.  */
11070 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11072 PRINT_OPERAND (file, x, 0);
11073 fputs (", ", file);
11078 /* Little bit of braindamage here. The SSE compare instructions
11079 does use completely different names for the comparisons that the
11080 fp conditional moves. */
/* 'D' (AVX branch): full 32-predicate cmp condition names.  */
11083 switch (GET_CODE (x))
11086 fputs ("eq", file);
11089 fputs ("eq_us", file);
11092 fputs ("lt", file);
11095 fputs ("nge", file);
11098 fputs ("le", file);
11101 fputs ("ngt", file);
11104 fputs ("unord", file);
11107 fputs ("neq", file);
11110 fputs ("neq_oq", file);
11113 fputs ("ge", file);
11116 fputs ("nlt", file);
11119 fputs ("gt", file);
11122 fputs ("nle", file);
11125 fputs ("ord", file);
11128 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'D' (non-AVX branch): legacy 8-predicate cmpss/cmpsd names.  */
11134 switch (GET_CODE (x))
11138 fputs ("eq", file);
11142 fputs ("lt", file);
11146 fputs ("le", file);
11149 fputs ("unord", file);
11153 fputs ("neq", file);
11157 fputs ("nlt", file);
11161 fputs ("nle", file);
11164 fputs ("ord", file);
11167 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'O': Sun assembler wants cmov size spelled as a ".w"/".l"/".q"
   suffix letter; probed via HAVE_AS_IX86_CMOV_SUN_SYNTAX.  */
11173 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11174 if (ASSEMBLER_DIALECT == ASM_ATT)
11176 switch (GET_MODE (x))
11178 case HImode: putc ('w', file); break;
11180 case SFmode: putc ('l', file); break;
11182 case DFmode: putc ('q', file); break;
11183 default: gcc_unreachable ();
/* 'C'/'F'/'c'/'f'/'E'/'e': condition-code suffixes via
   put_condition_code; reject non-comparison operands up front.  */
11190 if (!COMPARISON_P (x))
11192 output_operand_lossage ("operand is neither a constant nor a "
11193 "condition code, invalid operand code "
11197 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11200 if (!COMPARISON_P (x))
11202 output_operand_lossage ("operand is neither a constant nor a "
11203 "condition code, invalid operand code "
11207 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11208 if (ASSEMBLER_DIALECT == ASM_ATT)
11211 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11214 /* Like above, but reverse condition */
11216 /* Check to see if argument to %c is really a constant
11217 and not a condition code which needs to be reversed. */
11218 if (!COMPARISON_P (x))
11220 output_operand_lossage ("operand is neither a constant nor a "
11221 "condition code, invalid operand "
11225 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11228 if (!COMPARISON_P (x))
11230 output_operand_lossage ("operand is neither a constant nor a "
11231 "condition code, invalid operand "
11235 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11236 if (ASSEMBLER_DIALECT == ASM_ATT)
11239 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e': fused compare-and-branch always uses CCmode.  */
11243 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11247 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
/* 'H': address of the high 8 bytes of a 16-byte operand.  */
11251 /* It doesn't actually matter what mode we use here, as we're
11252 only going to use this for printing. */
11253 x = adjust_address_nv (x, DImode, 8);
/* '+': branch-prediction prefix, only when profile data says the
   static heuristic (forward = not taken) would guess wrong.  */
11261 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS
11264 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11267 int pred_val = INTVAL (XEXP (x, 0));
11269 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11270 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11272 int taken = pred_val > REG_BR_PROB_BASE / 2;
11273 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11275 /* Emit hints only in the case default branch prediction
11276 heuristics would fail. */
11277 if (taken != cputaken)
11279 /* We use 3e (DS) prefix for taken branches and
11280 2e (CS) prefix for not taken branches. */
11282 fputs ("ds ; ", file);
11284 fputs ("cs ; ", file);
/* 'Y': SSE5 com* condition names.  */
11292 switch (GET_CODE (x))
11295 fputs ("neq", file);
11298 fputs ("eq", file);
11302 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11306 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11310 fputs ("le", file);
11314 fputs ("lt", file);
11317 fputs ("unord", file);
11320 fputs ("ord", file);
11323 fputs ("ueq", file);
11326 fputs ("nlt", file);
11329 fputs ("nle", file);
11332 fputs ("ule", file);
11335 fputs ("ult", file);
11338 fputs ("une", file);
/* NOTE(review): this diagnostic names operand code 'D' but we are in
   the 'Y' case -- message text looks copy-pasted from the 'D'
   handler; later GCC releases corrected it to 'Y'.  Left unchanged
   here (runtime string).  */
11341 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* ';': separator after prefixes, for old gas versions.  */
11348 fputs (" ; ", file);
11355 output_operand_lossage ("invalid operand code '%c'", code);
/* No modifier handled above: dispatch on the operand's own form.  */
11360 print_reg (x, code, file);
11362 else if (MEM_P (x))
11364 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11365 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11366 && GET_MODE (x) != BLKmode)
11369 switch (GET_MODE_SIZE (GET_MODE (x)))
11371 case 1: size = "BYTE"; break;
11372 case 2: size = "WORD"; break;
11373 case 4: size = "DWORD"; break;
11374 case 8: size = "QWORD"; break;
11375 case 12: size = "XWORD"; break;
11377 if (GET_MODE (x) == XFmode)
11383 gcc_unreachable ();
11386 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11389 else if (code == 'w')
11391 else if (code == 'k')
11394 fputs (size, file);
11395 fputs (" PTR ", file);
11399 /* Avoid (%rip) for call operands. */
11400 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11401 && !CONST_INT_P (x))
11402 output_addr_const (file, x);
11403 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11404 output_operand_lossage ("invalid constraints for operand");
11406 output_address (x);
/* SFmode immediates are emitted as their IEEE bit pattern in hex.  */
11409 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11414 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11415 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11417 if (ASSEMBLER_DIALECT == ASM_ATT)
11419 fprintf (file, "0x%08lx", (long unsigned int) l);
11422 /* These float cases don't actually occur as immediate operands. */
11423 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11427 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11428 fprintf (file, "%s", dstr);
11431 else if (GET_CODE (x) == CONST_DOUBLE
11432 && GET_MODE (x) == XFmode)
11436 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11437 fprintf (file, "%s", dstr);
11442 /* We have patterns that allow zero sets of memory, for instance.
11443 In 64-bit mode, we should probably support all 8-byte vectors,
11444 since we can in fact encode that into an immediate. */
11445 if (GET_CODE (x) == CONST_VECTOR)
11447 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediate constants: AT&T wants a '$'; Intel wants "OFFSET FLAT:"
   before symbolic addresses.  */
11453 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11455 if (ASSEMBLER_DIALECT == ASM_ATT)
11458 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11459 || GET_CODE (x) == LABEL_REF)
11461 if (ASSEMBLER_DIALECT == ASM_ATT)
11464 fputs ("OFFSET FLAT:", file);
11467 if (CONST_INT_P (x))
11468 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11470 output_pic_addr_const (file, x, code);
11472 output_addr_const (file, x);
11476 /* Print a memory operand whose address is ADDR. */
/* PRINT_OPERAND_ADDRESS hook: decompose ADDR into base/index/disp/
   scale/segment and print it in the current assembler dialect.  */
11479 print_operand_address (FILE *file, rtx addr)
11481 struct ix86_address parts;
11482 rtx base, index, disp;
11484 int ok = ix86_decompose_address (addr, &parts);
11489 index = parts.index;
11491 scale = parts.scale;
/* Explicit %fs:/%gs: segment override, when present.  */
11499 if (ASSEMBLER_DIALECT == ASM_ATT)
11501 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11504 gcc_unreachable ();
11507 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11508 if (TARGET_64BIT && !base && !index)
/* Peel a CONST (PLUS sym const_int) wrapper to find the symbol; only
   labels and non-TLS symbols may become RIP-relative.  */
11512 if (GET_CODE (disp) == CONST
11513 && GET_CODE (XEXP (disp, 0)) == PLUS
11514 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11515 symbol = XEXP (XEXP (disp, 0), 0);
11517 if (GET_CODE (symbol) == LABEL_REF
11518 || (GET_CODE (symbol) == SYMBOL_REF
11519 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11522 if (!base && !index)
11524 /* Displacement only requires special attention. */
11526 if (CONST_INT_P (disp))
/* Bare constant address in Intel syntax needs an explicit ds:
   so it is not mistaken for an immediate.  */
11528 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11529 fputs ("ds:", file);
11530 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11533 output_pic_addr_const (file, disp, 0);
11535 output_addr_const (file, disp);
/* AT&T form: disp(base,index,scale).  */
11539 if (ASSEMBLER_DIALECT == ASM_ATT)
11544 output_pic_addr_const (file, disp, 0);
11545 else if (GET_CODE (disp) == LABEL_REF)
11546 output_asm_label (disp);
11548 output_addr_const (file, disp);
11553 print_reg (base, 0, file);
11557 print_reg (index, 0, file);
11559 fprintf (file, ",%d", scale);
/* Intel form: [base+index*scale+disp]; a symbol's constant offset is
   split out so sign can be printed explicitly.  */
11565 rtx offset = NULL_RTX;
11569 /* Pull out the offset of a symbol; print any symbol itself. */
11570 if (GET_CODE (disp) == CONST
11571 && GET_CODE (XEXP (disp, 0)) == PLUS
11572 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11574 offset = XEXP (XEXP (disp, 0), 1);
11575 disp = gen_rtx_CONST (VOIDmode,
11576 XEXP (XEXP (disp, 0), 0));
11580 output_pic_addr_const (file, disp, 0);
11581 else if (GET_CODE (disp) == LABEL_REF)
11582 output_asm_label (disp);
11583 else if (CONST_INT_P (disp))
11586 output_addr_const (file, disp);
11592 print_reg (base, 0, file);
11595 if (INTVAL (offset) >= 0)
11597 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11601 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11608 print_reg (index, 0, file);
11610 fprintf (file, "*%d", scale);
/* OUTPUT_ADDR_CONST_EXTRA hook: print target-specific UNSPEC address
   constants (TLS relocation operators such as @GOTTPOFF, @TPOFF,
   @NTPOFF, @DTPOFF, @INDNTPOFF, and the Mach-O PIC offset).  Returns
   false for anything it does not recognize (returns not visible in
   this extract).  */
11618 output_addr_const_extra (FILE *file, rtx x)
11622 if (GET_CODE (x) != UNSPEC)
11625 op = XVECEXP (x, 0, 0);
11626 switch (XINT (x, 1))
11628 case UNSPEC_GOTTPOFF:
11629 output_addr_const (file, op);
11630 /* FIXME: This might be @TPOFF in Sun ld. */
11631 fputs ("@GOTTPOFF", file);
11634 output_addr_const (file, op);
11635 fputs ("@TPOFF", file);
/* NTPOFF spells differently in 64-bit vs 32-bit mode (the guard
   between the two fputs calls is outside this extract).  */
11637 case UNSPEC_NTPOFF:
11638 output_addr_const (file, op);
11640 fputs ("@TPOFF", file);
11642 fputs ("@NTPOFF", file);
11644 case UNSPEC_DTPOFF:
11645 output_addr_const (file, op);
11646 fputs ("@DTPOFF", file);
11648 case UNSPEC_GOTNTPOFF:
11649 output_addr_const (file, op);
11651 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11652 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11654 fputs ("@GOTNTPOFF", file);
11656 case UNSPEC_INDNTPOFF:
11657 output_addr_const (file, op);
11658 fputs ("@INDNTPOFF", file);
11661 case UNSPEC_MACHOPIC_OFFSET:
11662 output_addr_const (file, op);
11664 machopic_output_function_base_name (file);
11675 /* Split one or more DImode RTL references into pairs of SImode
11676 references. The RTL can be REG, offsettable MEM, integer constant, or
11677 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11678 split and "num" is its length. lo_half and hi_half are output arrays
11679 that parallel "operands". */
11682 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11686 rtx op = operands[num];
11688 /* simplify_subreg refuse to split volatile memory addresses,
11689 but we still have to handle it. */
/* MEM path: low word at offset 0, high word at offset 4.  */
11692 lo_half[num] = adjust_address (op, SImode, 0);
11693 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM path: subreg split; VOIDmode constants are treated as
   DImode so the subreg machinery knows the source width.  */
11697 lo_half[num] = simplify_gen_subreg (SImode, op,
11698 GET_MODE (op) == VOIDmode
11699 ? DImode : GET_MODE (op), 0);
11700 hi_half[num] = simplify_gen_subreg (SImode, op,
11701 GET_MODE (op) == VOIDmode
11702 ? DImode : GET_MODE (op), 4);
11706 /* Split one or more TImode RTL references into pairs of DImode
11707 references. The RTL can be REG, offsettable MEM, integer constant, or
11708 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11709 split and "num" is its length. lo_half and hi_half are output arrays
11710 that parallel "operands". */
/* TImode analogue of split_di: halves are DImode, high half at
   byte offset 8.  */
11713 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11717 rtx op = operands[num];
11719 /* simplify_subreg refuse to split volatile memory addresses, but we
11720 still have to handle it. */
11723 lo_half[num] = adjust_address (op, DImode, 0);
11724 hi_half[num] = adjust_address (op, DImode, 8);
11728 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11729 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11734 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11735 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11736 is the expression of the binary operation. The output may either be
11737 emitted here, or returned to the caller, like all output_* functions.
11739 There is no guarantee that the operands are the same mode, as they
11740 might be within FLOAT or FLOAT_EXTEND expressions. */
11742 #ifndef SYSV386_COMPAT
11743 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11744 wants to fix the assemblers because that causes incompatibility
11745 with gcc. No-one wants to fix gcc because that causes
11746 incompatibility with assemblers... You can use the option of
11747 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11748 #define SYSV386_COMPAT 1
/* Returns a template built in a static buffer, so the result must be
   consumed before the next call.  NOTE(review): sparse extract --
   opcode-string selection and several guards are not visible.  */
11752 output_387_binary_op (rtx insn, rtx *operands)
11754 static char buf[40];
11757 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11759 #ifdef ENABLE_CHECKING
11760 /* Even if we do not want to check the inputs, this documents input
11761 constraints. Which helps in understanding the following code. */
11762 if (STACK_REG_P (operands[0])
11763 && ((REG_P (operands[1])
11764 && REGNO (operands[0]) == REGNO (operands[1])
11765 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11766 || (REG_P (operands[2])
11767 && REGNO (operands[0]) == REGNO (operands[2])
11768 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11769 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11772 gcc_assert (is_sse);
/* Pick the mnemonic stems for the four supported operations; integer
   operands select the fi* forms.  */
11775 switch (GET_CODE (operands[3]))
11778 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11779 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11787 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11788 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11796 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11797 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11805 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11806 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11814 gcc_unreachable ();
/* SSE path: three-operand AVX form vs two-operand legacy form,
   with ss/sd chosen by the destination's scalar mode.  */
11821 strcpy (buf, ssep);
11822 if (GET_MODE (operands[0]) == SFmode)
11823 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11825 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11829 strcpy (buf, ssep + 1);
11830 if (GET_MODE (operands[0]) == SFmode)
11831 strcat (buf, "ss\t{%2, %0|%0, %2}");
11833 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: select the operand-order/popping suffix.  */
11839 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
11843 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11845 rtx temp = operands[2];
11846 operands[2] = operands[1];
11847 operands[1] = temp;
11850 /* know operands[0] == operands[1]. */
11852 if (MEM_P (operands[2]))
11858 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11860 if (STACK_TOP_P (operands[0]))
11861 /* How is it that we are storing to a dead operand[2]?
11862 Well, presumably operands[1] is dead too. We can't
11863 store the result to st(0) as st(0) gets popped on this
11864 instruction. Instead store to operands[2] (which I
11865 think has to be st(1)). st(1) will be popped later.
11866 gcc <= 2.8.1 didn't have this check and generated
11867 assembly code that the Unixware assembler rejected. */
11868 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11870 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11874 if (STACK_TOP_P (operands[0]))
11875 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11877 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative MINUS/DIV: memory or dying operands force the
   reversed (r) and/or popping (p) forms.  */
11882 if (MEM_P (operands[1]))
11888 if (MEM_P (operands[2]))
11894 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11897 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11898 derived assemblers, confusingly reverse the direction of
11899 the operation for fsub{r} and fdiv{r} when the
11900 destination register is not st(0). The Intel assembler
11901 doesn't have this brain damage. Read !SYSV386_COMPAT to
11902 figure out what the hardware really does. */
11903 if (STACK_TOP_P (operands[0]))
11904 p = "{p\t%0, %2|rp\t%2, %0}";
11906 p = "{rp\t%2, %0|p\t%0, %2}";
11908 if (STACK_TOP_P (operands[0]))
11909 /* As above for fmul/fadd, we can't store to st(0). */
11910 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11912 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11917 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11920 if (STACK_TOP_P (operands[0]))
11921 p = "{rp\t%0, %1|p\t%1, %0}";
11923 p = "{p\t%1, %0|rp\t%0, %1}";
11925 if (STACK_TOP_P (operands[0]))
11926 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11928 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11933 if (STACK_TOP_P (operands[0]))
11935 if (STACK_TOP_P (operands[1]))
11936 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11938 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11941 else if (STACK_TOP_P (operands[1]))
11944 p = "{\t%1, %0|r\t%0, %1}";
11946 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11952 p = "{r\t%2, %0|\t%0, %2}";
11954 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11960 gcc_unreachable ();
11967 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Mode-switching hook for the x87 control word: returns which rounding
   /exception configuration INSN requires.  Calls and asm statements
   clobber the CW (UNINITIALIZED); insns without a recognized pattern
   impose no requirement (ANY).  */
11970 ix86_mode_needed (int entity, rtx insn)
11972 enum attr_i387_cw mode;
11974 /* The mode UNINITIALIZED is used to store control word after a
11975 function call or ASM pattern. The mode ANY specify that function
11976 has no requirements on the control word and make no changes in the
11977 bits we are interested in. */
11980 || (NONJUMP_INSN_P (insn)
11981 && (asm_noperands (PATTERN (insn)) >= 0
11982 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11983 return I387_CW_UNINITIALIZED;
11985 if (recog_memoized (insn) < 0)
11986 return I387_CW_ANY;
/* The insn's i387_cw attribute names the CW mode it needs; the
   entity checks guarding each comparison are outside this extract.  */
11988 mode = get_attr_i387_cw (insn);
11993 if (mode == I387_CW_TRUNC)
11998 if (mode == I387_CW_FLOOR)
12003 if (mode == I387_CW_CEIL)
12008 if (mode == I387_CW_MASK_PM)
12013 gcc_unreachable ();
12016 return I387_CW_ANY;
12019 /* Output code to initialize control word copies used by trunc?f?i and
12020 rounding patterns. CURRENT_MODE is set to current control word,
12021 while NEW_MODE is set to new control word. */
/* Store the current x87 control word to a stack slot, compute the
   modified CW for MODE (trunc/floor/ceil/mask-PM) in a scratch reg,
   and store that to the per-mode stack slot used by fldcw.  */
12024 emit_i387_cw_initialization (int mode)
12026 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12029 enum ix86_stack_slot slot;
12031 rtx reg = gen_reg_rtx (HImode);
12033 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12034 emit_move_insn (reg, copy_rtx (stored_mode));
/* Slow path (64-bit, partial-reg-stall targets, or -Os): plain
   HImode and/or with immediate masks.  CW bits 10-11 (0x0c00) are
   the rounding control; 0x0020 masks the precision exception.  */
12036 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12037 || optimize_function_for_size_p (cfun))
12041 case I387_CW_TRUNC:
12042 /* round toward zero (truncate) */
12043 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)))
12044 slot = SLOT_CW_TRUNC;
12047 case I387_CW_FLOOR:
12048 /* round down toward -oo */
12049 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12050 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12051 slot = SLOT_CW_FLOOR;
12055 /* round up toward +oo */
12056 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12057 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12058 slot = SLOT_CW_CEIL;
12061 case I387_CW_MASK_PM:
12062 /* mask precision exception for nearbyint() */
12063 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12064 slot = SLOT_CW_MASK_PM;
12068 gcc_unreachable ();
/* Fast path: write the 2-bit rounding field directly with an insv
   (insert into bits 10-11), avoiding partial-register ops.  */
12075 case I387_CW_TRUNC:
12076 /* round toward zero (truncate) */
12077 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12078 slot = SLOT_CW_TRUNC;
12081 case I387_CW_FLOOR:
12082 /* round down toward -oo */
12083 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12084 slot = SLOT_CW_FLOOR;
12088 /* round up toward +oo */
12089 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12090 slot = SLOT_CW_CEIL;
12093 case I387_CW_MASK_PM:
12094 /* mask precision exception for nearbyint() */
12095 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12096 slot = SLOT_CW_MASK_PM;
12100 gcc_unreachable ();
12104 gcc_assert (slot < MAX_386_STACK_LOCALS);
12106 new_mode = assign_386_stack_local (HImode, slot);
12107 emit_move_insn (new_mode, reg);
12110 /* Output code for INSN to convert a float to a signed int. OPERANDS
12111 are the insn operands. The output may be [HSD]Imode and the input
12112 operand may be [SDX]Fmode. */
/* FISTTP selects the SSE3 truncating-store form; otherwise a classic
   fldcw/fistp/fldcw sequence around the stored control words in
   operands[2]/operands[3].  */
12115 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12117 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12118 int dimode_p = GET_MODE (operands[0]) == DImode;
12119 int round_mode = get_attr_i387_cw (insn);
12121 /* Jump through a hoop or two for DImode, since the hardware has no
12122 non-popping instruction. We used to do this a different way, but
12123 that was somewhat fragile and broke with post-reload splitters. */
12124 if ((dimode_p || fisttp) && !stack_top_dies)
12125 output_asm_insn ("fld\t%y1", operands);
12127 gcc_assert (STACK_TOP_P (operands[1]));
12128 gcc_assert (MEM_P (operands[0]));
12129 gcc_assert (GET_MODE (operands[1]) != TFmode);
12132 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Non-fisttp path: swap in the truncating control word (%3), store,
   then restore the original control word (%2).  */
12135 if (round_mode != I387_CW_ANY)
12136 output_asm_insn ("fldcw\t%3", operands);
12137 if (stack_top_dies || dimode_p)
12138 output_asm_insn ("fistp%Z0\t%0", operands);
12140 output_asm_insn ("fist%Z0\t%0", operands);
12141 if (round_mode != I387_CW_ANY)
12142 output_asm_insn ("fldcw\t%2", operands);
12148 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12149 have the values zero or one, indicates the ffreep insn's operand
12150 from the OPERANDS array. */
12152 static const char *
12153 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12155 if (TARGET_USE_FFREEP)
12156 #if HAVE_AS_IX86_FFREEP
12157 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit the raw opcode word.  ffreep st(i) is
   DF C0+i; as a little-endian .word that is 0xc0df..0xc7df, so the
   '_' placeholder at index 9 of ".word\t0xc_df" is patched with the
   stack-register digit.  */
12160 static char retval[] = ".word\t0xc_df";
12161 int regno = REGNO (operands[opno]);
12163 gcc_assert (FP_REGNO_P (regno));
12165 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not wanted: fstp pops st(0) into %yN.  */
12170 return opno ? "fstp\t%y1" : "fstp\t%y0";
12174 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12175 should be used. UNORDERED_P is true when fucom should be used. */
12178 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12180 int stack_top_dies;
12181 rtx cmp_op0, cmp_op1;
12182 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Operand layout differs between the eflags (fcomi) and fnstsw
   forms; pick the pair actually being compared.  */
12186 cmp_op0 = operands[0];
12187 cmp_op1 = operands[1];
12191 cmp_op0 = operands[1];
12192 cmp_op1 = operands[2];
/* SSE scalar compares: [v]ucomis[sd] / [v]comis[sd]; the +1 offset
   skips the 'v' prefix on non-AVX targets.  */
12197 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12198 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12199 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12200 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12202 if (GET_MODE (operands[0]) == SFmode)
12204 return &ucomiss[TARGET_AVX ? 0 : 1];
12206 return &comiss[TARGET_AVX ? 0 : 1];
12209 return &ucomisd[TARGET_AVX ? 0 : 1];
12211 return &comisd[TARGET_AVX ? 0 : 1];
12214 gcc_assert (STACK_TOP_P (cmp_op0));
12216 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, freeing st(0) afterwards if it dies.  */
12218 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12220 if (stack_top_dies)
12222 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12223 return output_387_ffreep (operands, 1);
12226 return "ftst\n\tfnstsw\t%0";
12229 if (STACK_REG_P (cmp_op1)
12231 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12232 && REGNO (cmp_op1) != FIRST_STACK_REG)
12234 /* If both the top of the 387 stack dies, and the other operand
12235 is also a stack register that dies, then this must be a
12236 `fcompp' float compare */
12240 /* There is no double popping fcomi variant. Fortunately,
12241 eflags is immune from the fstp's cc clobbering. */
12243 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12245 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12246 return output_387_ffreep (operands, 0);
12251 return "fucompp\n\tfnstsw\t%0";
12253 return "fcompp\n\tfnstsw\t%0";
12258 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12260 static const char * const alt[16] =
12262 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12263 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12264 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12265 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12267 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12268 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12272 "fcomi\t{%y1, %0|%0, %y1}",
12273 "fcomip\t{%y1, %0|%0, %y1}",
12274 "fucomi\t{%y1, %0|%0, %y1}",
12275 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into ALT per the encoding comment above.  */
12286 mask = eflags_p << 3;
12287 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12288 mask |= unordered_p << 1;
12289 mask |= stack_top_dies;
12291 gcc_assert (mask < 16);
/* Emit one absolute jump-table entry for local label number VALUE:
   .long (or .quad when the 64-bit branch applies) Lnn.  */
12300 ix86_output_addr_vec_elt (FILE *file, int value)
12302 const char *directive = ASM_LONG;
12306 directive = ASM_QUAD;
12308 gcc_assert (!TARGET_64BIT);
12311 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one relative (PIC) jump-table entry: the difference between
   local labels VALUE and REL, or a @GOTOFF / GOT-relative form
   depending on target and assembler capabilities.  */
12315 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12317 const char *directive = ASM_LONG;
12320 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12321 directive = ASM_QUAD;
12323 gcc_assert (!TARGET_64BIT);
12325 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12326 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12327 fprintf (file, "%s%s%d-%s%d\n",
12328 directive, LPREFIX, value, LPREFIX, rel);
12329 else if (HAVE_AS_GOTOFF_IN_DATA)
12330 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12332 else if (TARGET_MACHO)
12334 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12335 machopic_output_function_base_name (file);
12336 fprintf(file, "\n");
/* Fallback: express the entry relative to the GOT symbol.  */
12340 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12341 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12344 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Emit the cheapest zeroing of DEST.  Only valid post-reload since it
   rewrites the register's mode; xor form adds a flags clobber.  */
12348 ix86_expand_clear (rtx dest)
12352 /* We play register width games, which are only valid after reload. */
12353 gcc_assert (reload_completed);
12355 /* Avoid HImode and its attendant prefix byte. */
12356 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12357 dest = gen_rtx_REG (SImode, REGNO (dest));
12358 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12360 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12361 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12363 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12364 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12370 /* X is an unchanging MEM.  If it is a constant pool reference, return
12371    the constant pool rtx, else NULL.  */
12374 maybe_get_pool_constant (rtx x)
/* Delegitimize first so PIC-decorated pool addresses are recognized.  */
12376 x = ix86_delegitimize_address (XEXP (x, 0));
12378 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12379 return get_pool_constant (x);
/* Expander for scalar moves (movM patterns).  Legitimizes TLS and
   dllimport symbols, handles Darwin/ELF PIC addresses, forces awkward
   immediates and FP constants into registers or the constant pool,
   and finally emits the SET.
   NOTE(review): elided view — several branches and the final closing
   logic are missing lines; comments below describe only the visible
   statements.  */
12385 ix86_expand_move (enum machine_mode mode, rtx operands[])
12388 enum tls_model model;
/* TLS symbols must be legitimized before any other handling.  */
12393 if (GET_CODE (op1) == SYMBOL_REF)
12395 model = SYMBOL_REF_TLS_MODEL (op1);
12398 op1 = legitimize_tls_address (op1, model, true);
12399 op1 = force_operand (op1, op0);
12403 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12404 && SYMBOL_REF_DLLIMPORT_P (op1))
12405 op1 = legitimize_dllimport_symbol (op1, false);
/* (symbol + offset): legitimize the symbol part, then re-add the
   addend with a plain PLUS.  */
12407 else if (GET_CODE (op1) == CONST
12408 && GET_CODE (XEXP (op1, 0)) == PLUS
12409 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12411 rtx addend = XEXP (XEXP (op1, 0), 1);
12412 rtx symbol = XEXP (XEXP (op1, 0), 0);
12415 model = SYMBOL_REF_TLS_MODEL (symbol);
12417 tmp = legitimize_tls_address (symbol, model, true);
12418 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12419 && SYMBOL_REF_DLLIMPORT_P (symbol))
12420 tmp = legitimize_dllimport_symbol (symbol, true);
12424 tmp = force_operand (tmp, NULL);
12425 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12426 op0, 1, OPTAB_DIRECT);
/* PIC symbolic operands need target-specific legitimization.  */
12432 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12434 if (TARGET_MACHO && !TARGET_64BIT)
12439 rtx temp = ((reload_in_progress
12440 || ((op0 && REG_P (op0))
12442 ? op0 : gen_reg_rtx (Pmode));
12443 op1 = machopic_indirect_data_reference (op1, temp);
12444 op1 = machopic_legitimize_pic_address (op1, mode,
12445 temp == op1 ? 0 : temp);
12447 else if (MACHOPIC_INDIRECT)
12448 op1 = machopic_indirect_data_reference (op1, 0);
12456 op1 = force_reg (Pmode, op1);
12457 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12459 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12460 op1 = legitimize_pic_address (op1, reg);
12469 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12470 || !push_operand (op0, mode))
12472 op1 = force_reg (mode, op1);
12474 if (push_operand (op0, mode)
12475 && ! general_no_elim_operand (op1, mode))
12476 op1 = copy_to_mode_reg (mode, op1);
12478 /* Force large constants in 64bit compilation into register
12479 to get them CSEed.  */
12480 if (can_create_pseudo_p ()
12481 && (mode == DImode) && TARGET_64BIT
12482 && immediate_operand (op1, mode)
12483 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12484 && !register_operand (op0, mode)
12486 op1 = copy_to_mode_reg (mode, op1);
12488 if (can_create_pseudo_p ()
12489 && FLOAT_MODE_P (mode)
12490 && GET_CODE (op1) == CONST_DOUBLE)
12492 /* If we are loading a floating point constant to a register,
12493 force the value to memory now, since we'll get better code
12494 out the back end.  */
12496 op1 = validize_mem (force_const_mem (mode, op1));
12497 if (!register_operand (op0, mode))
12499 rtx temp = gen_reg_rtx (mode);
12500 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12501 emit_move_insn (op0, temp);
12507 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expander for vector moves.  Forces non-zero vector constants into
   the constant pool, routes under-aligned SSE memory operands through
   the misaligned-move expander, and otherwise emits a plain SET.
   NOTE(review): elided view — a few brace/return lines are missing.  */
12511 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12513 rtx op0 = operands[0], op1 = operands[1];
12514 unsigned int align = GET_MODE_ALIGNMENT (mode);
12516 /* Force constants other than zero into memory.  We do not know how
12517 the instructions used to build constants modify the upper 64 bits
12518 of the register, once we have that information we may be able
12519 to handle some of them more efficiently.  */
12520 if (can_create_pseudo_p ()
12521 && register_operand (op0, mode)
12522 && (CONSTANT_P (op1)
12523 || (GET_CODE (op1) == SUBREG
12524 && CONSTANT_P (SUBREG_REG (op1))))
/* standard_sse_constant_p > 0 means cheaply materializable (e.g. all
   zeros/ones) — those stay as immediates.  */
12525 && standard_sse_constant_p (op1) <= 0)
12526 op1 = validize_mem (force_const_mem (mode, op1));
12528 /* We need to check memory alignment for SSE mode since attribute
12529 can make operands unaligned.  */
12530 if (can_create_pseudo_p ()
12531 && SSE_REG_MODE_P (mode)
12532 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12533 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12537 /* ix86_expand_vector_move_misalign() does not like constants ...  */
12538 if (CONSTANT_P (op1)
12539 || (GET_CODE (op1) == SUBREG
12540 && CONSTANT_P (SUBREG_REG (op1))))
12541 op1 = validize_mem (force_const_mem (mode, op1));
12543 /* ... nor both arguments in memory.  */
12544 if (!register_operand (op0, mode)
12545 && !register_operand (op1, mode))
12546 op1 = force_reg (mode, op1);
12548 tmp[0] = op0; tmp[1] = op1;
12549 ix86_expand_vector_move_misalign (mode, tmp);
12553 /* Make operand1 a register if it isn't already.  */
12554 if (can_create_pseudo_p ()
12555 && !register_operand (op0, mode)
12556 && !register_operand (op1, mode))
12558 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12562 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12565 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
12566    straight to ix86_expand_vector_move.  */
12567 /* Code generation for scalar reg-reg moves of single and double precision data:
12568 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12572 if (x86_sse_partial_reg_dependency == true)
12577 Code generation for scalar loads of double precision data:
12578 if (x86_sse_split_regs == true)
12579 movlpd mem, reg (gas syntax)
12583 Code generation for unaligned packed loads of single precision data
12584 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12585 if (x86_sse_unaligned_move_optimal)
12588 if (x86_sse_partial_reg_dependency == true)
12600 Code generation for unaligned packed loads of double precision data
12601 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12602 if (x86_sse_unaligned_move_optimal)
12605 if (x86_sse_split_regs == true)
/* Emits the best unaligned load/store sequence per target tuning:
   AVX vmovdqu/vmovups/vmovupd, SSE movups/movdqu/movupd, or split
   low/high-half loads (loadlpd+loadhpd, loadlps+loadhps) to avoid
   partial-register stalls.
   NOTE(review): elided view — the AVX dispatch, several braces and
   returns are missing; comments describe only visible statements.  */
12618 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path: dispatch on mode class and size.  */
12627 switch (GET_MODE_CLASS (mode))
12629 case MODE_VECTOR_INT:
12631 switch (GET_MODE_SIZE (mode))
12634 op0 = gen_lowpart (V16QImode, op0);
12635 op1 = gen_lowpart (V16QImode, op1);
12636 emit_insn (gen_avx_movdqu (op0, op1));
12639 op0 = gen_lowpart (V32QImode, op0);
12640 op1 = gen_lowpart (V32QImode, op1);
12641 emit_insn (gen_avx_movdqu256 (op0, op1));
12644 gcc_unreachable ();
12647 case MODE_VECTOR_FLOAT:
12648 op0 = gen_lowpart (mode, op0);
12649 op1 = gen_lowpart (mode, op1);
12654 emit_insn (gen_avx_movups (op0, op1));
12657 emit_insn (gen_avx_movups256 (op0, op1));
12660 emit_insn (gen_avx_movupd (op0, op1));
12663 emit_insn (gen_avx_movupd256 (op0, op1));
12666 gcc_unreachable ();
12671 gcc_unreachable ();
/* SSE load path (op1 in memory).  */
12679 /* If we're optimizing for size, movups is the smallest.  */
12680 if (optimize_insn_for_size_p ())
12682 op0 = gen_lowpart (V4SFmode, op0);
12683 op1 = gen_lowpart (V4SFmode, op1);
12684 emit_insn (gen_sse_movups (op0, op1));
12688 /* ??? If we have typed data, then it would appear that using
12689 movdqu is the only way to get unaligned data loaded with
12691 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12693 op0 = gen_lowpart (V16QImode, op0);
12694 op1 = gen_lowpart (V16QImode, op1);
12695 emit_insn (gen_sse2_movdqu (op0, op1));
12699 if (TARGET_SSE2 && mode == V2DFmode)
12703 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12705 op0 = gen_lowpart (V2DFmode, op0);
12706 op1 = gen_lowpart (V2DFmode, op1);
12707 emit_insn (gen_sse2_movupd (op0, op1));
12711 /* When SSE registers are split into halves, we can avoid
12712 writing to the top half twice.  */
12713 if (TARGET_SSE_SPLIT_REGS)
12715 emit_clobber (op0);
12720 /* ??? Not sure about the best option for the Intel chips.
12721 The following would seem to satisfy; the register is
12722 entirely cleared, breaking the dependency chain.  We
12723 then store to the upper half, with a dependency depth
12724 of one.  A rumor has it that Intel recommends two movsd
12725 followed by an unpacklpd, but this is unconfirmed.  And
12726 given that the dependency depth of the unpacklpd would
12727 still be one, I'm not sure why this would be better.  */
12728 zero = CONST0_RTX (V2DFmode);
12731 m = adjust_address (op1, DFmode, 0);
12732 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12733 m = adjust_address (op1, DFmode, 8);
12734 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12738 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12740 op0 = gen_lowpart (V4SFmode, op0);
12741 op1 = gen_lowpart (V4SFmode, op1);
12742 emit_insn (gen_sse_movups (op0, op1));
12746 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12747 emit_move_insn (op0, CONST0_RTX (mode));
12749 emit_clobber (op0);
12751 if (mode != V4SFmode)
12752 op0 = gen_lowpart (V4SFmode, op0);
12753 m = adjust_address (op1, V2SFmode, 0);
12754 emit_insn (gen_sse_loadlps (op0, op0, m));
12755 m = adjust_address (op1, V2SFmode, 8);
12756 emit_insn (gen_sse_loadhps (op0, op0, m));
/* SSE store path (op0 in memory).  */
12759 else if (MEM_P (op0))
12761 /* If we're optimizing for size, movups is the smallest.  */
12762 if (optimize_insn_for_size_p ())
12764 op0 = gen_lowpart (V4SFmode, op0);
12765 op1 = gen_lowpart (V4SFmode, op1);
12766 emit_insn (gen_sse_movups (op0, op1));
12770 /* ??? Similar to above, only less clear because of quote
12771 typeless stores unquote.  */
12772 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12773 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12775 op0 = gen_lowpart (V16QImode, op0);
12776 op1 = gen_lowpart (V16QImode, op1);
12777 emit_insn (gen_sse2_movdqu (op0, op1));
12781 if (TARGET_SSE2 && mode == V2DFmode)
12783 m = adjust_address (op0, DFmode, 0);
12784 emit_insn (gen_sse2_storelpd (m, op1));
12785 m = adjust_address (op0, DFmode, 8);
12786 emit_insn (gen_sse2_storehpd (m, op1));
12790 if (mode != V4SFmode)
12791 op1 = gen_lowpart (V4SFmode, op1);
12792 m = adjust_address (op0, V2SFmode, 0);
12793 emit_insn (gen_sse_storelps (m, op1));
12794 m = adjust_address (op0, V2SFmode, 8);
12795 emit_insn (gen_sse_storehps (m, op1));
12799 gcc_unreachable ();
12802 /* Expand a push in MODE.  This is some mode for which we do not support
12803    proper push instructions, at least from the registers that we expect
12804    the value to live in.  */
12807 ix86_expand_push (enum machine_mode mode, rtx x)
/* Decrement the stack pointer by the mode's size, then store X at the
   new top of stack.  */
12811 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12812 GEN_INT (-GET_MODE_SIZE (mode)),
12813 stack_pointer_rtx, 1, OPTAB_DIRECT);
12814 if (tmp != stack_pointer_rtx)
12815 emit_move_insn (stack_pointer_rtx, tmp);
12817 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12819 /* When we push an operand onto stack, it has to be aligned at least
12820 at the function argument boundary.  However since we don't have
12821 the argument type, we can't determine the actual argument
12823 emit_move_insn (tmp, x);
12826 /* Helper function of ix86_fixup_binary_operands to canonicalize
12827    operand order.  Returns true if the operands should be swapped.  */
12830 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12833 rtx dst = operands[0];
12834 rtx src1 = operands[1];
12835 rtx src2 = operands[2];
12837 /* If the operation is not commutative, we can't do anything.  */
12838 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12841 /* Highest priority is that src1 should match dst.  */
12842 if (rtx_equal_p (dst, src1))
12844 if (rtx_equal_p (dst, src2))
12847 /* Next highest priority is that immediate constants come second.  */
12848 if (immediate_operand (src2, mode))
12850 if (immediate_operand (src1, mode))
12853 /* Lowest priority is that memory references should come second.  */
12863 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
12864    destination to use for the operation.  If different from the true
12865    destination in operands[0], a copy operation will be required.  */
12868 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12871 rtx dst = operands[0];
12872 rtx src1 = operands[1];
12873 rtx src2 = operands[2];
12875 /* Canonicalize operand order.  */
12876 if (ix86_swap_binary_operands_p (code, mode, operands))
12880 /* It is invalid to swap operands of different modes.  */
12881 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12888 /* Both source operands cannot be in memory.  */
12889 if (MEM_P (src1) && MEM_P (src2))
12891 /* Optimization: Only read from memory once.  */
12892 if (rtx_equal_p (src1, src2))
12894 src2 = force_reg (mode, src2);
12898 src2 = force_reg (mode, src2);
12901 /* If the destination is memory, and we do not have matching source
12902 operands, do things in registers.  */
12903 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12904 dst = gen_reg_rtx (mode);
12906 /* Source 1 cannot be a constant.  */
12907 if (CONSTANT_P (src1))
12908 src1 = force_reg (mode, src1);
12910 /* Source 1 cannot be a non-matching memory.  */
12911 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12912 src1 = force_reg (mode, src1);
/* Write the (possibly reloaded) sources back; DST is returned to the
   caller, which copies to operands[0] if they differ.  */
12914 operands[1] = src1;
12915 operands[2] = src2;
12919 /* Similarly, but assume that the destination has already been
12920    set up properly.  */
12923 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12924 enum machine_mode mode, rtx operands[])
/* The assert enforces the precondition: no separate copy is needed.  */
12926 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12927 gcc_assert (dst == operands[0]);
12930 /* Attempt to expand a binary operator.  Make the expansion closer to the
12931    actual machine, then just general_operand, which will allow 3 separate
12932    memory references (one output, two input) in a single insn.  */
12935 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12938 rtx src1, src2, dst, op, clob;
12940 dst = ix86_fixup_binary_operands (code, mode, operands);
12941 src1 = operands[1];
12942 src2 = operands[2];
12944 /* Emit the instruction.  */
12946 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12947 if (reload_in_progress)
12949 /* Reload doesn't know about the flags register, and doesn't know that
12950 it doesn't want to clobber it.  We can only do this with PLUS.  */
12951 gcc_assert (code == PLUS);
/* Normal case: wrap the SET with an explicit flags clobber, since
   x86 arithmetic insns clobber CC.  */
12956 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12957 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12960 /* Fix up the destination if needed.  */
12961 if (dst != operands[0])
12962 emit_move_insn (operands[0], dst);
12965 /* Return TRUE or FALSE depending on whether the binary operator meets the
12966    appropriate constraints.  */
12969 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12972 rtx dst = operands[0];
12973 rtx src1 = operands[1];
12974 rtx src2 = operands[2];
12976 /* Both source operands cannot be in memory.  */
12977 if (MEM_P (src1) && MEM_P (src2))
/* Mirror the canonicalization done by ix86_fixup_binary_operands so
   the same operand arrangement is accepted here.  */
12980 /* Canonicalize operand order for commutative operators.  */
12981 if (ix86_swap_binary_operands_p (code, mode, operands))
12988 /* If the destination is memory, we must have a matching source operand.  */
12989 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12992 /* Source 1 cannot be a constant.  */
12993 if (CONSTANT_P (src1))
12996 /* Source 1 cannot be a non-matching memory.  */
12997 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13003 /* Attempt to expand a unary operator.  Make the expansion closer to the
13004    actual machine, then just general_operand, which will allow 2 separate
13005    memory references (one output, one input) in a single insn.  */
13008 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13011 int matching_memory;
13012 rtx src, dst, op, clob;
13017 /* If the destination is memory, and we do not have matching source
13018 operands, do things in registers.  */
13019 matching_memory = 0;
13022 if (rtx_equal_p (dst, src))
13023 matching_memory = 1;
13025 dst = gen_reg_rtx (mode);
13028 /* When source operand is memory, destination must match.  */
13029 if (MEM_P (src) && !matching_memory)
13030 src = force_reg (mode, src);
13032 /* Emit the instruction.  */
13034 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13035 if (reload_in_progress || code == NOT)
13037 /* Reload doesn't know about the flags register, and doesn't know that
13038 it doesn't want to clobber it.  */
/* NOT is the only unary op that does not clobber flags, hence the
   flag-free path is valid only for it.  */
13039 gcc_assert (code == NOT);
13044 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13045 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13048 /* Fix up the destination if needed.  */
13049 if (dst != operands[0])
13050 emit_move_insn (operands[0], dst);
13053 #define LEA_SEARCH_THRESHOLD 12
13055 /* Search backward for non-agu definition of register number REGNO1
13056    or register number REGNO2 in INSN's basic block until
13057    1. Pass LEA_SEARCH_THRESHOLD instructions, or
13058    2. Reach BB boundary, or
13059    3. Reach agu definition.
13060    Returns the distance between the non-agu definition point and INSN.
13061    If no definition point, returns -1.  */
13064 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13067 basic_block bb = BLOCK_FOR_INSN (insn);
13070 enum attr_type insn_type;
/* Phase 1: walk backward from INSN to the head of its basic block,
   scanning DF defs for either register.  */
13072 if (insn != BB_HEAD (bb))
13074 rtx prev = PREV_INSN (insn);
13075 while (prev && distance < LEA_SEARCH_THRESHOLD)
13080 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13081 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13082 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13083 && (regno1 == DF_REF_REGNO (*def_rec)
13084 || regno2 == DF_REF_REGNO (*def_rec)))
/* Only a non-LEA (non-AGU) definition counts.  */
13086 insn_type = get_attr_type (prev);
13087 if (insn_type != TYPE_LEA)
13091 if (prev == BB_HEAD (bb))
13093 prev = PREV_INSN (prev);
/* Phase 2: if the budget is not exhausted and the block loops back to
   itself (simple loop), continue the scan from the block's end.  */
13097 if (distance < LEA_SEARCH_THRESHOLD)
13101 bool simple_loop = false;
13103 FOR_EACH_EDGE (e, ei, bb->preds)
13106 simple_loop = true;
13112 rtx prev = BB_END (bb);
13115 && distance < LEA_SEARCH_THRESHOLD)
13120 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13121 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13122 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13123 && (regno1 == DF_REF_REGNO (*def_rec)
13124 || regno2 == DF_REF_REGNO (*def_rec)))
13126 insn_type = get_attr_type (prev);
13127 if (insn_type != TYPE_LEA)
13131 prev = PREV_INSN (prev);
13139 /* get_attr_type may modify recog data.  We want to make sure
13140 that recog data is valid for instruction INSN, on which
13141 distance_non_agu_define is called.  INSN is unchanged here.  */
13142 extract_insn_cached (insn);
13146 /* Return the distance between INSN and the next insn that uses
13147    register number REGNO0 in memory address.  Return -1 if no such
13148    a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
13151 distance_agu_use (unsigned int regno0, rtx insn)
13153 basic_block bb = BLOCK_FOR_INSN (insn);
/* Phase 1: walk forward from INSN to the end of its basic block.  */
13158 if (insn != BB_END (bb))
13160 rtx next = NEXT_INSN (insn);
13161 while (next && distance < LEA_SEARCH_THRESHOLD)
13167 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13168 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13169 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13170 && regno0 == DF_REF_REGNO (*use_rec))
13172 /* Return DISTANCE if OP0 is used in memory
13173 address in NEXT.  */
/* A redefinition of REGNO0 kills the AGU dependency.  */
13177 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13178 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13179 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13180 && regno0 == DF_REF_REGNO (*def_rec))
13182 /* Return -1 if OP0 is set in NEXT.  */
13186 if (next == BB_END (bb))
13188 next = NEXT_INSN (next);
/* Phase 2: if the block is a simple self-loop, wrap around and keep
   scanning from its head, mirroring distance_non_agu_define.  */
13192 if (distance < LEA_SEARCH_THRESHOLD)
13196 bool simple_loop = false;
13198 FOR_EACH_EDGE (e, ei, bb->succs)
13201 simple_loop = true;
13207 rtx next = BB_HEAD (bb);
13210 && distance < LEA_SEARCH_THRESHOLD)
13216 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13217 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13218 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13219 && regno0 == DF_REF_REGNO (*use_rec))
13221 /* Return DISTANCE if OP0 is used in memory
13222 address in NEXT.  */
13226 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13227 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13228 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13229 && regno0 == DF_REF_REGNO (*def_rec))
13231 /* Return -1 if OP0 is set in NEXT.  */
13236 next = NEXT_INSN (next);
13244 /* Define this macro to tune LEA priority vs ADD, it take effect when
13245    there is a dilemma of choicing LEA or ADD
13246    Negative value: ADD is more preferred than LEA
13248    Positive value: LEA is more preferred than ADD*/
13249 #define IX86_LEA_PRIORITY 2
13251 /* Return true if it is ok to optimize an ADD operation to LEA
13252    operation to avoid flag register consumation.  For the processors
13253    like ATOM, if the destination register of LEA holds an actual
13254    address which will be used soon, LEA is better and otherwise ADD
13258 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13259 rtx insn, rtx operands[])
13261 unsigned int regno0 = true_regnum (operands[0]);
13262 unsigned int regno1 = true_regnum (operands[1]);
13263 unsigned int regno2;
/* Without AGU tuning (or when optimizing for size) LEA is only needed
   when the destination differs from the first source.  */
13265 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13266 return regno0 != regno1;
13268 regno2 = true_regnum (operands[2]);
13270 /* If a = b + c, (a!=b && a!=c), must use lea form.  */
13271 if (regno0 != regno1 && regno0 != regno2)
13275 int dist_define, dist_use;
13276 dist_define = distance_non_agu_define (regno1, regno2, insn);
13277 if (dist_define <= 0)
13280 /* If this insn has both backward non-agu dependence and forward
13281 agu dependence, the one with short distance take effect.  */
13282 dist_use = distance_agu_use (regno0, insn);
13284 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13291 /* Return true if destination reg of SET_BODY is shift count of
13295 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13301 /* Retrieve destination of SET_BODY.  */
13302 switch (GET_CODE (set_body))
13305 set_dest = SET_DEST (set_body);
13306 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: recurse over each element looking for a matching SET.  */
13310 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13311 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13319 /* Retrieve shift count of USE_BODY.  */
13320 switch (GET_CODE (use_body))
13323 shift_rtx = XEXP (use_body, 1);
13326 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13327 if (ix86_dep_by_shift_count_body (set_body,
13328 XVECEXP (use_body, 0, i)))
/* Only shift/rotate codes have a count operand to test.  */
13336 && (GET_CODE (shift_rtx) == ASHIFT
13337 || GET_CODE (shift_rtx) == LSHIFTRT
13338 || GET_CODE (shift_rtx) == ASHIFTRT
13339 || GET_CODE (shift_rtx) == ROTATE
13340 || GET_CODE (shift_rtx) == ROTATERT))
13342 rtx shift_count = XEXP (shift_rtx, 1);
13344 /* Return true if shift count is dest of SET_BODY.  */
13345 if (REG_P (shift_count)
13346 && true_regnum (set_dest) == true_regnum (shift_count))
13353 /* Return true if destination reg of SET_INSN is shift count of
13357 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
/* Thin wrapper: compare the insns' PATTERNs via the _body helper.  */
13359 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13360 PATTERN (use_insn));
13363 /* Return TRUE or FALSE depending on whether the unary operator meets the
13364    appropriate constraints.  */
13367 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13368 enum machine_mode mode ATTRIBUTE_UNUSED,
13369 rtx operands[2] ATTRIBUTE_UNUSED)
13371 /* If one of operands is memory, source and destination must match.  */
13372 if ((MEM_P (operands[0])
13373 || MEM_P (operands[1]))
13374 && ! rtx_equal_p (operands[0], operands[1]))
13379 /* Post-reload splitter for converting an SF or DFmode value in an
13380    SSE register into an unsigned SImode.  */
13383 ix86_split_convert_uns_si_sse (rtx operands[])
13385 enum machine_mode vecmode;
13386 rtx value, large, zero_or_two31, input, two31, x;
13388 large = operands[1];
13389 zero_or_two31 = operands[2];
13390 input = operands[3];
13391 two31 = operands[4];
13392 vecmode = GET_MODE (large);
13393 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13395 /* Load up the value into the low element.  We must ensure that the other
13396 elements are valid floats -- zero is the easiest such value.  */
13399 if (vecmode == V4SFmode)
13400 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13402 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Register-source variant: zero VALUE then movss/movsd the input into
   the low element.  */
13406 input = gen_rtx_REG (vecmode, REGNO (input));
13407 emit_move_insn (value, CONST0_RTX (vecmode));
13408 if (vecmode == V4SFmode)
13409 emit_insn (gen_sse_movss (value, value, input));
13411 emit_insn (gen_sse2_movsd (value, value, input));
13414 emit_move_insn (large, two31);
13415 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* LARGE becomes an all-ones/all-zeros mask: 2**31 <= value?  */
13417 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13418 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* Conditionally subtract 2**31 so the signed cvttps2dq/cvttpd2dq can
   handle the value; the sign bit is restored below via the mask.  */
13420 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13421 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13423 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13424 emit_insn (gen_rtx_SET (VOIDmode, value, x));
13426 large = gen_rtx_REG (V4SImode, REGNO (large));
13427 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13429 x = gen_rtx_REG (V4SImode, REGNO (value));
13430 if (vecmode == V4SFmode)
13431 emit_insn (gen_sse2_cvttps2dq (x, value));
13433 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* XOR the shifted mask back in to re-add the 2**31 bias.  */
13436 emit_insn (gen_xorv4si3 (value, value, large));
13439 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13440    Expects the 64-bit DImode to be supplied in a pair of integral
13441    registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
13442    -mfpmath=sse, !optimize_size only.  */
13445 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13447 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13448 rtx int_xmm, fp_xmm;
13449 rtx biases, exponents;
/* Move the 64-bit integer into an XMM register by whichever path the
   target tuning favors.  */
13452 int_xmm = gen_reg_rtx (V4SImode);
13453 if (TARGET_INTER_UNIT_MOVES)
13454 emit_insn (gen_movdi_to_sse (int_xmm, input));
13455 else if (TARGET_SSE_SPLIT_REGS)
13457 emit_clobber (int_xmm);
13458 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13462 x = gen_reg_rtx (V2DImode);
13463 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13464 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Constant vector of the two DF exponent words (0x1.0p52, 0x1.0p84).  */
13467 x = gen_rtx_CONST_VECTOR (V4SImode,
13468 gen_rtvec (4, GEN_INT (0x43300000UL),
13469 GEN_INT (0x45300000UL),
13470 const0_rtx, const0_rtx));
13471 exponents = validize_mem (force_const_mem (V4SImode, x));
13473 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13474 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13476 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13477 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13478 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13479 (0x1.0p84 + double(fp_value_hi_xmm)).
13480 Note these exponents differ by 32.  */
13482 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13484 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13485 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
13486 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13487 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13488 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13489 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13490 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13491 biases = validize_mem (force_const_mem (V2DFmode, biases));
13492 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13494 /* Add the upper and lower DFmode values together.  */
/* SSE3 haddpd does it in one insn; otherwise unpack the high half and
   add.  */
13496 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13499 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13500 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13501 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13504 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13507 /* Not used, but eases macroization of patterns.  */
13509 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13510 rtx input ATTRIBUTE_UNUSED)
/* Deliberately unreachable: exists only so pattern macros link.  */
13512 gcc_unreachable ();
13515 /* Convert an unsigned SImode value into a DFmode.  Only currently used
13516    for SSE, but applicable anywhere.  */
13519 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13521 REAL_VALUE_TYPE TWO31r;
/* Bias the input by -2**31 so it fits signed SImode, convert with the
   signed cvtsi2sd path, then add 2**31.0 back in DFmode.  */
13524 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13525 NULL, 1, OPTAB_DIRECT);
13527 fp = gen_reg_rtx (DFmode);
13528 emit_insn (gen_floatsidf2 (fp, x));
13530 real_ldexp (&TWO31r, &dconst1, 31);
13531 x = const_double_from_real_value (TWO31r, DFmode);
13533 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13535 emit_move_insn (target, x);
13538 /* Convert a signed DImode value into a DFmode.  Only used for SSE in
13539    32-bit mode; otherwise we have a direct convert instruction.  */
13542 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13544 REAL_VALUE_TYPE TWO32r;
13545 rtx fp_lo, fp_hi, x;
13547 fp_lo = gen_reg_rtx (DFmode);
13548 fp_hi = gen_reg_rtx (DFmode);
/* high(input) converted signed, scaled by 2**32; low(input) converted
   unsigned; result = hi * 2**32 + lo.  */
13550 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13552 real_ldexp (&TWO32r, &dconst1, 32);
13553 x = const_double_from_real_value (TWO32r, DFmode);
13554 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13556 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13558 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13561 emit_move_insn (target, x);
13564 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13565    For x86_32, -mfpmath=sse, !optimize_size only.  */
13567 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13569 REAL_VALUE_TYPE ONE16r;
13570 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split into 16-bit halves: both halves fit exactly in SFmode, so
   result = float(hi) * 2**16 + float(lo) with no rounding surprises
   until the final add.  */
13572 real_ldexp (&ONE16r, &dconst1, 16);
13573 x = const_double_from_real_value (ONE16r, SFmode);
13574 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13575 NULL, 0, OPTAB_DIRECT);
13576 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13577 NULL, 0, OPTAB_DIRECT);
13578 fp_hi = gen_reg_rtx (SFmode);
13579 fp_lo = gen_reg_rtx (SFmode);
13580 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13581 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13582 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13584 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13586 if (!rtx_equal_p (target, fp_hi))
13587 emit_move_insn (target, fp_hi);
13590 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
13591    then replicate the value for all elements of the vector
13595 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
/* Integer element modes always replicate across all lanes.  */
13602 v = gen_rtvec (4, value, value, value, value);
13603 return gen_rtx_CONST_VECTOR (V4SImode, v);
13607 v = gen_rtvec (2, value, value);
13608 return gen_rtx_CONST_VECTOR (V2DImode, v);
/* FP element modes: VECT selects replicate-vs-low-element-only.  */
13612 v = gen_rtvec (4, value, value, value, value);
13614 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13615 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13616 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13620 v = gen_rtvec (2, value, value);
13622 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13623 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13626 gcc_unreachable ();
13630 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13631    and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
13632    for an SSE register.  If VECT is true, then replicate the mask for
13633    all elements of the vector register.  If INVERT is true, then create
13634    a mask excluding the sign bit.  */
13637 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13639 enum machine_mode vec_mode, imode;
13640 HOST_WIDE_INT hi, lo;
13645 /* Find the sign bit, sign extended to 2*HWI.  */
/* 32-bit elements: sign bit is bit 31.  */
13651 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13652 lo = 0x80000000, hi = lo < 0;
/* 64-bit elements: sign bit is bit 63; build it across the lo/hi
   HOST_WIDE_INT pair depending on host word width.  */
13658 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13659 if (HOST_BITS_PER_WIDE_INT >= 64)
13660 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13662 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TFmode (and similar): no vector mode; the mask is forced to a plain
   register below.  */
13667 vec_mode = VOIDmode;
13668 if (HOST_BITS_PER_WIDE_INT >= 64)
13671 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13678 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13682 lo = ~lo, hi = ~hi;
13688 mask = immed_double_const (lo, hi, imode);
13690 vec = gen_rtvec (2, v, mask);
13691 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13692 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13699 gcc_unreachable ();
/* INVERT flips the mask to select everything but the sign bit.  */
13703 lo = ~lo, hi = ~hi;
13705 /* Force this value into the low part of a fp vector constant.  */
13706 mask = immed_double_const (lo, hi, imode);
13707 mask = gen_lowpart (mode, mask);
13709 if (vec_mode == VOIDmode)
13710 return force_reg (mode, mask);
13712 v = ix86_build_const_vector (mode, vect, mask);
13713 return force_reg (vec_mode, v);
13716 /* Generate code for floating point ABS or NEG.  */
/* NOTE(review): interior lines are elided in this excerpt; comments
   describe only the visible code.  */
13719 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13722 rtx mask, set, use, clob, dst, src;
13723 bool use_sse = false;
13724 bool vector_mode = VECTOR_MODE_P (mode);
13725 enum machine_mode elt_mode = mode;
/* For vector modes the sign-bit mask is built per element mode.  */
13729 elt_mode = GET_MODE_INNER (mode);
13732 else if (mode == TFmode)
13734 else if (TARGET_SSE_MATH)
13735 use_sse = SSE_FLOAT_MODE_P (mode);
13737 /* NEG and ABS performed with SSE use bitwise mask operations.
13738 Create the appropriate mask now.  */
/* NEG flips the sign bit (XOR with mask); ABS clears it (AND with the
   inverted mask, hence INVERT = (code == ABS)).  */
13740 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13749 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13750 set = gen_rtx_SET (VOIDmode, dst, set);
/* Non-SSE path: emit a plain (set dst (neg/abs src)).  */
13755 set = gen_rtx_fmt_e (code, mode, src);
13756 set = gen_rtx_SET (VOIDmode, dst, set);
/* Wrap the SET in a PARALLEL with a USE of the mask and a flags
   clobber so later splitting can pick either implementation.  */
13759 use = gen_rtx_USE (VOIDmode, mask);
13760 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13761 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13762 gen_rtvec (3, set, use, clob)));
13769 /* Expand a copysign operation.  Special case operand 0 being a constant.  */
/* NOTE(review): interior lines are elided in this excerpt (numbering
   jumps); operand loads for op0/op1 are among the missing lines.  */
13772 ix86_expand_copysign (rtx operands[])
13774 enum machine_mode mode;
13775 rtx dest, op0, op1, mask, nmask;
13777 dest = operands[0];
13781 mode = GET_MODE (dest);
/* Constant magnitude: strip its sign bit, widen it into the matching
   vector mode, and use the *_const copysign pattern with one mask.  */
13783 if (GET_CODE (op0) == CONST_DOUBLE)
13785 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* The sign comes from op1, so fold away any sign on the constant.  */
13787 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13788 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13790 if (mode == SFmode || mode == DFmode)
13792 enum machine_mode vmode;
13794 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13796 if (op0 == CONST0_RTX (mode))
13797 op0 = CONST0_RTX (vmode);
/* Non-zero constant: low vector element holds it, rest are zero.  */
13802 if (mode == SFmode)
13803 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13804 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13806 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13808 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13811 else if (op0 != CONST0_RTX (mode))
13812 op0 = force_reg (mode, op0);
13814 mask = ix86_build_signbit_mask (mode, 0, 0);
13816 if (mode == SFmode)
13817 copysign_insn = gen_copysignsf3_const;
13818 else if (mode == DFmode)
13819 copysign_insn = gen_copysigndf3_const;
13821 copysign_insn = gen_copysigntf3_const;
13823 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: need both the sign mask and its complement
   (NMASK masks the magnitude, MASK extracts the sign).  */
13827 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13829 nmask = ix86_build_signbit_mask (mode, 0, 1);
13830 mask = ix86_build_signbit_mask (mode, 0, 0);
13832 if (mode == SFmode)
13833 copysign_insn = gen_copysignsf3_var;
13834 else if (mode == DFmode)
13835 copysign_insn = gen_copysigndf3_var;
13837 copysign_insn = gen_copysigntf3_var;
13839 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13843 /* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
13844 be a constant, and so has already been expanded into a vector constant.  */
/* NOTE(review): interior lines (loads of op0/op1) are elided here.  */
13847 ix86_split_copysign_const (rtx operands[])
13849 enum machine_mode mode, vmode;
13850 rtx dest, op0, op1, mask, x;
13852 dest = operands[0];
13855 mask = operands[3];
13857 mode = GET_MODE (dest);
13858 vmode = GET_MODE (mask);
/* dest &= mask : keep only the sign bit contributed by the sign source.  */
13860 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13861 x = gen_rtx_AND (vmode, dest, mask);
13862 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* dest |= op0 : OR in the constant magnitude, unless it is zero
   (in which case the AND already produced the final value).  */
13864 if (op0 != CONST0_RTX (vmode))
13866 x = gen_rtx_IOR (vmode, dest, op0);
13867 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13871 /* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
13872 so we have to do two masks.  */
/* NOTE(review): interior lines are elided in this excerpt (numbering
   jumps); the "alternative" comments refer to constraint alternatives of
   the copysign*_var insn pattern that this splitter serves.  */
13875 ix86_split_copysign_var (rtx operands[])
13877 enum machine_mode mode, vmode;
13878 rtx dest, scratch, op0, op1, mask, nmask, x;
13880 dest = operands[0];
13881 scratch = operands[1];
13884 nmask = operands[4];
13885 mask = operands[5];
13887 mode = GET_MODE (dest);
13888 vmode = GET_MODE (mask);
/* Identical magnitude and sign source: result is just op0.  */
13890 if (rtx_equal_p (op0, op1))
13892 /* Shouldn't happen often (it's useless, obviously), but when it does
13893 we'd generate incorrect code if we continue below.  */
13894 emit_move_insn (dest, op0);
/* The register-aliasing checks below pick an AND ordering that never
   clobbers an input that is still needed.  */
13898 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13900 gcc_assert (REGNO (op1) == REGNO (scratch));
/* scratch = op1 & mask : isolate the sign bit.  */
13902 x = gen_rtx_AND (vmode, scratch, mask);
13903 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest = ~mask & op0 : isolate the magnitude via ANDN-style NOT/AND.  */
13906 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13907 x = gen_rtx_NOT (vmode, dest);
13908 x = gen_rtx_AND (vmode, x, op0);
13909 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13913 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13915 x = gen_rtx_AND (vmode, scratch, mask);
13917 else /* alternative 2,4 */
13919 gcc_assert (REGNO (mask) == REGNO (scratch));
13920 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13921 x = gen_rtx_AND (vmode, scratch, op1);
13923 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13925 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13927 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13928 x = gen_rtx_AND (vmode, dest, nmask);
13930 else /* alternative 3,4 */
13932 gcc_assert (REGNO (nmask) == REGNO (dest));
13934 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13935 x = gen_rtx_AND (vmode, dest, op0);
13937 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine: dest = magnitude-bits | sign-bit.  */
13940 x = gen_rtx_IOR (vmode, dest, scratch);
13941 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13944 /* Return TRUE or FALSE depending on whether the first SET in INSN
13945 has source and destination with matching CC modes, and that the
13946 CC mode is at least as constrained as REQ_MODE.  */
/* NOTE(review): the case bodies of the mode switch are elided in this
   excerpt (numbering jumps).  */
13949 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13952 enum machine_mode set_mode;
/* Look through a PARALLEL to the primary SET; it must be a COMPARE
   feeding the flags register.  */
13954 set = PATTERN (insn);
13955 if (GET_CODE (set) == PARALLEL)
13956 set = XVECEXP (set, 0, 0);
13957 gcc_assert (GET_CODE (set) == SET);
13958 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13960 set_mode = GET_MODE (SET_DEST (set));
/* Plain CCmode only matches when REQ_MODE is CCNOmode or the compare
   is against zero.  */
13964 if (req_mode != CCNOmode
13965 && (req_mode != CCmode
13966 || XEXP (SET_SRC (set), 1) != const0_rtx))
13970 if (req_mode == CCGCmode)
13974 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13978 if (req_mode == CCZmode)
13989 gcc_unreachable ();
/* The SET's source and destination CC modes must agree.  */
13992 return (GET_MODE (SET_SRC (set)) == set_mode);
13995 /* Generate insn patterns to do an integer compare of OPERANDS.  */
13998 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14000 enum machine_mode cmpmode;
/* Pick the least-constrained CC mode that still implements CODE.  */
14003 cmpmode = SELECT_CC_MODE (code, op0, op1);
14004 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14006 /* This is very simple, but making the interface the same as in the
14007 FP case makes the rest of the code easier.  */
/* Emit flags = compare (op0, op1).  */
14008 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14009 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14011 /* Return the test that should be put into the flags user, i.e.
14012 the bcc, scc, or cmov instruction.  */
14013 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14016 /* Figure out whether to use ordered or unordered fp comparisons.
14017 Return the appropriate mode to use.  */
14020 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14022 /* ??? In order to make all comparisons reversible, we do all comparisons
14023 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
14024 all forms trapping and nontrapping comparisons, we can make inequality
14025 comparisons trapping again, since it results in better code when using
14026 FCOM based compares.  */
/* CCFPUmode = unordered (non-trapping) compare; CCFPmode = ordered.  */
14027 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Return the CC mode needed to implement comparison CODE on OP0/OP1.
   Choosing a less constrained mode lets more flag-setting insns feed
   the test.  NOTE(review): the return statements of the individual
   cases are elided in this excerpt (numbering jumps).  */
14031 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14033 enum machine_mode mode = GET_MODE (op0);
/* Floating point compares get their own (CCFP/CCFPU) modes.  */
14035 if (SCALAR_FLOAT_MODE_P (mode))
14037 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14038 return ix86_fp_compare_mode (code);
14043 /* Only zero flag is needed.  */
14044 case EQ: /* ZF=0 */
14045 case NE: /* ZF!=0 */
14047 /* Codes needing carry flag.  */
14048 case GEU: /* CF=0 */
14049 case LTU: /* CF=1 */
14050 /* Detect overflow checks.  They need just the carry flag.  */
14051 if (GET_CODE (op0) == PLUS
14052 && rtx_equal_p (op1, XEXP (op0, 0)))
14056 case GTU: /* CF=0 & ZF=0 */
14057 case LEU: /* CF=1 | ZF=1 */
14058 /* Detect overflow checks.  They need just the carry flag.  */
14059 if (GET_CODE (op0) == MINUS
14060 && rtx_equal_p (op1, XEXP (op0, 0)))
14064 /* Codes possibly doable only with sign flag when
14065 comparing against zero.  */
14066 case GE: /* SF=OF or SF=0 */
14067 case LT: /* SF<>OF or SF=1 */
14068 if (op1 == const0_rtx)
14071 /* For other cases Carry flag is not required.  */
14073 /* Codes doable only with sign flag when comparing
14074 against zero, but we miss jump instruction for it
14075 so we need to use relational tests against overflow
14076 that thus needs to be zero.  */
14077 case GT: /* ZF=0 & SF=OF */
14078 case LE: /* ZF=1 | SF<>OF */
14079 if (op1 == const0_rtx)
14083 /* strcmp pattern do (use flags) and combine may ask us for proper
14088 gcc_unreachable ();
14092 /* Return the fixed registers used for condition codes.  */
/* NOTE(review): the function body is elided in this excerpt; only the
   signature is visible.  Presumably stores the flags register numbers
   into *P1/*P2 -- confirm against the full file.  */
14095 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14102 /* If two condition code modes are compatible, return a condition code
14103 mode which is compatible with both.  Otherwise, return
/* NOTE(review): most of this function (the mode switch and returns) is
   elided in this excerpt.  */
14106 static enum machine_mode
14107 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Non-CC modes cannot be merged here.  */
14112 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGCmode and CCGOCmode are mutually compatible.  */
14115 if ((m1 == CCGCmode && m2 == CCGOCmode)
14116 || (m1 == CCGOCmode && m2 == CCGCmode))
14122 gcc_unreachable ();
14152 /* These are only compatible with themselves, which we already
14158 /* Split comparison code CODE into comparisons we can do using branch
14159 instructions.  BYPASS_CODE is comparison code for branch that will
14160 branch around FIRST_CODE and SECOND_CODE.  If some of branches
14161 is not required, set value to UNKNOWN.
14162 We never require more than two branches.  */
/* NOTE(review): break statements and some case lines are elided in this
   excerpt (numbering jumps).  */
14165 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
14166 enum rtx_code *first_code,
14167 enum rtx_code *second_code)
/* Default: a single branch on CODE itself suffices.  */
14169 *first_code = code;
14170 *bypass_code = UNKNOWN;
14171 *second_code = UNKNOWN;
14173 /* The fcomi comparison sets flags as follows:
/* These codes map directly onto one fcomi-set flag test.  */
14183 case GT: /* GTU - CF=0 & ZF=0 */
14184 case GE: /* GEU - CF=0 */
14185 case ORDERED: /* PF=0 */
14186 case UNORDERED: /* PF=1 */
14187 case UNEQ: /* EQ - ZF=1 */
14188 case UNLT: /* LTU - CF=1 */
14189 case UNLE: /* LEU - CF=1 | ZF=1 */
14190 case LTGT: /* EQ - ZF=0 */
/* Signaling codes need an UNORDERED bypass branch around the test.  */
14192 case LT: /* LTU - CF=1 - fails on unordered */
14193 *first_code = UNLT;
14194 *bypass_code = UNORDERED;
14196 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
14197 *first_code = UNLE;
14198 *bypass_code = UNORDERED;
14200 case EQ: /* EQ - ZF=1 - fails on unordered */
14201 *first_code = UNEQ;
14202 *bypass_code = UNORDERED;
/* These need a second branch taken on UNORDERED.  */
14204 case NE: /* NE - ZF=0 - fails on unordered */
14205 *first_code = LTGT;
14206 *second_code = UNORDERED;
14208 case UNGE: /* GEU - CF=0 - fails on unordered */
14210 *second_code = UNORDERED;
14212 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
14214 *second_code = UNORDERED;
14217 gcc_unreachable ();
/* Without IEEE conformance a single branch is always enough.  */
14219 if (!TARGET_IEEE_FP)
14221 *second_code = UNKNOWN;
14222 *bypass_code = UNKNOWN;
14226 /* Return cost of comparison done fcom + arithmetics operations on AX.
14227 All following functions do use number of instructions as a cost metrics.
14228 In future this should be tweaked to compute bytes for optimize_size and
14229 take into account performance of various instructions on various CPUs.  */
/* NOTE(review): the per-code cost switch is elided in this excerpt.  */
14231 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
14233 if (!TARGET_IEEE_FP)
14235 /* The cost of code output by ix86_expand_fp_compare.  */
14259 gcc_unreachable ();
14263 /* Return cost of comparison done using fcomi operation.
14264 See ix86_fp_comparison_arithmetics_cost for the metrics.  */
14266 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
14268 enum rtx_code bypass_code, first_code, second_code;
14269 /* Return arbitrarily high cost when instruction is not supported - this
14270 prevents gcc from using it.  */
/* NOTE(review): the capability guard and its early return are elided.  */
14273 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 2 (fcomi + branch), plus 1 if a bypass or second branch
   is required.  */
14274 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14277 /* Return cost of comparison done using sahf operation.
14278 See ix86_fp_comparison_arithmetics_cost for the metrics.  */
14280 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14282 enum rtx_code bypass_code, first_code, second_code;
14283 /* Return arbitrarily high cost when instruction is not preferred - this
14284 avoids gcc from using it.  */
/* SAHF path needs hardware support and a tuning preference (or size
   optimization) before it is considered.  */
14285 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
14287 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 3 (fnstsw + sahf + branch), plus 1 for an extra branch.  */
14288 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14291 /* Compute cost of the comparison done using any method.
14292 See ix86_fp_comparison_arithmetics_cost for the metrics.  */
14294 ix86_fp_comparison_cost (enum rtx_code code)
14296 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14299 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14300 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum over the three strategies.  NOTE(review): the
   updates of MIN and the final return are elided in this excerpt.  */
14302 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14303 if (min > sahf_cost)
14305 if (min > fcomi_cost)
14310 /* Return true if we should use an FCOMI instruction for this
14314 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14316 enum rtx_code swapped_code = swap_condition (code);
/* Use FCOMI when it is the cheapest strategy for CODE or for its
   operand-swapped form.  */
14318 return ((ix86_fp_comparison_cost (code)
14319 == ix86_fp_comparison_fcomi_cost (code))
14320 || (ix86_fp_comparison_cost (swapped_code)
14321 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14324 /* Swap, force into registers, or otherwise massage the two operands
14325 to a fp comparison.  The operands are updated in place; the new
14326 comparison code is returned.  */
/* NOTE(review): interior lines are elided in this excerpt (numbering
   jumps); comments cover only the visible code.  */
14328 static enum rtx_code
14329 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14331 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14332 rtx op0 = *pop0, op1 = *pop1;
14333 enum machine_mode op_mode = GET_MODE (op0);
14334 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14336 /* All of the unordered compare instructions only work on registers.
14337 The same is true of the fcomi compare instructions.  The XFmode
14338 compare instructions require registers except when comparing
14339 against zero or when converting operand 1 from fixed point to
14343 && (fpcmp_mode == CCFPUmode
14344 || (op_mode == XFmode
14345 && ! (standard_80387_constant_p (op0) == 1
14346 || standard_80387_constant_p (op1) == 1)
14347 && GET_CODE (op1) != FLOAT)
14348 || ix86_use_fcomi_compare (code)))
14350 op0 = force_reg (op_mode, op0);
14351 op1 = force_reg (op_mode, op1);
14355 /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
14356 things around if they appear profitable, otherwise force op0
14357 into a register.  */
/* Swap when op0 is a non-standard x87 constant and op1 is not.  */
14359 if (standard_80387_constant_p (op0) == 0
14361 && ! (standard_80387_constant_p (op1) == 0
14365 tmp = op0, op0 = op1, op1 = tmp;
14366 code = swap_condition (code);
14370 op0 = force_reg (op_mode, op0);
14372 if (CONSTANT_P (op1))
14374 int tmp = standard_80387_constant_p (op1);
/* Non-standard constants go to the constant pool; others to a reg.  */
14376 op1 = validize_mem (force_const_mem (op_mode, op1));
14380 op1 = force_reg (op_mode, op1);
14383 op1 = force_reg (op_mode, op1);
14387 /* Try to rearrange the comparison to make it cheaper.  */
14388 if (ix86_fp_comparison_cost (code)
14389 > ix86_fp_comparison_cost (swap_condition (code))
14390 && (REG_P (op1) || can_create_pseudo_p ()))
14393 tmp = op0, op0 = op1, op1 = tmp;
14394 code = swap_condition (code);
14396 op0 = force_reg (op_mode, op0);
14404 /* Convert comparison codes we use to represent FP comparison to integer
14405 code that will result in proper branch.  Return UNKNOWN if no such code
/* NOTE(review): the translation switch of this function is elided in
   this excerpt; only the signature is visible.  */
14409 ix86_fp_compare_code_to_integer (enum rtx_code code)
14438 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
/* NOTE(review): interior lines are elided in this excerpt (numbering
   jumps); comments cover only the visible code.  */
14441 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14442 rtx *second_test, rtx *bypass_test)
14444 enum machine_mode fpcmp_mode, intcmp_mode;
14446 int cost = ix86_fp_comparison_cost (code);
14447 enum rtx_code bypass_code, first_code, second_code;
14449 fpcmp_mode = ix86_fp_compare_mode (code);
14450 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14453 *second_test = NULL_RTX;
14455 *bypass_test = NULL_RTX;
14457 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14459 /* Do fcomi/sahf based test when profitable.  */
/* Extra branches are only usable when the caller supplied slots for
   the bypass/second tests.  */
14460 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14461 && (bypass_code == UNKNOWN || bypass_test)
14462 && (second_code == UNKNOWN || second_test))
14464 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14465 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* SAHF variant: flags come via AX, clobbering a HImode scratch.  */
14471 gcc_assert (TARGET_SAHF);
14474 scratch = gen_reg_rtx (HImode);
14475 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14477 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14480 /* The FP codes work out to act like unsigned.  */
14481 intcmp_mode = fpcmp_mode;
14483 if (bypass_code != UNKNOWN)
14484 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14485 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14487 if (second_code != UNKNOWN)
14488 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14489 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14494 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
14495 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14496 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14498 scratch = gen_reg_rtx (HImode);
14499 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14501 /* In the unordered case, we have to check C2 for NaN's, which
14502 doesn't happen to work out to anything nice combination-wise.
14503 So do some bit twiddling on the value we've got in AH to come
14504 up with an appropriate set of condition codes.  */
/* In AH the x87 status bits land as: C0=0x01, C2=0x04, C3=0x40;
   0x45 tests all three at once.  */
14506 intcmp_mode = CCNOmode;
14511 if (code == GT || !TARGET_IEEE_FP)
14513 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14518 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14519 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14520 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14521 intcmp_mode = CCmode;
14527 if (code == LT && TARGET_IEEE_FP)
14529 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14530 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14531 intcmp_mode = CCmode;
14536 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14542 if (code == GE || !TARGET_IEEE_FP)
14544 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14549 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14550 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14557 if (code == LE && TARGET_IEEE_FP)
14559 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14560 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx))
14561 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14562 intcmp_mode = CCmode;
14567 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14573 if (code == EQ && TARGET_IEEE_FP)
14575 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14576 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14577 intcmp_mode = CCmode;
14582 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14589 if (code == NE && TARGET_IEEE_FP)
14591 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14592 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14598 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
/* ORDERED/UNORDERED: test C2 (0x04) alone.  */
14604 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14608 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14613 gcc_unreachable ();
14617 /* Return the test that should be put into the flags user, i.e.
14618 the bcc, scc, or cmov instruction.  */
14619 return gen_rtx_fmt_ee (code, VOIDmode,
14620 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a compare of ix86_compare_op0/op1 with code CODE; dispatches
   to the integer or FP expander and returns the flags-user test rtx.
   NOTE(review): interior lines are elided in this excerpt.  */
14625 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14628 op0 = ix86_compare_op0;
14629 op1 = ix86_compare_op1;
14632 *second_test = NULL_RTX;
14634 *bypass_test = NULL_RTX;
/* Flags already set (MODE_CC operand): just wrap the test.  */
14636 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14637 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14639 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14641 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14642 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14643 second_test, bypass_test);
14646 ret = ix86_expand_int_compare (code, op0, op1);
14651 /* Return true if the CODE will result in nontrivial jump sequence.  */
14653 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14655 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial = needs a bypass or second branch besides the main one.
   NOTE(review): an early-exit guard line is elided in this excerpt.  */
14658 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14659 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Expand a conditional branch on CODE (comparing ix86_compare_op0/op1)
   to LABEL.  Dispatches on the operand mode: simple compare+jcc,
   FP compare, or a DImode/TImode multi-branch split.
   NOTE(review): interior lines (case labels, braces) are elided in this
   excerpt; comments cover only the visible code.  */
14663 ix86_expand_branch (enum rtx_code code, rtx label)
14667 switch (GET_MODE (ix86_compare_op0))
/* Simple scalar modes: compare, then (set pc (if_then_else ...)).  */
14673 tmp = ix86_expand_compare (code, NULL, NULL);
14674 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14675 gen_rtx_LABEL_REF (VOIDmode, label),
14677 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating point modes.  */
14686 enum rtx_code bypass_code, first_code, second_code;
14688 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14689 &ix86_compare_op1);
14691 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14693 /* Check whether we will use the natural sequence with one jump.  If
14694 so, we can expand jump early.  Otherwise delay expansion by
14695 creating compound insn to not confuse optimizers.  */
14696 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14698 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14699 gen_rtx_LABEL_REF (VOIDmode, label),
14700 pc_rtx, NULL_RTX, NULL_RTX);
/* Compound insn: jump wrapped with FPSR/flags clobbers (plus a
   HImode scratch clobber when not using fcomi).  */
14704 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14705 ix86_compare_op0, ix86_compare_op1);
14706 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14707 gen_rtx_LABEL_REF (VOIDmode, label),
14709 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14711 use_fcomi = ix86_use_fcomi_compare (code);
14712 vec = rtvec_alloc (3 + !use_fcomi);
14713 RTVEC_ELT (vec, 0) = tmp;
14715 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14717 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14720 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14722 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14731 /* Expand DImode branch into multiple compare+branch.  */
14733 rtx lo[2], hi[2], label2;
14734 enum rtx_code code1, code2, code3;
14735 enum machine_mode submode;
/* Canonicalize: constant operand goes second.  */
14737 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14739 tmp = ix86_compare_op0;
14740 ix86_compare_op0 = ix86_compare_op1;
14741 ix86_compare_op1 = tmp;
14742 code = swap_condition (code);
/* Split each double-word operand into low/high word halves.  */
14744 if (GET_MODE (ix86_compare_op0) == DImode)
14746 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14747 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14752 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14753 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14757 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14758 avoid two branches.  This costs one extra insn, so disable when
14759 optimizing for size.  */
14761 if ((code == EQ || code == NE)
14762 && (!optimize_insn_for_size_p ()
14763 || hi[1] == const0_rtx || lo[1] == const0_rtx)
14768 if (hi[1] != const0_rtx)
14769 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14770 NULL_RTX, 0, OPTAB_WIDEN);
14773 if (lo[1] != const0_rtx)
14774 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14775 NULL_RTX, 0, OPTAB_WIDEN);
14777 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14778 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: the EQ/NE of the OR against zero decides the branch.  */
14780 ix86_compare_op0 = tmp;
14781 ix86_compare_op1 = const0_rtx;
14782 ix86_expand_branch (code, label);
14786 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14787 op1 is a constant and the low word is zero, then we can just
14788 examine the high word.  Similarly for low word -1 and
14789 less-or-equal-than or greater-than.  */
14791 if (CONST_INT_P (hi[1]))
14794 case LT: case LTU: case GE: case GEU:
14795 if (lo[1] == const0_rtx)
14797 ix86_compare_op0 = hi[0];
14798 ix86_compare_op1 = hi[1];
14799 ix86_expand_branch (code, label);
14803 case LE: case LEU: case GT: case GTU:
14804 if (lo[1] == constm1_rtx)
14806 ix86_compare_op0 = hi[0];
14807 ix86_compare_op1 = hi[1];
14808 ix86_expand_branch (code, label);
14816 /* Otherwise, we need two or three jumps.  */
14818 label2 = gen_label_rtx ();
14821 code2 = swap_condition (code);
14822 code3 = unsigned_condition (code);
14826 case LT: case GT: case LTU: case GTU:
/* Strict/non-strict orderings compare high words strictly first.  */
14829 case LE: code1 = LT; code2 = GT; break;
14830 case GE: code1 = GT; code2 = LT; break;
14831 case LEU: code1 = LTU; code2 = GTU; break;
14832 case GEU: code1 = GTU; code2 = LTU; break;
14834 case EQ: code1 = UNKNOWN; code2 = NE; break;
14835 case NE: code2 = UNKNOWN; break;
14838 gcc_unreachable ();
14843 * if (hi(a) < hi(b)) goto true;
14844 * if (hi(a) > hi(b)) goto false;
14845 * if (lo(a) < lo(b)) goto true;
/* Emit the high-word branches, then the unsigned low-word branch.  */
14849 ix86_compare_op0 = hi[0];
14850 ix86_compare_op1 = hi[1];
14852 if (code1 != UNKNOWN)
14853 ix86_expand_branch (code1, label);
14854 if (code2 != UNKNOWN)
14855 ix86_expand_branch (code2, label2);
14857 ix86_compare_op0 = lo[0];
14858 ix86_compare_op1 = lo[1];
14859 ix86_expand_branch (code3, label);
14861 if (code2 != UNKNOWN)
14862 emit_label (label2);
14867 /* If we have already emitted a compare insn, go straight to simple.
14868 ix86_expand_compare won't emit anything if ix86_compare_emitted
14870 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
14875 /* Split branch based on floating point condition.  */
/* NOTE(review): interior lines are elided in this excerpt; comments
   cover only the visible code.  */
14877 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14878 rtx target1, rtx target2, rtx tmp, rtx pushed)
14880 rtx second, bypass;
14881 rtx label = NULL_RTX;
14883 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the taken branch goes to target1 (pc_rtx stays as
   the fallthrough in target2).  */
14886 if (target2 != pc_rtx)
14889 code = reverse_condition_maybe_unordered (code);
14894 condition = ix86_expand_fp_compare (code, op1, op2,
14895 tmp, &second, &bypass);
14897 /* Remove pushed operand from stack.  */
14899 ix86_free_from_memory (GET_MODE (pushed));
14901 if (split_branch_probability >= 0)
14903 /* Distribute the probabilities across the jumps.
14904 Assume the BYPASS and SECOND to be always test
14906 probability = split_branch_probability;
14908 /* Value of 1 is low enough to make no need for probability
14909 to be updated.  Later we may run some experiments and see
14910 if unordered values are more frequent in practice.  */
14912 bypass_probability = 1;
14914 second_probability = 1;
/* Bypass branch jumps over the main test (e.g. on unordered).  */
14916 if (bypass != NULL_RTX)
14918 label = gen_label_rtx ();
14919 i = emit_jump_insn (gen_rtx_SET
14921 gen_rtx_IF_THEN_ELSE (VOIDmode,
14923 gen_rtx_LABEL_REF (VOIDmode,
14926 if (bypass_probability >= 0)
14927 add_reg_note (i, REG_BR_PROB, GEN_INT (bypass_probability));
/* Main conditional jump.  */
14929 i = emit_jump_insn (gen_rtx_SET
14931 gen_rtx_IF_THEN_ELSE (VOIDmode,
14932 condition, target1, target2)));
14933 if (probability >= 0)
14934 add_reg_note (i, REG_BR_PROB, GEN_INT (probability));
/* Optional second jump for codes needing two tests.  */
14935 if (second != NULL_RTX)
14937 i = emit_jump_insn (gen_rtx_SET
14939 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14941 if (second_probability >= 0)
14942 add_reg_note (i, REG_BR_PROB, GEN_INT (second_probability));
14944 if (label != NULL_RTX)
14945 emit_label (label);
/* Expand a setcc of CODE into QImode DEST; combines bypass/second tests
   with AND/OR when the FP comparison needs them.  NOTE(review): interior
   lines are elided in this excerpt.  */
14949 ix86_expand_setcc (enum rtx_code code, rtx dest)
14951 rtx ret, tmp, tmpreg, equiv;
14952 rtx second_test, bypass_test;
14954 gcc_assert (GET_MODE (dest) == QImode);
14956 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14957 PUT_MODE (ret, QImode);
14962 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* Fold an extra test into the result: a bypass test is reversed and
   ANDed in; a second test is ORed in.  */
14963 if (bypass_test || second_test)
14965 rtx test = second_test;
14967 rtx tmp2 = gen_reg_rtx (QImode);
14970 gcc_assert (!second_test);
14971 test = bypass_test;
14973 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14975 PUT_MODE (test, QImode);
14976 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14979 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14981 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14984 /* Attach a REG_EQUAL note describing the comparison result.  */
14985 if (ix86_compare_op0 && ix86_compare_op1)
14987 equiv = simplify_gen_relational (code, QImode,
14988 GET_MODE (ix86_compare_op0),
14989 ix86_compare_op0, ix86_compare_op1);
14990 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14994 /* Expand comparison setting or clearing carry flag.  Return true when
14995 successful and set pop for the operation.  */
/* NOTE(review): interior lines (case labels, returns) are elided in this
   excerpt; comments cover only the visible code.  */
14997 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14999 enum machine_mode mode =
15000 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15002 /* Do not handle DImode compares that go through special path.  */
15003 if (mode == (TARGET_64BIT ? TImode : DImode))
15006 if (SCALAR_FLOAT_MODE_P (mode))
15008 rtx second_test = NULL, bypass_test = NULL;
15009 rtx compare_op, compare_seq;
15011 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15013 /* Shortcut: following common codes never translate
15014 into carry flag compares.  */
15015 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15016 || code == ORDERED || code == UNORDERED)
15019 /* These comparisons require zero flag; swap operands so they won't.  */
15020 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15021 && !TARGET_IEEE_FP)
15026 code = swap_condition (code);
15029 /* Try to expand the comparison and verify that we end up with
15030 carry flag based comparison.  This fails to be true only when
15031 we decide to expand comparison using arithmetic that is not
15032 too common scenario.  */
15034 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
15035 &second_test, &bypass_test);
15036 compare_seq = get_insns ();
/* Extra tests mean it is not a single carry-flag compare.  */
15039 if (second_test || bypass_test)
15042 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15043 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15044 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15046 code = GET_CODE (compare_op);
/* Only LTU/GEU read just the carry flag.  */
15048 if (code != LTU && code != GEU)
15051 emit_insn (compare_seq);
15056 if (!INTEGRAL_MODE_P (mode))
15065 /* Convert a==0 into (unsigned)a<1.  */
15068 if (op1 != const0_rtx)
15071 code = (code == EQ ? LTU : GEU);
15074 /* Convert a>b into b<a or a>=b-1.  */
15077 if (CONST_INT_P (op1))
15079 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15080 /* Bail out on overflow.  We still can swap operands but that
15081 would force loading of the constant into register.  */
15082 if (op1 == const0_rtx
15083 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15085 code = (code == GTU ? GEU : LTU);
15092 code = (code == GTU ? LTU : GEU);
15096 /* Convert a>=0 into (unsigned)a<0x80000000.  */
15099 if (mode == DImode || op1 != const0_rtx)
15101 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15102 code = (code == LT ? GEU : LTU);
15106 if (mode == DImode || op1 != constm1_rtx)
15108 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15109 code = (code == LE ? GEU : LTU);
15115 /* Swapping operands may cause constant to appear as first operand.  */
15116 if (!nonimmediate_operand (op0, VOIDmode))
15118 if (!can_create_pseudo_p ())
15120 op0 = force_reg (mode, op0);
15122 ix86_compare_op0 = op0;
15123 ix86_compare_op1 = op1;
15124 *pop = ix86_expand_compare (code, NULL, NULL);
/* The resulting test must indeed read only the carry flag.  */
15125 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
15130 ix86_expand_int_movcc (rtx operands[])
15132 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15133 rtx compare_seq, compare_op;
15134 rtx second_test, bypass_test;
15135 enum machine_mode mode = GET_MODE (operands[0]);
15136 bool sign_bit_compare_p = false;;
15139 ix86_compare_op0 = XEXP (operands[1], 0);
15140 ix86_compare_op1 = XEXP (operands[1], 1);
15141 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15142 compare_seq = get_insns ();
15145 compare_code = GET_CODE (compare_op);
15147 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15148 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15149 sign_bit_compare_p = true;
15151 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15152 HImode insns, we'd be swallowed in word prefix ops. */
15154 if ((mode != HImode || TARGET_FAST_PREFIX)
15155 && (mode != (TARGET_64BIT ? TImode : DImode))
15156 && CONST_INT_P (operands[2])
15157 && CONST_INT_P (operands[3]))
15159 rtx out = operands[0];
15160 HOST_WIDE_INT ct = INTVAL (operands[2]);
15161 HOST_WIDE_INT cf = INTVAL (operands[3]);
15162 HOST_WIDE_INT diff;
15165 /* Sign bit compares are better done using shifts than we do by using
15167 if (sign_bit_compare_p
15168 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15169 ix86_compare_op1, &compare_op))
15171 /* Detect overlap between destination and compare sources. */
15174 if (!sign_bit_compare_p)
15176 bool fpcmp = false;
15178 compare_code = GET_CODE (compare_op);
15180 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15181 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15184 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15187 /* To simplify rest of code, restrict to the GEU case. */
15188 if (compare_code == LTU)
15190 HOST_WIDE_INT tmp = ct;
15193 compare_code = reverse_condition (compare_code);
15194 code = reverse_condition (code);
15199 PUT_CODE (compare_op,
15200 reverse_condition_maybe_unordered
15201 (GET_CODE (compare_op)));
15203 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15207 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15208 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15209 tmp = gen_reg_rtx (mode);
15211 if (mode == DImode)
15212 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15214 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15218 if (code == GT || code == GE)
15219 code = reverse_condition (code);
15222 HOST_WIDE_INT tmp = ct;
15227 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15228 ix86_compare_op1, VOIDmode, 0, -1);
15241 tmp = expand_simple_binop (mode, PLUS,
15243 copy_rtx (tmp), 1, OPTAB_DIRECT);
15254 tmp = expand_simple_binop (mode, IOR,
15256 copy_rtx (tmp), 1, OPTAB_DIRECT);
15258 else if (diff == -1 && ct)
15268 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15270 tmp = expand_simple_binop (mode, PLUS,
15271 copy_rtx (tmp), GEN_INT (cf),
15272 copy_rtx (tmp), 1, OPTAB_DIRECT);
15280 * andl cf - ct, dest
15290 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15293 tmp = expand_simple_binop (mode, AND,
15295 gen_int_mode (cf - ct, mode),
15296 copy_rtx (tmp), 1, OPTAB_DIRECT);
15298 tmp = expand_simple_binop (mode, PLUS,
15299 copy_rtx (tmp), GEN_INT (ct),
15300 copy_rtx (tmp), 1, OPTAB_DIRECT);
15303 if (!rtx_equal_p (tmp, out))
15304 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15306 return 1; /* DONE */
15311 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15314 tmp = ct, ct = cf, cf = tmp;
15317 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15319 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15321 /* We may be reversing unordered compare to normal compare, that
15322 is not valid in general (we may convert non-trapping condition
15323 to trapping one), however on i386 we currently emit all
15324 comparisons unordered. */
15325 compare_code = reverse_condition_maybe_unordered (compare_code);
15326 code = reverse_condition_maybe_unordered (code);
15330 compare_code = reverse_condition (compare_code);
15331 code = reverse_condition (code);
15335 compare_code = UNKNOWN;
15336 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15337 && CONST_INT_P (ix86_compare_op1))
15339 if (ix86_compare_op1 == const0_rtx
15340 && (code == LT || code == GE))
15341 compare_code = code;
15342 else if (ix86_compare_op1 == constm1_rtx)
15346 else if (code == GT)
15351 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15352 if (compare_code != UNKNOWN
15353 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15354 && (cf == -1 || ct == -1))
15356 /* If lea code below could be used, only optimize
15357 if it results in a 2 insn sequence. */
15359 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15360 || diff == 3 || diff == 5 || diff == 9)
15361 || (compare_code == LT && ct == -1)
15362 || (compare_code == GE && cf == -1))
15365 * notl op1 (if necessary)
15373 code = reverse_condition (code);
15376 out = emit_store_flag (out, code, ix86_compare_op0,
15377 ix86_compare_op1, VOIDmode, 0, -1);
15379 out = expand_simple_binop (mode, IOR,
15381 out, 1, OPTAB_DIRECT);
15382 if (out != operands[0])
15383 emit_move_insn (operands[0], out);
15385 return 1; /* DONE */
15390 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15391 || diff == 3 || diff == 5 || diff == 9)
15392 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15394 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15400 * lea cf(dest*(ct-cf)),dest
15404 * This also catches the degenerate setcc-only case.
15410 out = emit_store_flag (out, code, ix86_compare_op0,
15411 ix86_compare_op1, VOIDmode, 0, 1);
15414 /* On x86_64 the lea instruction operates on Pmode, so we need
15415 to get arithmetics done in proper mode to match. */
15417 tmp = copy_rtx (out);
15421 out1 = copy_rtx (out);
15422 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15426 tmp = gen_rtx_PLUS (mode, tmp, out1);
15432 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15435 if (!rtx_equal_p (tmp, out))
15438 out = force_operand (tmp, copy_rtx (out));
15440 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15442 if (!rtx_equal_p (out, operands[0]))
15443 emit_move_insn (operands[0], copy_rtx (out));
15445 return 1; /* DONE */
15449 * General case: Jumpful:
15450 * xorl dest,dest cmpl op1, op2
15451 * cmpl op1, op2 movl ct, dest
15452 * setcc dest jcc 1f
15453 * decl dest movl cf, dest
15454 * andl (cf-ct),dest 1:
15457 * Size 20. Size 14.
15459 * This is reasonably steep, but branch mispredict costs are
15460 * high on modern cpus, so consider failing only if optimizing
15464 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15465 && BRANCH_COST (optimize_insn_for_speed_p (),
15470 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15475 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15477 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15479 /* We may be reversing unordered compare to normal compare,
15480 that is not valid in general (we may convert non-trapping
15481 condition to trapping one), however on i386 we currently
15482 emit all comparisons unordered. */
15483 code = reverse_condition_maybe_unordered (code);
15487 code = reverse_condition (code);
15488 if (compare_code != UNKNOWN)
15489 compare_code = reverse_condition (compare_code);
15493 if (compare_code != UNKNOWN)
15495 /* notl op1 (if needed)
15500 For x < 0 (resp. x <= -1) there will be no notl,
15501 so if possible swap the constants to get rid of the
15503 True/false will be -1/0 while code below (store flag
15504 followed by decrement) is 0/-1, so the constants need
15505 to be exchanged once more. */
15507 if (compare_code == GE || !cf)
15509 code = reverse_condition (code);
15514 HOST_WIDE_INT tmp = cf;
15519 out = emit_store_flag (out, code, ix86_compare_op0,
15520 ix86_compare_op1, VOIDmode, 0, -1);
15524 out = emit_store_flag (out, code, ix86_compare_op0,
15525 ix86_compare_op1, VOIDmode, 0, 1);
15527 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15528 copy_rtx (out), 1, OPTAB_DIRECT);
15531 out = expand_simple_binop (mode, AND, copy_rtx (out),
15532 gen_int_mode (cf - ct, mode),
15533 copy_rtx (out), 1, OPTAB_DIRECT);
15535 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15536 copy_rtx (out), 1, OPTAB_DIRECT);
15537 if (!rtx_equal_p (out, operands[0]))
15538 emit_move_insn (operands[0], copy_rtx (out));
15540 return 1; /* DONE */
15544 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15546 /* Try a few things more with specific constants and a variable. */
15549 rtx var, orig_out, out, tmp;
15551 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15552 return 0; /* FAIL */
15554 /* If one of the two operands is an interesting constant, load a
15555 constant with the above and mask it in with a logical operation. */
15557 if (CONST_INT_P (operands[2]))
15560 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15561 operands[3] = constm1_rtx, op = and_optab;
15562 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15563 operands[3] = const0_rtx, op = ior_optab;
15565 return 0; /* FAIL */
15567 else if (CONST_INT_P (operands[3]))
15570 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15571 operands[2] = constm1_rtx, op = and_optab;
15572 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15573 operands[2] = const0_rtx, op = ior_optab;
15575 return 0; /* FAIL */
15578 return 0; /* FAIL */
15580 orig_out = operands[0];
15581 tmp = gen_reg_rtx (mode);
15584 /* Recurse to get the constant loaded. */
15585 if (ix86_expand_int_movcc (operands) == 0)
15586 return 0; /* FAIL */
15588 /* Mask in the interesting variable. */
15589 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15591 if (!rtx_equal_p (out, orig_out))
15592 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15594 return 1; /* DONE */
15598 * For comparison with above,
15608 if (! nonimmediate_operand (operands[2], mode))
15609 operands[2] = force_reg (mode, operands[2]);
15610 if (! nonimmediate_operand (operands[3], mode))
15611 operands[3] = force_reg (mode, operands[3]);
15613 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15615 rtx tmp = gen_reg_rtx (mode);
15616 emit_move_insn (tmp, operands[3]);
15619 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15621 rtx tmp = gen_reg_rtx (mode);
15622 emit_move_insn (tmp, operands[2]);
15626 if (! register_operand (operands[2], VOIDmode)
15628 || ! register_operand (operands[3], VOIDmode)))
15629 operands[2] = force_reg (mode, operands[2]);
15632 && ! register_operand (operands[3], VOIDmode))
15633 operands[3] = force_reg (mode, operands[3]);
15635 emit_insn (compare_seq);
15636 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15637 gen_rtx_IF_THEN_ELSE (mode,
15638 compare_op, operands[2],
15641 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15642 gen_rtx_IF_THEN_ELSE (mode,
15644 copy_rtx (operands[3]),
15645 copy_rtx (operands[0]))));
15647 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15648 gen_rtx_IF_THEN_ELSE (mode,
15650 copy_rtx (operands[2]),
15651 copy_rtx (operands[0]))));
15653 return 1; /* DONE */
15656 /* Swap, force into registers, or otherwise massage the two operands
15657 to an sse comparison with a mask result. Thus we differ a bit from
15658 ix86_prepare_fp_compare_args which expects to produce a flags result.
15660 The DEST operand exists to help determine whether to commute commutative
15661 operators. The POP0/POP1 operands are updated in place. The new
15662 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): only fragments of this function's switch on CODE are
   visible in this excerpt.  */
15664 static enum rtx_code
15665 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15666 rtx *pop0, rtx *pop1)
15674 /* We have no LTGT as an operator. We could implement it with
15675 NE & ORDERED, but this requires an extra temporary. It's
15676 not clear that it's worth it. */
15683 /* These are supported directly. */
15690 /* For commutative operators, try to canonicalize the destination
15691 operand to be first in the comparison - this helps reload to
15692 avoid extra moves. */
15693 if (!dest || !rtx_equal_p (dest, *pop1))
15701 /* These are not supported directly. Swap the comparison operands
15702 to transform into something that is supported. */
15706 code = swap_condition (code);
15710 gcc_unreachable ();
15716 /* Detect conditional moves that exactly match min/max operational
15717 semantics. Note that this is IEEE safe, as long as we don't
15718 interchange the operands.
15720 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15721 and TRUE if the operation is successful and instructions are emitted. */
/* NOTE(review): interior lines are elided in this excerpt.  */
15724 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15725 rtx cmp_op1, rtx if_true, rtx if_false)
15727 enum machine_mode mode;
15733 else if (code == UNGE)
15736 if_true = if_false;
/* Match (a OP b) ? a : b against the compare operands to decide whether
   this is a min or a max pattern.  */
15742 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15744 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15749 mode = GET_MODE (dest);
15751 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15752 but MODE may be a vector mode and thus not appropriate. */
15753 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict IEEE semantics: wrap the operands in an IEEE min/max UNSPEC so
   later passes cannot commute them.  */
15755 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15758 if_true = force_reg (mode, if_true);
15759 v = gen_rtvec (2, if_true, if_false);
15760 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Relaxed math: a plain SMIN/SMAX rtx is sufficient.  */
15764 code = is_min ? SMIN : SMAX;
15765 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15768 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15772 /* Expand an sse vector comparison. Return the register with the result. */
/* NOTE(review): a few interior lines are elided in this excerpt.  */
15775 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15776 rtx op_true, rtx op_false)
15778 enum machine_mode mode = GET_MODE (dest);
/* SSE compares require op0 in a register; op1 may be reg or mem.  */
15781 cmp_op0 = force_reg (mode, cmp_op0);
15782 if (!nonimmediate_operand (cmp_op1, mode))
15783 cmp_op1 = force_reg (mode, cmp_op1);
/* Use a fresh register when DEST overlaps either select arm, so the mask
   does not clobber a value still needed by the caller.  */
15786 || reg_overlap_mentioned_p (dest, op_true)
15787 || reg_overlap_mentioned_p (dest, op_false))
15788 dest = gen_reg_rtx (mode);
15790 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15791 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15796 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15797 operations. This is used for both scalar and vector conditional moves. */
/* NOTE(review): interior lines are elided in this excerpt.  */
15800 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15802 enum machine_mode mode = GET_MODE (dest);
/* Special case: false arm is zero -- a single AND with the mask suffices.  */
15805 if (op_false == CONST0_RTX (mode))
15807 op_true = force_reg (mode, op_true);
15808 x = gen_rtx_AND (mode, cmp, op_true);
15809 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Special case: true arm is zero -- ANDN with the mask suffices.  */
15811 else if (op_true == CONST0_RTX (mode))
15813 op_false = force_reg (mode, op_false);
15814 x = gen_rtx_NOT (mode, cmp);
15815 x = gen_rtx_AND (mode, x, op_false);
15816 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real vector conditional move (pcmov).  */
15818 else if (TARGET_SSE5)
/* NOTE(review): gen_rtx_SET is passed MODE here where the surrounding code
   passes VOIDmode -- confirm whether this inconsistency is intentional.  */
15820 rtx pcmov = gen_rtx_SET (mode, dest,
15821 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* General case: dest = (cmp & op_true) | (~cmp & op_false).  */
15828 op_true = force_reg (mode, op_true);
15829 op_false = force_reg (mode, op_false);
15831 t2 = gen_reg_rtx (mode);
15833 t3 = gen_reg_rtx (mode);
15837 x = gen_rtx_AND (mode, op_true, cmp);
15838 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15840 x = gen_rtx_NOT (mode, cmp);
15841 x = gen_rtx_AND (mode, x, op_false);
15842 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15844 x = gen_rtx_IOR (mode, t3, t2);
15845 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15849 /* Expand a floating-point conditional move. Return true if successful. */
/* NOTE(review): interior lines are elided in this excerpt.  */
15852 ix86_expand_fp_movcc (rtx operands[])
15854 enum machine_mode mode = GET_MODE (operands[0]);
15855 enum rtx_code code = GET_CODE (operands[1]);
15856 rtx tmp, compare_op, second_test, bypass_test;
15858 ix86_compare_op0 = XEXP (operands[1], 0);
15859 ix86_compare_op1 = XEXP (operands[1], 1);
/* SSE path: expand as mask-and-blend instead of fcmov.  */
15860 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15862 enum machine_mode cmode;
15864 /* Since we've no cmove for sse registers, don't force bad register
15865 allocation just to gain access to it. Deny movcc when the
15866 comparison mode doesn't match the move mode. */
15867 cmode = GET_MODE (ix86_compare_op0);
15868 if (cmode == VOIDmode)
15869 cmode = GET_MODE (ix86_compare_op1);
15873 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15875 &ix86_compare_op1);
15876 if (code == UNKNOWN)
/* Prefer a direct min/max when the movcc matches that pattern.  */
15879 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15880 ix86_compare_op1, operands[2],
15884 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15885 ix86_compare_op1, operands[2], operands[3]);
15886 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15890 /* The floating point conditional move instructions don't directly
15891 support conditions resulting from a signed integer comparison. */
15893 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15895 /* The floating point conditional move instructions don't directly
15896 support signed integer comparisons. */
15898 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Reduce to setcc into a QImode temp, then compare that against zero,
   which fcmov can handle.  */
15900 gcc_assert (!second_test && !bypass_test);
15901 tmp = gen_reg_rtx (QImode);
15902 ix86_expand_setcc (code, tmp);
15904 ix86_compare_op0 = tmp;
15905 ix86_compare_op1 = const0_rtx;
15906 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy an arm into a fresh pseudo when a follow-up test would otherwise
   read a destination we have already overwritten.  */
15908 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15910 tmp = gen_reg_rtx (mode);
15911 emit_move_insn (tmp, operands[3]);
15914 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15916 tmp = gen_reg_rtx (mode);
15917 emit_move_insn (tmp, operands[2]);
15921 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15922 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15923 operands[2], operands[3])));
15925 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15926 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15927 operands[3], operands[0])));
15929 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15930 gen_rtx_IF_THEN_ELSE (mode, second_test,
15931 operands[2], operands[0])));
15936 /* Expand a floating-point vector conditional move; a vcond operation
15937 rather than a movcc operation. */
/* NOTE(review): a few interior lines are elided in this excerpt.  */
15940 ix86_expand_fp_vcond (rtx operands[])
15942 enum rtx_code code = GET_CODE (operands[3]);
15945 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15946 &operands[4], &operands[5]);
15947 if (code == UNKNOWN)
/* Prefer a direct min/max when the vcond matches that pattern.  */
15950 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15951 operands[5], operands[1], operands[2]))
/* Otherwise compare into a mask and blend the two arms.  */
15954 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15955 operands[1], operands[2]);
15956 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15960 /* Expand a signed/unsigned integral vector conditional move. */
/* NOTE(review): interior lines are elided in this excerpt.  */
15963 ix86_expand_int_vcond (rtx operands[])
15965 enum machine_mode mode = GET_MODE (operands[0]);
15966 enum rtx_code code = GET_CODE (operands[3]);
15967 bool negate = false;
15970 cop0 = operands[4];
15971 cop1 = operands[5];
15973 /* SSE5 supports all of the comparisons on all vector int types. */
15976 /* Canonicalize the comparison to EQ, GT, GTU. */
15987 code = reverse_condition (code);
15993 code = reverse_condition (code);
15999 code = swap_condition (code);
16000 x = cop0, cop0 = cop1, cop1 = x;
16004 gcc_unreachable ();
16007 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16008 if (mode == V2DImode)
16013 /* SSE4.1 supports EQ. */
16014 if (!TARGET_SSE4_1)
16020 /* SSE4.2 supports GT/GTU. */
16021 if (!TARGET_SSE4_2)
16026 gcc_unreachable ();
16030 /* Unsigned parallel compare is not supported by the hardware. Play some
16031 tricks to turn this into a signed comparison against 0. */
16034 cop0 = force_reg (mode, cop0);
16043 /* Perform a parallel modulo subtraction. */
16044 t1 = gen_reg_rtx (mode);
16045 emit_insn ((mode == V4SImode
16047 : gen_subv2di3) (t1, cop0, cop1));
16049 /* Extract the original sign bit of op0. */
16050 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16052 t2 = gen_reg_rtx (mode);
16053 emit_insn ((mode == V4SImode
16055 : gen_andv2di3) (t2, cop0, mask));
16057 /* XOR it back into the result of the subtraction. This results
16058 in the sign bit set iff we saw unsigned underflow. */
16059 x = gen_reg_rtx (mode);
16060 emit_insn ((mode == V4SImode
16062 : gen_xorv2di3) (x, t1, t2));
16070 /* Perform a parallel unsigned saturating subtraction. */
16071 x = gen_reg_rtx (mode);
16072 emit_insn (gen_rtx_SET (VOIDmode, x,
16073 gen_rtx_US_MINUS (mode, cop0, cop1)));
16080 gcc_unreachable ();
16084 cop1 = CONST0_RTX (mode);
/* NEGATE swaps the true/false arms when the canonicalization above
   inverted the comparison.  */
16088 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16089 operands[1+negate], operands[2-negate]);
16091 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16092 operands[2-negate]);
16096 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16097 true if we should do zero extension, else sign extension. HIGH_P is
16098 true if we want the N/2 high elements, else the low elements. */
/* NOTE(review): a few interior lines are elided in this excerpt.  */
16101 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16103 enum machine_mode imode = GET_MODE (operands[1]);
16104 rtx (*unpack)(rtx, rtx, rtx);
/* Select the interleave insn for the source element width and half.  */
16111 unpack = gen_vec_interleave_highv16qi;
16113 unpack = gen_vec_interleave_lowv16qi;
16117 unpack = gen_vec_interleave_highv8hi;
16119 unpack = gen_vec_interleave_lowv8hi;
16123 unpack = gen_vec_interleave_highv4si;
16125 unpack = gen_vec_interleave_lowv4si;
16128 gcc_unreachable ();
16131 dest = gen_lowpart (imode, operands[0]);
/* Second interleave operand: zeros for zero-extension; for sign
   extension, a vector of sign masks computed by comparing 0 > op.  */
16134 se = force_reg (imode, CONST0_RTX (imode));
16136 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16137 operands[1], pc_rtx, pc_rtx);
16139 emit_insn (unpack (dest, operands[1], se));
16142 /* This function performs the same task as ix86_expand_sse_unpack,
16143 but with SSE4.1 instructions. */
/* NOTE(review): a few interior lines are elided in this excerpt.  */
16146 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16148 enum machine_mode imode = GET_MODE (operands[1]);
16149 rtx (*unpack)(rtx, rtx);
/* Select the pmovzx/pmovsx expander for the source element width.  */
16156 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16158 unpack = gen_sse4_1_extendv8qiv8hi2;
16162 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16164 unpack = gen_sse4_1_extendv4hiv4si2;
16168 unpack = gen_sse4_1_zero_extendv2siv2di2;
16170 unpack = gen_sse4_1_extendv2siv2di2;
16173 gcc_unreachable ();
16176 dest = operands[0];
16179 /* Shift higher 8 bytes to lower 8 bytes. */
16180 src = gen_reg_rtx (imode);
16181 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16182 gen_lowpart (TImode, operands[1]),
16188 emit_insn (unpack (dest, src));
16191 /* This function performs the same task as ix86_expand_sse_unpack,
16192 but with sse5 instructions. */
/* NOTE(review): interior lines (switch labels, braces) are elided in
   this excerpt.  The three visible branches build a 16-byte PPERM
   selector vector for V16QI->V8HI, V8HI->V4SI and V4SI->V2DI widening
   respectively.  */
16195 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16197 enum machine_mode imode = GET_MODE (operands[1]);
16198 int pperm_bytes[16];
/* H selects the high or low half of the source bytes.  */
16200 int h = (high_p) ? 8 : 0;
16203 rtvec v = rtvec_alloc (16);
16206 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each source byte followed by a zero/sign byte.  */
16211 vs = rtvec_alloc (8);
16212 h2 = (high_p) ? 8 : 0;
16213 for (i = 0; i < 8; i++)
16215 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16216 pperm_bytes[2*i+1] = ((unsigned_p)
16218 : PPERM_SIGN | PPERM_SRC2 | i | h);
16221 for (i = 0; i < 16; i++)
16222 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16224 for (i = 0; i < 8; i++)
16225 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16227 p = gen_rtx_PARALLEL (VOIDmode, vs);
16228 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16230 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16232 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes followed by two extension bytes.  */
16236 vs = rtvec_alloc (4);
16237 h2 = (high_p) ? 4 : 0;
16238 for (i = 0; i < 4; i++)
16240 sign_extend = ((unsigned_p)
16242 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16243 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16244 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16245 pperm_bytes[4*i+2] = sign_extend;
16246 pperm_bytes[4*i+3] = sign_extend;
16249 for (i = 0; i < 16; i++)
16250 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16252 for (i = 0; i < 4; i++)
16253 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16255 p = gen_rtx_PARALLEL (VOIDmode, vs);
16256 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16258 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16260 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes followed by four extension bytes.  */
16264 vs = rtvec_alloc (2);
16265 h2 = (high_p) ? 2 : 0;
16266 for (i = 0; i < 2; i++)
16268 sign_extend = ((unsigned_p)
16270 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16271 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16272 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16273 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16274 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16275 pperm_bytes[8*i+4] = sign_extend;
16276 pperm_bytes[8*i+5] = sign_extend;
16277 pperm_bytes[8*i+6] = sign_extend;
16278 pperm_bytes[8*i+7] = sign_extend;
16281 for (i = 0; i < 16; i++)
16282 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16284 for (i = 0; i < 2; i++)
16285 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16287 p = gen_rtx_PARALLEL (VOIDmode, vs);
16288 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16290 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16292 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16296 gcc_unreachable ();
16302 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16303 next narrower integer vector type */
/* NOTE(review): interior lines (switch labels, braces) are elided in this
   excerpt.  Each visible branch builds the PPERM byte-selector that takes
   the low bytes of every element from SRC1 (first 8 selector bytes) and
   from SRC2 (last 8 selector bytes).  */
16305 ix86_expand_sse5_pack (rtx operands[3])
16307 enum machine_mode imode = GET_MODE (operands[0]);
16308 int pperm_bytes[16];
16310 rtvec v = rtvec_alloc (16);
16312 rtx op0 = operands[0];
16313 rtx op1 = operands[1];
16314 rtx op2 = operands[2];
/* V8HI -> V16QI: keep byte 0 of each 2-byte element.  */
16319 for (i = 0; i < 8; i++)
16321 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16322 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16325 for (i = 0; i < 16; i++)
16326 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16328 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16329 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V4SI -> V8HI: keep bytes 0-1 of each 4-byte element.  */
16333 for (i = 0; i < 4; i++)
16335 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16336 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16337 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16338 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16341 for (i = 0; i < 16; i++)
16342 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16344 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16345 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V2DI -> V4SI: keep bytes 0-3 of each 8-byte element.  */
16349 for (i = 0; i < 2; i++)
16351 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16352 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16353 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16354 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16355 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16356 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16357 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16358 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16361 for (i = 0; i < 16; i++)
16362 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16364 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16365 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16369 gcc_unreachable ();
16375 /* Expand conditional increment or decrement using adb/sbb instructions.
16376 The default case using setcc followed by the conditional move can be
16377 done by generic code. */
/* NOTE(review): interior lines are elided in this excerpt.  Returns 1 on
   success (DONE); the failure returns are among the elided lines.  */
16379 ix86_expand_int_addcc (rtx operands[])
16381 enum rtx_code code = GET_CODE (operands[1]);
16383 rtx val = const0_rtx;
16384 bool fpcmp = false;
16385 enum machine_mode mode = GET_MODE (operands[0]);
16387 ix86_compare_op0 = XEXP (operands[1], 0);
16388 ix86_compare_op1 = XEXP (operands[1], 1);
/* Only an increment/decrement of exactly 1 can use adc/sbb.  */
16389 if (operands[3] != const1_rtx
16390 && operands[3] != constm1_rtx)
/* The comparison must reduce to a carry-flag test.  */
16392 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16393 ix86_compare_op1, &compare_op))
16395 code = GET_CODE (compare_op);
16397 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16398 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16401 code = ix86_fp_compare_code_to_integer (code);
16408 PUT_CODE (compare_op,
16409 reverse_condition_maybe_unordered
16410 (GET_CODE (compare_op)));
16412 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16414 PUT_MODE (compare_op, mode);
16416 /* Construct either adc or sbb insn. */
16417 if ((code == LTU) == (operands[3] == constm1_rtx))
16419 switch (GET_MODE (operands[0]))
16422 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16425 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16428 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16431 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16434 gcc_unreachable ();
16439 switch (GET_MODE (operands[0]))
16442 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16445 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16448 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16451 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16454 gcc_unreachable ();
16457 return 1; /* DONE */
16461 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16462 works for floating pointer parameters and nonoffsetable memories.
16463 For pushes, it returns just stack offsets; the values will be saved
16464 in the right order. Maximally three parts are generated. */
/* NOTE(review): interior lines are elided in this excerpt; in particular
   the 32-bit/64-bit branch structure around the visible fragments.  */
16467 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Number of parts: SImode-sized pieces on 32-bit (XFmode counts as 3),
   DImode-sized pieces on 64-bit.  */
16472 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16474 size = (GET_MODE_SIZE (mode) + 4) / 8;
16476 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16477 gcc_assert (size >= 2 && size <= 4);
16479 /* Optimize constant pool reference to immediates. This is used by fp
16480 moves, that force all constants to memory to allow combining. */
16481 if (MEM_P (operand) && MEM_READONLY_P (operand))
16483 rtx tmp = maybe_get_pool_constant (operand);
16488 if (MEM_P (operand) && !offsettable_memref_p (operand))
16490 /* The only non-offsetable memories we handle are pushes. */
16491 int ok = push_operand (operand, VOIDmode);
16495 operand = copy_rtx (operand);
16496 PUT_MODE (operand, Pmode);
16497 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16501 if (GET_CODE (operand) == CONST_VECTOR)
16503 enum machine_mode imode = int_mode_for_mode (mode);
16504 /* Caution: if we looked through a constant pool memory above,
16505 the operand may actually have a different mode now. That's
16506 ok, since we want to pun this all the way back to an integer. */
16507 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16508 gcc_assert (operand != NULL);
16514 if (mode == DImode)
16515 split_di (&operand, 1, &parts[0], &parts[1]);
16520 if (REG_P (operand))
16522 gcc_assert (reload_completed);
16523 for (i = 0; i < size; i++)
16524 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16526 else if (offsettable_memref_p (operand))
16528 operand = adjust_address (operand, SImode, 0);
16529 parts[0] = operand;
16530 for (i = 1; i < size; i++)
16531 parts[i] = adjust_address (operand, SImode, 4 * i);
16533 else if (GET_CODE (operand) == CONST_DOUBLE)
/* Decompose an FP constant into its target-format SImode words.  */
16538 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16542 real_to_target (l, &r, mode);
16543 parts[3] = gen_int_mode (l[3], SImode);
16544 parts[2] = gen_int_mode (l[2], SImode);
16547 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16548 parts[2] = gen_int_mode (l[2], SImode);
16551 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16554 gcc_unreachable ();
16556 parts[1] = gen_int_mode (l[1], SImode);
16557 parts[0] = gen_int_mode (l[0], SImode);
16560 gcc_unreachable ();
/* 64-bit path: split into DImode (plus SImode/DImode upper) pieces.  */
16565 if (mode == TImode)
16566 split_ti (&operand, 1, &parts[0], &parts[1]);
16567 if (mode == XFmode || mode == TFmode)
16569 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16570 if (REG_P (operand))
16572 gcc_assert (reload_completed);
16573 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16574 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16576 else if (offsettable_memref_p (operand))
16578 operand = adjust_address (operand, DImode, 0);
16579 parts[0] = operand;
16580 parts[1] = adjust_address (operand, upper_mode, 8);
16582 else if (GET_CODE (operand) == CONST_DOUBLE)
16587 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16588 real_to_target (l, &r, mode);
16590 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16591 if (HOST_BITS_PER_WIDE_INT >= 64)
16594 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16595 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16598 parts[0] = immed_double_const (l[0], l[1], DImode);
16600 if (upper_mode == SImode)
16601 parts[1] = gen_int_mode (l[2], SImode);
16602 else if (HOST_BITS_PER_WIDE_INT >= 64)
16605 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16606 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16609 parts[1] = immed_double_const (l[2], l[3], DImode);
16612 gcc_unreachable ();
16619 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16620 Return false when normal moves are needed; true when all required
16621 insns have been emitted. Operands 2-4 contain the input values
16622 int the correct order; operands 5-7 contain the output values. */
16625 ix86_split_long_move (rtx operands[])
16630 int collisions = 0;
16631 enum machine_mode mode = GET_MODE (operands[0]);
16632 bool collisionparts[4];
16634 /* The DFmode expanders may ask us to move double.
16635 For 64bit target this is single move. By hiding the fact
16636 here we simplify i386.md splitters. */
16637 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16639 /* Optimize constant pool reference to immediates. This is used by
16640 fp moves, that force all constants to memory to allow combining. */
16642 if (MEM_P (operands[1])
16643 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16644 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16645 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16646 if (push_operand (operands[0], VOIDmode))
16648 operands[0] = copy_rtx (operands[0]);
16649 PUT_MODE (operands[0], Pmode);
16652 operands[0] = gen_lowpart (DImode, operands[0]);
16653 operands[1] = gen_lowpart (DImode, operands[1]);
16654 emit_move_insn (operands[0], operands[1]);
16658 /* The only non-offsettable memory we handle is push. */
16659 if (push_operand (operands[0], VOIDmode))
16662 gcc_assert (!MEM_P (operands[0])
16663 || offsettable_memref_p (operands[0]));
16665 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16666 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16668 /* When emitting push, take care for source operands on the stack. */
16669 if (push && MEM_P (operands[1])
16670 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16671 for (i = 0; i < nparts - 1; i++)
16672 part[1][i] = change_address (part[1][i],
16673 GET_MODE (part[1][i]),
16674 XEXP (part[1][i + 1], 0));
16676 /* We need to do copy in the right order in case an address register
16677 of the source overlaps the destination. */
16678 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16682 for (i = 0; i < nparts; i++)
16685 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16686 if (collisionparts[i])
16690 /* Collision in the middle part can be handled by reordering. */
16691 if (collisions == 1 && nparts == 3 && collisionparts [1])
16693 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16694 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16696 else if (collisions == 1
16698 && (collisionparts [1] || collisionparts [2]))
16700 if (collisionparts [1])
16702 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16703 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16707 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16708 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16712 /* If there are more collisions, we can't handle it by reordering.
16713 Do an lea to the last part and use only one colliding move. */
16714 else if (collisions > 1)
16720 base = part[0][nparts - 1];
16722 /* Handle the case when the last part isn't valid for lea.
16723 Happens in 64-bit mode storing the 12-byte XFmode. */
16724 if (GET_MODE (base) != Pmode)
16725 base = gen_rtx_REG (Pmode, REGNO (base));
16727 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16728 part[1][0] = replace_equiv_address (part[1][0], base);
16729 for (i = 1; i < nparts; i++)
16731 tmp = plus_constant (base, UNITS_PER_WORD * i);
16732 part[1][i] = replace_equiv_address (part[1][i], tmp);
16743 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16744 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16745 emit_move_insn (part[0][2], part[1][2]);
16747 else if (nparts == 4)
16749 emit_move_insn (part[0][3], part[1][3]);
16750 emit_move_insn (part[0][2], part[1][2]);
16755 /* In 64bit mode we don't have 32bit push available. In case this is
16756 register, it is OK - we will just use larger counterpart. We also
16757 retype memory - these comes from attempt to avoid REX prefix on
16758 moving of second half of TFmode value. */
16759 if (GET_MODE (part[1][1]) == SImode)
16761 switch (GET_CODE (part[1][1]))
16764 part[1][1] = adjust_address (part[1][1], DImode, 0);
16768 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16772 gcc_unreachable ();
16775 if (GET_MODE (part[1][0]) == SImode)
16776 part[1][0] = part[1][1];
16779 emit_move_insn (part[0][1], part[1][1]);
16780 emit_move_insn (part[0][0], part[1][0]);
16784 /* Choose correct order to not overwrite the source before it is copied. */
16785 if ((REG_P (part[0][0])
16786 && REG_P (part[1][1])
16787 && (REGNO (part[0][0]) == REGNO (part[1][1])
16789 && REGNO (part[0][0]) == REGNO (part[1][2]))
16791 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16793 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16795 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16797 operands[2 + i] = part[0][j];
16798 operands[6 + i] = part[1][j];
16803 for (i = 0; i < nparts; i++)
16805 operands[2 + i] = part[0][i];
16806 operands[6 + i] = part[1][i];
16810 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16811 if (optimize_insn_for_size_p ())
16813 for (j = 0; j < nparts - 1; j++)
16814 if (CONST_INT_P (operands[6 + j])
16815 && operands[6 + j] != const0_rtx
16816 && REG_P (operands[2 + j]))
16817 for (i = j; i < nparts - 1; i++)
16818 if (CONST_INT_P (operands[7 + i])
16819 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16820 operands[7 + i] = operands[2 + j];
16823 for (i = 0; i < nparts; i++)
16824 emit_move_insn (operands[2 + i], operands[6 + i]);
16829 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16830 left shift by a constant, either using a single shift or
16831 a sequence of add instructions. */
16834 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* NOTE(review): excerpt is line-sampled; e.g. the "count == 1" guard and
   the gen_addsi3 arm of these conditionals are elided here.  */
16838 emit_insn ((mode == DImode
16840 : gen_adddi3) (operand, operand, operand));
/* A short run of self-adds (x += x doubles x) can be cheaper than one
   shift-by-constant, per the active cost table -- but never when
   optimizing for size.  */
16842 else if (!optimize_insn_for_size_p ()
16843 && count * ix86_cost->add <= ix86_cost->shift_const)
16846 for (i=0; i<count; i++)
16848 emit_insn ((mode == DImode
16850 : gen_adddi3) (operand, operand, operand));
/* Otherwise emit a single shift-left by COUNT.  */
16854 emit_insn ((mode == DImode
16856 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into word-sized operations.  SCRATCH, if non-NULL, enables the cmove
   variant for non-constant shift counts.  (Excerpt is line-sampled.)  */
16860 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16862 rtx low[2], high[2];
/* Width of one half of the double-word value, in bits.  */
16864 const int single_width = mode == DImode ? 32 : 64;
16866 if (CONST_INT_P (operands[2]))
16868 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
/* Shift counts are taken modulo twice the word width.  */
16869 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16871 if (count >= single_width)
/* Whole low word shifts into the high word; low word becomes zero.  */
16873 emit_move_insn (high[0], low[1]);
16874 emit_move_insn (low[0], const0_rtx);
16876 if (count > single_width)
16877 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Small constant count: shld the carry-over bits into the high word,
   then shift the low word.  */
16881 if (!rtx_equal_p (operands[0], operands[1]))
16882 emit_move_insn (operands[0], operands[1]);
16883 emit_insn ((mode == DImode
16885 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)))
16886 ix86_expand_ashl_const (low[0], count, mode);
16891 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16893 if (operands[1] == const1_rtx)
16895 /* Assuming we've chosen a QImode capable registers, then 1 << N
16896 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16897 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16899 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16901 ix86_expand_clear (low[0]);
16902 ix86_expand_clear (high[0]);
/* Test the "high half" bit of the count; sete/setne then place the
   single 1 bit in the correct half before the final shifts.  */
16903 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16905 d = gen_lowpart (QImode, low[0]);
16906 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16907 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16908 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16910 d = gen_lowpart (QImode, high[0]);
16911 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16912 s = gen_rtx_NE (QImode, flags, const0_rtx);
16913 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16916 /* Otherwise, we can get the same results by manually performing
16917 a bit extract operation on bit 5/6, and then performing the two
16918 shifts. The two methods of getting 0/1 into low/high are exactly
16919 the same size. Avoiding the shift in the bit extract case helps
16920 pentium4 a bit; no one else seems to care much either way. */
16925 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16926 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16928 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16929 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) / bit 6 (TImode) of the count into high[0];
   low[0] gets its complement so exactly one half holds the 1.  */
16931 emit_insn ((mode == DImode
16933 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16934 emit_insn ((mode == DImode
16936 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16937 emit_move_insn (low[0], high[0]);
16938 emit_insn ((mode == DImode
16940 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16943 emit_insn ((mode == DImode
16945 : gen_ashldi3) (low[0], low[0], operands[2]));
16946 emit_insn ((mode == DImode
16948 : gen_ashldi3) (high[0], high[0], operands[2]));
16952 if (operands[1] == constm1_rtx)
16954 /* For -1 << N, we can avoid the shld instruction, because we
16955 know that we're shifting 0...31/63 ones into a -1. */
16956 emit_move_insn (low[0], constm1_rtx)
16957 if (optimize_insn_for_size_p ())
16958 emit_move_insn (high[0], low[0]);
16960 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld + shl, then fix up when the count
   is >= single_width.  */
16964 if (!rtx_equal_p (operands[0], operands[1]))
16965 emit_move_insn (operands[0], operands[1]);
16967 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16968 emit_insn ((mode == DImode
16970 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16973 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16975 if (TARGET_CMOVE && scratch)
/* With cmove: conditionally swap in a zeroed scratch, branch-free.  */
16977 ix86_expand_clear (scratch);
16978 emit_insn ((mode == DImode
16979 ? gen_x86_shift_adj_1
16980 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
/* Without cmove: fall back to the branching adjustment pattern.  */
16984 emit_insn ((mode == DImode
16985 ? gen_x86_shift_adj_2
16986 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into word-sized
   operations.  Mirrors ix86_split_ashl; sign bits are replicated from
   the high word.  (Excerpt is line-sampled.)  */
16990 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16992 rtx low[2], high[2];
16994 const int single_width = mode == DImode ? 32 : 64;
16996 if (CONST_INT_P (operands[2]))
16998 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16999 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17001 if (count == single_width * 2 - 1)
/* Maximal count: both halves become the sign extension of the input.  */
17003 emit_move_insn (high[0], high[1]);
17004 emit_insn ((mode == DImode
17006 : gen_ashrdi3) (high[0], high[0],
17007 GEN_INT (single_width - 1)));
17008 emit_move_insn (low[0], high[0]);
17011 else if (count >= single_width)
/* Count >= one word: low gets the shifted high word; high gets pure
   sign bits.  */
17013 emit_move_insn (low[0], high[1]);
17014 emit_move_insn (high[0], low[0]);
17015 emit_insn ((mode == DImode
17017 : gen_ashrdi3) (high[0], high[0],
17018 GEN_INT (single_width - 1)));
17019 if (count > single_width)
17020 emit_insn ((mode == DImode
17022 : gen_ashrdi3) (low[0], low[0],
17023 GEN_INT (count - single_width)));
/* Small constant count: shrd low bits out of high, then sar the high
   word.  */
17027 if (!rtx_equal_p (operands[0], operands[1]))
17028 emit_move_insn (operands[0], operands[1]);
17029 emit_insn ((mode == DImode
17031 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17032 emit_insn ((mode == DImode
17034 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then adjust for counts >= single_width.  */
17039 if (!rtx_equal_p (operands[0], operands[1]))
17040 emit_move_insn (operands[0], operands[1]);
17042 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17044 emit_insn ((mode == DImode
17046 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17047 emit_insn ((mode == DImode
17049 : gen_ashrdi3) (high[0], high[0], operands[2]));
17051 if (TARGET_CMOVE && scratch)
/* With cmove: scratch holds the sign word for a branch-free fixup.  */
17053 emit_move_insn (scratch, high[0]);
17054 emit_insn ((mode == DImode
17056 : gen_ashrdi3) (scratch, scratch,
17057 GEN_INT (single_width - 1)));
17058 emit_insn ((mode == DImode
17059 ? gen_x86_shift_adj_1
17060 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17064 emit_insn ((mode == DImode
17065 ? gen_x86_shift_adj_3
17066 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into word-sized operations.
   Like ix86_split_ashr but zero-fills instead of sign-extending.
   (Excerpt is line-sampled.)  */
17071 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17073 rtx low[2], high[2];
17075 const int single_width = mode == DImode ? 32 : 64;
17077 if (CONST_INT_P (operands[2]))
17079 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17080 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17082 if (count >= single_width)
/* Count >= one word: low takes the (shifted) high word, high is zero.  */
17084 emit_move_insn (low[0], high[1]);
17085 ix86_expand_clear (high[0]);
17087 if (count > single_width)
17088 emit_insn ((mode == DImode
17090 : gen_lshrdi3) (low[0], low[0],
17091 GEN_INT (count - single_width)));
/* Small constant count: shrd + shr.  */
17095 if (!rtx_equal_p (operands[0], operands[1]))
17096 emit_move_insn (operands[0], operands[1]);
17097 emit_insn ((mode == DImode
17099 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17100 emit_insn ((mode == DImode
17102 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + shr, then adjust for counts >= single_width.  */
17107 if (!rtx_equal_p (operands[0], operands[1]))
17108 emit_move_insn (operands[0], operands[1]);
17110 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17112 emit_insn ((mode == DImode
17114 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17115 emit_insn ((mode == DImode
17117 : gen_lshrdi3) (high[0], high[0], operands[2]));
17119 /* Heh. By reversing the arguments, we can reuse this pattern. */
17120 if (TARGET_CMOVE && scratch)
17122 ix86_expand_clear (scratch);
17123 emit_insn ((mode == DImode
17124 ? gen_x86_shift_adj_1
17125 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17129 emit_insn ((mode == DImode
17130 ? gen_x86_shift_adj_2
17131 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17135 /* Predict just emitted jump instruction to be taken with probability PROB.
   PROB is on the REG_BR_PROB_BASE scale.  The last emitted insn must be
   a jump (asserted below).  */
17137 predict_jump (int prob)
17139 rtx insn = get_last_insn ();
17140 gcc_assert (JUMP_P (insn));
/* Attach the branch-probability note the CFG/branch predictor consumes.  */
17141 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17144 /* Helper function for the string operations below. Test VARIABLE whether
17145 it is aligned to VALUE bytes. If true, jump to the label. */
17147 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17149 rtx label = gen_label_rtx ();
17150 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* tmpcount = variable & value; the branch below tests the result.  */
17151 if (GET_MODE (variable) == DImode)
17152 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17154 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17155 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
/* Different predicted probabilities -- presumably keyed on EPILOGUE
   (the selecting condition is elided in this excerpt; TODO confirm).  */
17158 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17160 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17164 /* Adjust COUNTER by the VALUE, i.e. emit countreg -= value in the
   counter's own mode.  */
17166 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17168 if (GET_MODE (countreg) == DImode)
17169 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17171 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17174 /* Zero extend possibly SImode EXP to Pmode register.  Constants
   (VOIDmode) are simply forced into a Pmode register; values already in
   Pmode are copied to a fresh register.  */
17176 ix86_zero_extend_to_Pmode (rtx exp)
17179 if (GET_MODE (exp) == VOIDmode)
17180 return force_reg (Pmode, exp);
17181 if (GET_MODE (exp) == Pmode)
17182 return copy_to_mode_reg (Pmode, exp);
/* Remaining case: SImode value on a 64-bit target -- zero-extend it.  */
17183 r = gen_reg_rtx (Pmode);
17184 emit_insn (gen_zero_extendsidi2 (r, exp));
17188 /* Divide COUNTREG by SCALE (SCALE is expected to be a power of two --
   exact_log2 below).  Constant counts are folded at compile time;
   otherwise a logical right shift is emitted.  */
17190 scale_counter (rtx countreg, int scale)
17193 rtx piece_size_mask;
17197 if (CONST_INT_P (countreg))
17198 return GEN_INT (INTVAL (countreg) / scale);
17199 gcc_assert (REG_P (countreg));
17201 piece_size_mask = GEN_INT (scale - 1);
17202 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17203 GEN_INT (exact_log2 (scale)),
17204 NULL, 1, OPTAB_DIRECT);
17208 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17209 DImode for constant loop counts (the returned-mode lines are elided
   in this excerpt).  */
17211 static enum machine_mode
17212 counter_mode (rtx count_exp)
17214 if (GET_MODE (count_exp) != VOIDmode)
17215 return GET_MODE (count_exp);
17216 if (GET_CODE (count_exp) != CONST_INT)
/* Constant wider than 32 bits on a 64-bit target needs DImode.  */
17218 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17223 /* When SRCPTR is non-NULL, output simple loop to move memory
17224 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17225 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
17226 equivalent loop to set memory by VALUE (supposed to be in MODE).
17228 The size is rounded down to whole number of chunk size moved at once.
17229 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
17233 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17234 rtx destptr, rtx srcptr, rtx value,
17235 rtx count, enum machine_mode mode, int unroll,
17238 rtx out_label, top_label, iter, tmp;
17239 enum machine_mode iter_mode = counter_mode (count);
/* Bytes consumed per loop iteration, and mask to round COUNT down to a
   whole number of such pieces.  */
17240 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17241 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17247 top_label = gen_label_rtx ();
17248 out_label = gen_label_rtx ();
17249 iter = gen_reg_rtx (iter_mode);
17251 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17252 NULL, 1, OPTAB_DIRECT);
17253 /* Those two should combine. */
17254 if (piece_size == const1_rtx)
/* Skip the whole loop when the rounded size is zero.  */
17256 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17258 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17260 emit_move_insn (iter, const0_rtx);
17262 emit_label (top_label);
/* Address each chunk as base pointer + iteration counter.  */
17264 tmp = convert_modes (Pmode, iter_mode, iter, true);
17265 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17266 destmem = change_address (destmem, mode, x_addr);
17270 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17271 srcmem = change_address (srcmem, mode, y_addr);
17273 /* When unrolling for chips that reorder memory reads and writes,
17274 we can save registers by using single temporary.
17275 Also using 4 temporaries is overkill in 32bit mode. */
17276 if (!TARGET_64BIT && 0)
/* NOTE(review): "&& 0" permanently disables this interleaved
   copy variant; the all-loads-then-all-stores path below is used.  */
17278 for (i = 0; i < unroll; i++)
17283 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17285 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17287 emit_move_insn (destmem, srcmem);
/* Enabled path: load all UNROLL chunks into temporaries first, then
   store them, to tolerate memory reordering.  */
17293 gcc_assert (unroll <= 4);
17294 for (i = 0; i < unroll; i++)
17296 tmpreg[i] = gen_reg_rtx (mode);
17300 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17302 emit_move_insn (tmpreg[i], srcmem);
17304 for (i = 0; i < unroll; i++)
17309 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17311 emit_move_insn (destmem, tmpreg[i]);
/* Memset variant (SRCPTR == NULL): store VALUE into each chunk.  */
17316 for (i = 0; i < unroll; i++)
17320 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17321 emit_move_insn (destmem, value);
17324 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17325 true, OPTAB_LIB_WIDEN);
17327 emit_move_insn (iter, tmp);
17329 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the back-edge probability from the expected trip count when
   known; otherwise default to 80%.  */
17331 if (expected_size != -1)
17333 expected_size /= GET_MODE_SIZE (mode) * unroll;
17334 if (expected_size == 0)
17336 else if (expected_size > REG_BR_PROB_BASE)
17337 predict_jump (REG_BR_PROB_BASE - 1);
17339 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17342 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the live pointers past the copied region for the caller.  */
17343 iter = ix86_zero_extend_to_Pmode (iter);
17344 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17345 true, OPTAB_LIB_WIDEN);
17346 if (tmp != destptr)
17347 emit_move_insn (destptr, tmp);
17350 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17351 true, OPTAB_LIB_WIDEN);
17353 emit_move_insn (srcptr, tmp);
17355 emit_label (out_label);
17358 /* Output "rep; mov" instruction.
17359 Arguments have same meaning as for previous function. */
17361 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17362 rtx destptr, rtx srcptr,
17364 enum machine_mode mode)
17370 /* If the size is known, it is shorter to use rep movs. */
17371 if (mode == QImode && CONST_INT_P (count)
17372 && !(INTVAL (count) & 3))
/* Retype the MEMs as BLKmode at the live pointers so the rep pattern
   sees the whole block with correct aliasing.  */
17375 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17376 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17377 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17378 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
/* Element count = byte count / element size, zero-extended to Pmode.  */
17379 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17380 if (mode != QImode)
/* Final pointer values the rep insn leaves behind:
   ptr + (count << log2(size)) for wide elements...  */
17382 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17383 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17384 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17385 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17386 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17387 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
/* ...or simply ptr + count for byte elements.  */
17391 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17392 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
17394 if (CONST_INT_P (count))
/* Known byte count: record the exact (rounded-down) size on copies of
   both MEMs for alias/size analysis.  */
17396 count = GEN_INT (INTVAL (count)
17397 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17398 destmem = shallow_copy_rtx (destmem);
17399 srcmem = shallow_copy_rtx (srcmem);
17400 set_mem_size (destmem, count);
17401 set_mem_size (srcmem, count);
/* Unknown count: drop any stale size annotations.  */
17405 if (MEM_SIZE (destmem))
17406 set_mem_size (destmem, NULL_RTX);
17407 if (MEM_SIZE (srcmem))
17408 set_mem_size (srcmem, NULL_RTX);
17410 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17414 /* Output "rep; stos" instruction.
17415 Arguments have same meaning as for previous function. */
17417 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17418 rtx count, enum machine_mode mode,
17424 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17425 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* Narrow VALUE to the store element mode and force it into a register
   (it becomes AL/AX/EAX/RAX in the stos pattern).  */
17426 value = force_reg (mode, gen_lowpart (mode, value));
17427 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17428 if (mode != QImode)
/* Final destination pointer: destptr + (count << log2(size)).  */
17430 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17431 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17432 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17435 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Only annotate the MEM size for the all-zeros case with a known
   count; for other values the block content is still "unknown".  */
17436 if (orig_value == const0_rtx && CONST_INT_P (count))
17438 count = GEN_INT (INTVAL (count)
17439 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17440 destmem = shallow_copy_rtx (destmem);
17441 set_mem_size (destmem, count);
17443 else if (MEM_SIZE (destmem))
17444 set_mem_size (destmem, NULL_RTX);
17445 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit one MODE-sized string-move (movs-style) from SRCMEM+OFFSET to
   DESTMEM+OFFSET; the strmov pattern also advances both pointers.  */
17449 emit_strmov (rtx destmem, rtx srcmem,
17450 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17452 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17453 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17454 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17457 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.
   (Excerpt is line-sampled: some braces/else arms are elided.)  */
17459 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17460 rtx destptr, rtx srcptr, rtx count, int max_size)
17463 if (CONST_INT_P (count))
/* Known residue: emit straight-line moves for each set bit of the
   remaining byte count, from 16 bytes down to 1.  */
17465 HOST_WIDE_INT countval = INTVAL (count);
17468 if ((countval & 0x10) && max_size > 16)
/* 16-byte piece as two DImode moves (64-bit path; the 32-bit
   alternative is elided here).  */
17472 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17473 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17476 gcc_unreachable ();
17479 if ((countval & 0x08) && max_size > 8)
17482 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit fallback: two SImode moves for the 8-byte piece.  */
17485 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17486 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17490 if ((countval & 0x04) && max_size > 4)
17492 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17495 if ((countval & 0x02) && max_size > 2)
17497 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17500 if ((countval & 0x01) && max_size > 1)
17502 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large unknown residue: mask the count and fall back to a byte loop.  */
17509 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17510 count, 1, OPTAB_DIRECT);
17511 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17512 count, QImode, 1, 4);
17516 /* When there are stringops, we can cheaply increase dest and src pointers.
17517 Otherwise we save code size by maintaining offset (zero is readily
17518 available from preceding rep operation) and using x86 addressing modes.
17520 if (TARGET_SINGLE_STRINGOP)
/* Stringop path: test each residual size bit and conditionally emit
   one pointer-advancing movs of that width.  */
17524 rtx label = ix86_expand_aligntest (count, 4, true);
17525 src = change_address (srcmem, SImode, srcptr);
17526 dest = change_address (destmem, SImode, destptr);
17527 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17528 emit_label (label);
17529 LABEL_NUSES (label) = 1;
17533 rtx label = ix86_expand_aligntest (count, 2, true);
17534 src = change_address (srcmem, HImode, srcptr);
17535 dest = change_address (destmem, HImode, destptr);
17536 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17537 emit_label (label);
17538 LABEL_NUSES (label) = 1;
17542 rtx label = ix86_expand_aligntest (count, 1, true);
17543 src = change_address (srcmem, QImode, srcptr);
17544 dest = change_address (destmem, QImode, destptr);
17545 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17546 emit_label (label);
17547 LABEL_NUSES (label) = 1;
/* No-stringop path: keep a running OFFSET register and use plain moves
   addressed as ptr + offset.  */
17552 rtx offset = force_reg (Pmode, const0_rtx);
17557 rtx label = ix86_expand_aligntest (count, 4, true);
17558 src = change_address (srcmem, SImode, srcptr);
17559 dest = change_address (destmem, SImode, destptr);
17560 emit_move_insn (dest, src);
17561 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17562 true, OPTAB_LIB_WIDEN);
17564 emit_move_insn (offset, tmp);
17565 emit_label (label);
17566 LABEL_NUSES (label) = 1;
17570 rtx label = ix86_expand_aligntest (count, 2, true);
17571 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17572 src = change_address (srcmem, HImode, tmp);
17573 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17574 dest = change_address (destmem, HImode, tmp);
17575 emit_move_insn (dest, src);
17576 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17577 true, OPTAB_LIB_WIDEN);
17579 emit_move_insn (offset, tmp);
17580 emit_label (label);
17581 LABEL_NUSES (label) = 1;
17585 rtx label = ix86_expand_aligntest (count, 1, true);
17586 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17587 src = change_address (srcmem, QImode, tmp);
17588 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17589 dest = change_address (destmem, QImode, tmp);
17590 emit_move_insn (dest, src);
17591 emit_label (label);
17592 LABEL_NUSES (label) = 1;
17597 /* Output code to set at most count & (max_size - 1) bytes starting by DEST,
   using a simple unrolled byte loop (QImode stores of VALUE).  */
17599 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17600 rtx count, int max_size)
/* Mask COUNT down to the residual byte count first.  */
17603 expand_simple_binop (counter_mode (count), AND, count,
17604 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17605 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17606 gen_lowpart (QImode, value), count, QImode,
17610 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.
   Memset counterpart of expand_movmem_epilogue.  (Excerpt is
   line-sampled: some braces/else arms are elided.)  */
17612 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17616 if (CONST_INT_P (count))
/* Known residue: straight-line stores per set bit, 16 down to 1.  */
17618 HOST_WIDE_INT countval = INTVAL (count);
17621 if ((countval & 0x10) && max_size > 16)
17625 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17626 emit_insn (gen_strset (destptr, dest, value));
17627 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17628 emit_insn (gen_strset (destptr, dest, value));
17631 gcc_unreachable ();
17634 if ((countval & 0x08) && max_size > 8)
17638 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17639 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit fallback: two SImode stores for the 8-byte piece.  */
17643 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17644 emit_insn (gen_strset (destptr, dest, value));
17645 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17646 emit_insn (gen_strset (destptr, dest, value));
17650 if ((countval & 0x04) && max_size > 4)
17652 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17653 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17656 if ((countval & 0x02) && max_size > 2)
17658 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17659 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17662 if ((countval & 0x01) && max_size > 1)
17664 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17665 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large unknown residue: fall back to the byte loop helper.  */
17672 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Unknown residue, bounded size: test each size bit and conditionally
   emit pointer-advancing stores of the matching width.  */
17677 rtx label = ix86_expand_aligntest (count, 16, true);
17680 dest = change_address (destmem, DImode, destptr);
17681 emit_insn (gen_strset (destptr, dest, value));
17682 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit variant: four SImode stores cover the 16 bytes.  */
17686 dest = change_address (destmem, SImode, destptr);
17687 emit_insn (gen_strset (destptr, dest, value));
17688 emit_insn (gen_strset (destptr, dest, value));
17689 emit_insn (gen_strset (destptr, dest, value));
17690 emit_insn (gen_strset (destptr, dest, value));
17692 emit_label (label);
17693 LABEL_NUSES (label) = 1;
17697 rtx label = ix86_expand_aligntest (count, 8, true);
17700 dest = change_address (destmem, DImode, destptr);
17701 emit_insn (gen_strset (destptr, dest, value));
17705 dest = change_address (destmem, SImode, destptr);
17706 emit_insn (gen_strset (destptr, dest, value));
17707 emit_insn (gen_strset (destptr, dest, value));
17709 emit_label (label);
17710 LABEL_NUSES (label) = 1;
17714 rtx label = ix86_expand_aligntest (count, 4, true);
17715 dest = change_address (destmem, SImode, destptr);
17716 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17717 emit_label (label);
17718 LABEL_NUSES (label) = 1;
17722 rtx label = ix86_expand_aligntest (count, 2, true);
17723 dest = change_address (destmem, HImode, destptr);
17724 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17725 emit_label (label);
17726 LABEL_NUSES (label) = 1;
17730 rtx label = ix86_expand_aligntest (count, 1, true);
17731 dest = change_address (destmem, QImode, destptr);
17732 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17733 emit_label (label);
17734 LABEL_NUSES (label) = 1;
17738 /* Copy enough from DEST to SRC to align DEST known to be aligned by ALIGN to
17739 DESIRED_ALIGNMENT.  Emits one conditional copy per alignment level
   (1, 2, 4 bytes), decrementing COUNT accordingly.  */
17741 expand_movmem_prologue (rtx destmem, rtx srcmem,
17742 rtx destptr, rtx srcptr, rtx count,
17743 int align, int desired_alignment)
17745 if (align <= 1 && desired_alignment > 1)
/* If destptr is odd, copy one byte to make it 2-byte aligned.  */
17747 rtx label = ix86_expand_aligntest (destptr, 1, false);
17748 srcmem = change_address (srcmem, QImode, srcptr);
17749 destmem = change_address (destmem, QImode, destptr);
17750 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17751 ix86_adjust_counter (count, 1);
17752 emit_label (label);
17753 LABEL_NUSES (label) = 1;
17755 if (align <= 2 && desired_alignment > 2)
/* If destptr is 2 mod 4, copy a halfword to reach 4-byte alignment.  */
17757 rtx label = ix86_expand_aligntest (destptr, 2, false);
17758 srcmem = change_address (srcmem, HImode, srcptr);
17759 destmem = change_address (destmem, HImode, destptr);
17760 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17761 ix86_adjust_counter (count, 2);
17762 emit_label (label);
17763 LABEL_NUSES (label) = 1;
17765 if (align <= 4 && desired_alignment > 4)
/* If destptr is 4 mod 8, copy a word to reach 8-byte alignment.  */
17767 rtx label = ix86_expand_aligntest (destptr, 4, false);
17768 srcmem = change_address (srcmem, SImode, srcptr);
17769 destmem = change_address (destmem, SImode, destptr);
17770 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17771 ix86_adjust_counter (count, 4);
17772 emit_label (label);
17773 LABEL_NUSES (label) = 1;
/* Alignments above 8 are not handled by this prologue.  */
17775 gcc_assert (desired_alignment <= 8);
17778 /* Copy enough bytes from SRC (through *SRCP) to DST to align DST to
17779 DESIRED_ALIGN.  ALIGN_BYTES is the exact number of bytes to copy.
   Because the byte count is a compile-time constant, alias/alignment info
   (MEM_ALIGN, MEM_SIZE, offsets) is maintained precisely on both operands.
   NOTE(review): this listing omits interior lines (the embedded line
   numbers jump), so braces and some statements are not visible here.  */
17781 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17782 int desired_align, int align_bytes)
17785 rtx src_size, dst_size;
/* How many bytes until SRC reaches DESIRED_ALIGN, or negative if unknown.  */
17787 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17788 if (src_align_bytes >= 0)
17789 src_align_bytes = desired_align - src_align_bytes;
17790 src_size = MEM_SIZE (src);
17791 dst_size = MEM_SIZE (dst);
/* Copy 1, then 2, then 4 bytes as dictated by the low bits of ALIGN_BYTES,
   upgrading MEM_ALIGN on each operand when the new alignment is provable.  */
17792 if (align_bytes & 1)
17794 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17795 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17797 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17799 if (align_bytes & 2)
17801 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17802 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17803 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17804 set_mem_align (dst, 2 * BITS_PER_UNIT);
17805 if (src_align_bytes >= 0
17806 && (src_align_bytes & 1) == (align_bytes & 1)
17807 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17808 set_mem_align (src, 2 * BITS_PER_UNIT);
17810 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17812 if (align_bytes & 4)
17814 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17815 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17816 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17817 set_mem_align (dst, 4 * BITS_PER_UNIT);
17818 if (src_align_bytes >= 0)
17820 unsigned int src_align = 0;
17821 if ((src_align_bytes & 3) == (align_bytes & 3))
17823 else if ((src_align_bytes & 1) == (align_bytes & 1))
17825 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17826 set_mem_align (src, src_align * BITS_PER_UNIT);
17829 emit_insn (gen_strmov (destreg, dst, srcreg, src))
/* Finally re-describe both operands as BLKmode at the new offset with the
   alignment the prologue just established.  */;
17831 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17832 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17833 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17834 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17835 if (src_align_bytes >= 0)
17837 unsigned int src_align = 0;
17838 if ((src_align_bytes & 7) == (align_bytes & 7))
17840 else if ((src_align_bytes & 3) == (align_bytes & 3))
17842 else if ((src_align_bytes & 1) == (align_bytes & 1))
17844 if (src_align > (unsigned int) desired_align)
17845 src_align = desired_align;
17846 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17847 set_mem_align (src, src_align * BITS_PER_UNIT);
17850 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
/* FIX: the second call must shrink SRC's recorded size, not DST's again —
   DST was already updated from DST_SIZE on the previous line; writing
   SRC_SIZE - align_bytes into DST corrupted DST's alias info.  */
17852 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17857 /* Store enough copies of VALUE into DEST so that DEST, known to be
17858 aligned to ALIGN, becomes aligned to DESIRED_ALIGNMENT.
   COUNT is decremented by each byte stored.  Mirrors
   expand_movmem_prologue, but emits stores (gen_strset) instead of moves.
   NOTE(review): interior lines are omitted in this listing (embedded line
   numbers jump).  */
17860 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17861 int align, int desired_alignment)
/* One byte store reaches 2-byte alignment.  */
17863 if (align <= 1 && desired_alignment > 1)
17865 rtx label = ix86_expand_aligntest (destptr, 1, false);
17866 destmem = change_address (destmem, QImode, destptr);
17867 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17868 ix86_adjust_counter (count, 1);
17869 emit_label (label);
17870 LABEL_NUSES (label) = 1;
/* One halfword store reaches 4-byte alignment.  */
17872 if (align <= 2 && desired_alignment > 2)
17874 rtx label = ix86_expand_aligntest (destptr, 2, false);
17875 destmem = change_address (destmem, HImode, destptr);
17876 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17877 ix86_adjust_counter (count, 2);
17878 emit_label (label);
17879 LABEL_NUSES (label) = 1;
/* One word store reaches 8-byte alignment.  */
17881 if (align <= 4 && desired_alignment > 4)
17883 rtx label = ix86_expand_aligntest (destptr, 4, false);
17884 destmem = change_address (destmem, SImode, destptr);
17885 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17886 ix86_adjust_counter (count, 4);
17887 emit_label (label);
17888 LABEL_NUSES (label) = 1;
/* The chain above only reaches 8-byte alignment.  */
17890 gcc_assert (desired_alignment <= 8);
17893 /* Store enough of VALUE into DST to align DST, known to be aligned by
17894 ALIGN, to DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.
   Constant-count variant: keeps MEM_ALIGN/MEM_SIZE alias info exact,
   like expand_constant_movmem_prologue but for stores only.
   NOTE(review): interior lines are omitted in this listing.  */
17896 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17897 int desired_align, int align_bytes)
17900 rtx dst_size = MEM_SIZE (dst);
/* Store 1, 2, then 4 bytes as dictated by the low bits of ALIGN_BYTES.  */
17901 if (align_bytes & 1)
17903 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17905 emit_insn (gen_strset (destreg, dst,
17906 gen_lowpart (QImode, value)));
17908 if (align_bytes & 2)
17910 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17911 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17912 set_mem_align (dst, 2 * BITS_PER_UNIT);
17914 emit_insn (gen_strset (destreg, dst,
17915 gen_lowpart (HImode, value)));
17917 if (align_bytes & 4)
17919 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17920 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17921 set_mem_align (dst, 4 * BITS_PER_UNIT);
17923 emit_insn (gen_strset (destreg, dst,
17924 gen_lowpart (SImode, value)));
/* Re-describe DST as BLKmode at the new offset with the proved alignment,
   and shrink its recorded size by the bytes just stored.  */
17926 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17927 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17928 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17930 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17934 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.
   MEMSET selects the memset cost table over the memcpy one.  On return
   *DYNAMIC_CHECK is -1, or a size threshold above which a runtime libcall
   branch should be emitted (-minline-stringops-dynamically).
   NOTE(review): interior lines are omitted in this listing (embedded line
   numbers jump).  */
17935 static enum stringop_alg
17936 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17937 int *dynamic_check)
17939 const struct stringop_algs * algs;
17940 bool optimize_for_speed;
17941 /* Algorithms using the rep prefix want at least edi and ecx;
17942 additionally, memset wants eax and memcpy wants esi. Don't
17943 consider such algorithms if the user has appropriated those
17944 registers for their own purposes. */
17945 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17947 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17949 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17950 || (alg != rep_prefix_1_byte \
17951 && alg != rep_prefix_4_byte \
17952 && alg != rep_prefix_8_byte))
17953 const struct processor_costs *cost;
17955 /* Even if the string operation call is cold, we still might spend a lot
17956 of time processing large blocks. */
17957 if (optimize_function_for_size_p (cfun)
17958 || (optimize_insn_for_size_p ()
17959 && expected_size != -1 && expected_size < 256))
17960 optimize_for_speed = false;
17962 optimize_for_speed = true;
17964 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17966 *dynamic_check = -1;
17968 algs = &cost->memset[TARGET_64BIT != 0];
17970 algs = &cost->memcpy[TARGET_64BIT != 0];
/* An explicit -mstringop-strategy wins when usable.  */
17971 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17972 return stringop_alg;
17973 /* rep; movq or rep; movl is the smallest variant. */
17974 else if (!optimize_for_speed)
17976 if (!count || (count & 3))
17977 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17979 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17981 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17983 else if (expected_size != -1 && expected_size < 4)
17984 return loop_1_byte;
17985 else if (expected_size != -1)
17988 enum stringop_alg alg = libcall;
/* FIX: loop bound macro is MAX_STRINGOP_ALGS (was mistyped
   NAX_STRINGOP_ALGS, an undeclared identifier).  */
17989 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17991 /* We get here if the algorithms that were not libcall-based
17992 were rep-prefix based and we are unable to use rep prefixes
17993 based on global register usage. Break out of the loop and
17994 use the heuristic below. */
17995 if (algs->size[i].max == 0)
17997 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17999 enum stringop_alg candidate = algs->size[i].alg;
18001 if (candidate != libcall && ALG_USABLE_P (candidate))
18003 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18004 last non-libcall inline algorithm. */
18005 if (TARGET_INLINE_ALL_STRINGOPS)
18007 /* When the current size is best to be copied by a libcall,
18008 but we are still forced to inline, run the heuristic below
18009 that will pick code for medium sized blocks. */
18010 if (alg != libcall)
18014 else if (ALG_USABLE_P (candidate))
18018 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18020 /* When asked to inline the call anyway, try to pick meaningful choice.
18021 We look for maximal size of block that is faster to copy by hand and
18022 take blocks of at most of that size guessing that average size will
18023 be roughly half of the block.
18025 If this turns out to be bad, we might simply specify the preferred
18026 choice in ix86_costs. */
18027 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18028 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18031 enum stringop_alg alg;
18033 bool any_alg_usable_p = true;
/* FIX: same typo as above — MAX_STRINGOP_ALGS.  */
18035 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18037 enum stringop_alg candidate = algs->size[i].alg;
18038 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18040 if (candidate != libcall && candidate
18041 && ALG_USABLE_P (candidate))
18042 max = algs->size[i].max;
18044 /* If there aren't any usable algorithms, then recursing on
18045 smaller sizes isn't going to find anything. Just return the
18046 simple byte-at-a-time copy loop. */
18047 if (!any_alg_usable_p)
18049 /* Pick something reasonable. */
18050 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18051 *dynamic_check = 128;
18052 return loop_1_byte;
/* Recurse with half the maximal hand-copyable size as the size guess;
   the recursion must resolve without a libcall or dynamic check.  */
18056 alg = decide_alg (count, max / 2, memset, dynamic_check);
18057 gcc_assert (*dynamic_check == -1);
18058 gcc_assert (alg != libcall);
18059 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18060 *dynamic_check = max;
18063 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18064 #undef ALG_USABLE_P
18067 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18068 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).
   Returns the alignment the prologue should establish for the chosen
   algorithm ALG; never less than ALIGN, and just ALIGN for very small
   expected sizes where an alignment prologue is not worth it.
   NOTE(review): interior lines (case bodies, breaks) are omitted in this
   listing.  */
18070 decide_alignment (int align,
18071 enum stringop_alg alg,
18074 int desired_align = 0;
18078 gcc_unreachable ();
18080 case unrolled_loop:
18081 desired_align = GET_MODE_SIZE (Pmode);
18083 case rep_prefix_8_byte:
18086 case rep_prefix_4_byte:
18087 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18088 copying whole cacheline at once. */
18089 if (TARGET_PENTIUMPRO)
18094 case rep_prefix_1_byte:
18095 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18096 copying whole cacheline at once. */
18097 if (TARGET_PENTIUMPRO)
/* Never weaken the alignment the caller already proved.  */
18111 if (desired_align < align)
18112 desired_align = align;
/* Tiny blocks: skip the alignment prologue entirely.  */
18113 if (expected_size != -1 && expected_size < 4)
18114 desired_align = align;
18115 return desired_align;
18118 /* Return the smallest power of 2 greater than VAL. */
/* NOTE(review): the function body is omitted from this listing (embedded
   line numbers jump from 18120 past it); only the signature survives.  */
18120 smallest_pow2_greater_than (int val)
18128 /* Expand string move (memcpy) operation. Use i386 string operations when
18129 profitable. expand_setmem contains similar code. The code depends upon
18130 architecture, block size and alignment, but always has the same
18133 1) Prologue guard: Conditional that jumps up to epilogues for small
18134 blocks that can be handled by epilogue alone. This is faster but
18135 also needed for correctness, since the prologue assumes the block is larger
18136 than the desired alignment.
18138 Optional dynamic check for size and libcall for large
18139 blocks is emitted here too, with -minline-stringops-dynamically.
18141 2) Prologue: copy first few bytes in order to get destination aligned
18142 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18143 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18144 We emit either a jump tree on power of two sized blocks, or a byte loop.
18146 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18147 with specified algorithm.
18149 4) Epilogue: code copying tail of the block that is too small to be
18150 handled by main body (or up to size guarded by prologue guard).
   NOTE(review): many interior lines are omitted in this listing (embedded
   line numbers jump), so braces, switch heads and some statements are not
   visible here.  */
18153 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18154 rtx expected_align_exp, rtx expected_size_exp)
18160 rtx jump_around_label = NULL;
18161 HOST_WIDE_INT align = 1;
18162 unsigned HOST_WIDE_INT count = 0;
18163 HOST_WIDE_INT expected_size = -1;
18164 int size_needed = 0, epilogue_size_needed;
18165 int desired_align = 0, align_bytes = 0;
18166 enum stringop_alg alg;
18168 bool need_zero_guard = false;
/* Derive ALIGN/COUNT/EXPECTED_SIZE from the operands when constant.  */
18170 if (CONST_INT_P (align_exp))
18171 align = INTVAL (align_exp);
18172 /* i386 can do misaligned access on reasonably increased cost. */
18173 if (CONST_INT_P (expected_align_exp)
18174 && INTVAL (expected_align_exp) > align)
18175 align = INTVAL (expected_align_exp);
18176 /* ALIGN is the minimum of destination and source alignment, but we care here
18177 just about destination alignment. */
18178 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18179 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18181 if (CONST_INT_P (count_exp))
18182 count = expected_size = INTVAL (count_exp);
18183 if (CONST_INT_P (expected_size_exp) && count == 0)
18184 expected_size = INTVAL (expected_size_exp);
18186 /* Make sure we don't need to care about overflow later on. */
18187 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18190 /* Step 0: Decide on preferred algorithm, desired alignment and
18191 size of chunks to be copied by main loop. */
18193 alg = decide_alg (count, expected_size, false, &dynamic_check);
18194 desired_align = decide_alignment (align, alg, expected_size);
18196 if (!TARGET_ALIGN_STRINGOPS)
18197 align = desired_align;
18199 if (alg == libcall)
18201 gcc_assert (alg != no_stringop);
18203 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18204 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18205 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm chunk size and whether a zero-count guard is required.  */
18210 gcc_unreachable ();
18212 need_zero_guard = true;
18213 size_needed = GET_MODE_SIZE (Pmode);
18215 case unrolled_loop:
18216 need_zero_guard = true;
18217 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18219 case rep_prefix_8_byte:
18222 case rep_prefix_4_byte:
18225 case rep_prefix_1_byte:
18229 need_zero_guard = true;
18234 epilogue_size_needed = size_needed;
18236 /* Step 1: Prologue guard. */
18238 /* Alignment code needs count to be in register. */
18239 if (CONST_INT_P (count_exp) && desired_align > align)
18241 if (INTVAL (count_exp) > desired_align
18242 && INTVAL (count_exp) > size_needed)
18245 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18246 if (align_bytes <= 0)
18249 align_bytes = desired_align - align_bytes;
18251 if (align_bytes == 0)
18252 count_exp = force_reg (counter_mode (count_exp), count_exp);
18254 gcc_assert (desired_align >= 1 && align >= 1);
18256 /* Ensure that alignment prologue won't copy past end of block. */
18257 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18259 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18260 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18261 Make sure it is power of 2. */
18262 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18266 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18268 /* If main algorithm works on QImode, no epilogue is needed.
18269 For small sizes just don't align anything. */
18270 if (size_needed == 1)
18271 desired_align = align;
/* Runtime guard: small blocks jump straight to the epilogue.  */
18278 label = gen_label_rtx ();
18279 emit_cmp_and_jump_insns (count_exp,
18280 GEN_INT (epilogue_size_needed),
18281 LTU, 0, counter_mode (count_exp), 1, label);
18282 if (expected_size == -1 || expected_size < epilogue_size_needed)
18283 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18285 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18289 /* Emit code to decide on runtime whether library call or inline should be
18291 if (dynamic_check != -1)
18293 if (CONST_INT_P (count_exp))
18295 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18297 emit_block_move_via_libcall (dst, src, count_exp, false);
18298 count_exp = const0_rtx;
18304 rtx hot_label = gen_label_rtx ();
18305 jump_around_label = gen_label_rtx ();
18306 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18307 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18308 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18309 emit_block_move_via_libcall (dst, src, count_exp, false);
18310 emit_jump (jump_around_label);
18311 emit_label (hot_label);
18315 /* Step 2: Alignment prologue. */
18317 if (desired_align > align)
18319 if (align_bytes == 0)
18321 /* Except for the first move in epilogue, we no longer know
18322 constant offset in aliasing info. It don't seems to worth
18323 the pain to maintain it for the first move, so throw away
18325 src = change_address (src, BLKmode, srcreg);
18326 dst = change_address (dst, BLKmode, destreg);
18327 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18332 /* If we know how many bytes need to be stored before dst is
18333 sufficiently aligned, maintain aliasing info accurately. */
18334 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18335 desired_align, align_bytes);
18336 count_exp = plus_constant (count_exp, -align_bytes);
18337 count -= align_bytes;
18339 if (need_zero_guard
18340 && (count < (unsigned HOST_WIDE_INT) size_needed
18341 || (align_bytes == 0
18342 && count < ((unsigned HOST_WIDE_INT) size_needed
18343 + desired_align - align))))
18345 /* It is possible that we copied enough so the main loop will not
18347 gcc_assert (size_needed > 1);
18348 if (label == NULL_RTX)
18349 label = gen_label_rtx ();
18350 emit_cmp_and_jump_insns (count_exp,
18351 GEN_INT (size_needed),
18352 LTU, 0, counter_mode (count_exp), 1, label);
18353 if (expected_size == -1
18354 || expected_size < (desired_align - align) / 2 + size_needed)
18355 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18357 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18360 if (label && size_needed == 1)
18362 emit_label (label);
18363 LABEL_NUSES (label) = 1;
18365 epilogue_size_needed = 1;
18367 else if (label == NULL_RTX)
18368 epilogue_size_needed = size_needed;
18370 /* Step 3: Main loop. */
18376 gcc_unreachable ();
18378 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18379 count_exp, QImode, 1, expected_size);
18382 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18383 count_exp, Pmode, 1, expected_size);
18385 case unrolled_loop:
18386 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18387 registers for 4 temporaries anyway. */
18388 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18389 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18392 case rep_prefix_8_byte:
18393 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18396 case rep_prefix_4_byte:
18397 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18400 case rep_prefix_1_byte:
18401 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18405 /* Adjust properly the offset of src and dest memory for aliasing. */
18406 if (CONST_INT_P (count_exp))
18408 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18409 (count / size_needed) * size_needed);
18410 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18411 (count / size_needed) * size_needed);
18415 src = change_address (src, BLKmode, srcreg);
18416 dst = change_address (dst, BLKmode, destreg);
18419 /* Step 4: Epilogue to copy the remaining bytes. */
18423 /* When the main loop is done, COUNT_EXP might hold original count,
18424 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18425 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18426 bytes. Compensate if needed. */
18428 if (size_needed < epilogue_size_needed)
18431 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18432 GEN_INT (size_needed - 1), count_exp, 1,
18434 if (tmp != count_exp)
18435 emit_move_insn (count_exp, tmp);
18437 emit_label (label);
18438 LABEL_NUSES (label) = 1;
18441 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18442 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18443 epilogue_size_needed);
18444 if (jump_around_label)
18445 emit_label (jump_around_label);
18449 /* Helper for memset expansion (used via promote_duplicated_reg_to_size
18450 from ix86_expand_setmem). For QImode value 0xXY produce
18451 0xXYXYXYXY of the width specified by MODE. This is essentially
18452 a * 0x01010101, but we can do slightly better than
18453 synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply (the multiplier is the replicated-ones constant built by
   the recursive promote_duplicated_reg (mode, const1_rtx) call below).
   NOTE(review): interior lines are omitted in this listing (embedded line
   numbers jump).  */
18455 promote_duplicated_reg (enum machine_mode mode, rtx val)
18457 enum machine_mode valmode = GET_MODE (val);
18459 int nops = mode == DImode ? 3 : 2;
18461 gcc_assert (mode == SImode || mode == DImode);
18462 if (val == const0_rtx)
18463 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: replicate at compile time.  */
18464 if (CONST_INT_P (val))
18466 HOST_WIDE_INT v = INTVAL (val) & 255;
18470 if (mode == DImode)
18471 v |= (v << 16) << 16;
18472 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18475 if (valmode == VOIDmode)
18477 if (valmode != QImode)
18478 val = gen_lowpart (QImode, val);
18479 if (mode == QImode)
/* If multiply is cheap enough per the cost tables, replicate via
   reg * 0x01...01; otherwise fall through to the shift/or sequence.  */
18481 if (!TARGET_PARTIAL_REG_STALL)
18483 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18484 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18485 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18486 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18488 rtx reg = convert_modes (mode, QImode, val, true);
18489 tmp = promote_duplicated_reg (mode, const1_rtx);
18490 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Shift/or replication: double the populated width at each step
   (8, then 16, then 32 bits for DImode).  */
18495 rtx reg = convert_modes (mode, QImode, val, true);
18497 if (!TARGET_PARTIAL_REG_STALL)
18498 if (mode == SImode)
18499 emit_insn (gen_movsi_insv_1 (reg, reg));
18501 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18504 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18505 NULL, 1, OPTAB_DIRECT);
18507 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18509 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18510 NULL, 1, OPTAB_DIRECT);
18511 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18512 if (mode == SImode)
18514 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18515 NULL, 1, OPTAB_DIRECT);
18516 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18521 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18522 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18523 alignment from ALIGN to DESIRED_ALIGN.
   Picks the widest replication width any emitted store will need; returns
   VAL unchanged when single bytes suffice.
   NOTE(review): the HImode branch below calls promote_duplicated_reg, which
   asserts mode == SImode || mode == DImode — confirm against the full
   source whether this path is reachable.  */
18525 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18530 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18531 promoted_val = promote_duplicated_reg (DImode, val);
18532 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18533 promoted_val = promote_duplicated_reg (SImode, val);
18534 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18535 promoted_val = promote_duplicated_reg (HImode, val);
18537 promoted_val = val;
18539 return promoted_val;
18542 /* Expand string clear operation (bzero). Use i386 string operations when
18543 profitable. See expand_movmem comment for explanation of individual
18544 steps performed.
   Unlike movmem, the fill byte VAL_EXP is first "promoted" (replicated
   into a wider register by promote_duplicated_reg_to_size) so the main
   loop can store whole words; small blocks jump around that promotion,
   which is why a loopy byte epilogue may be forced for non-constant VAL.
   NOTE(review): many interior lines are omitted in this listing (embedded
   line numbers jump), so braces, switch heads and some statements are not
   visible here.  */
18546 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18547 rtx expected_align_exp, rtx expected_size_exp)
18552 rtx jump_around_label = NULL;
18553 HOST_WIDE_INT align = 1;
18554 unsigned HOST_WIDE_INT count = 0;
18555 HOST_WIDE_INT expected_size = -1;
18556 int size_needed = 0, epilogue_size_needed;
18557 int desired_align = 0, align_bytes = 0;
18558 enum stringop_alg alg;
18559 rtx promoted_val = NULL;
18560 bool force_loopy_epilogue = false;
18562 bool need_zero_guard = false;
18564 if (CONST_INT_P (align_exp))
18565 align = INTVAL (align_exp);
18566 /* i386 can do misaligned access on reasonably increased cost. */
18567 if (CONST_INT_P (expected_align_exp)
18568 && INTVAL (expected_align_exp) > align)
18569 align = INTVAL (expected_align_exp);
18570 if (CONST_INT_P (count_exp))
18571 count = expected_size = INTVAL (count_exp);
18572 if (CONST_INT_P (expected_size_exp) && count == 0)
18573 expected_size = INTVAL (expected_size_exp);
18575 /* Make sure we don't need to care about overflow later on. */
18576 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18579 /* Step 0: Decide on preferred algorithm, desired alignment and
18580 size of chunks to be copied by main loop. */
18582 alg = decide_alg (count, expected_size, true, &dynamic_check);
18583 desired_align = decide_alignment (align, alg, expected_size);
18585 if (!TARGET_ALIGN_STRINGOPS)
18586 align = desired_align;
18588 if (alg == libcall)
18590 gcc_assert (alg != no_stringop);
18592 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18593 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* Per-algorithm chunk size and whether a zero-count guard is required.  */
18598 gcc_unreachable ();
18600 need_zero_guard = true;
18601 size_needed = GET_MODE_SIZE (Pmode);
18603 case unrolled_loop:
18604 need_zero_guard = true;
18605 size_needed = GET_MODE_SIZE (Pmode) * 4;
18607 case rep_prefix_8_byte:
18610 case rep_prefix_4_byte:
18613 case rep_prefix_1_byte:
18617 need_zero_guard = true;
18621 epilogue_size_needed = size_needed;
18623 /* Step 1: Prologue guard. */
18625 /* Alignment code needs count to be in register. */
18626 if (CONST_INT_P (count_exp) && desired_align > align)
18628 if (INTVAL (count_exp) > desired_align
18629 && INTVAL (count_exp) > size_needed)
18632 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18633 if (align_bytes <= 0)
18636 align_bytes = desired_align - align_bytes;
18638 if (align_bytes == 0)
18640 enum machine_mode mode = SImode;
18641 if (TARGET_64BIT && (count & ~0xffffffff))
18643 count_exp = force_reg (mode, count_exp);
18646 /* Do the cheap promotion to allow better CSE across the
18647 main loop and epilogue (ie one load of the big constant in the
18648 front of all code. */
18649 if (CONST_INT_P (val_exp))
18650 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18651 desired_align, align);
18652 /* Ensure that alignment prologue won't copy past end of block. */
18653 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18655 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18656 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18657 Make sure it is power of 2. */
18658 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18660 /* To improve performance of small blocks, we jump around the VAL
18661 promoting mode. This mean that if the promoted VAL is not constant,
18662 we might not use it in the epilogue and have to use byte
18664 if (epilogue_size_needed > 2 && !promoted_val)
18665 force_loopy_epilogue = true;
18668 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18670 /* If main algorithm works on QImode, no epilogue is needed.
18671 For small sizes just don't align anything. */
18672 if (size_needed == 1)
18673 desired_align = align;
/* Runtime guard: small blocks jump straight to the epilogue.  */
18680 label = gen_label_rtx ();
18681 emit_cmp_and_jump_insns (count_exp,
18682 GEN_INT (epilogue_size_needed),
18683 LTU, 0, counter_mode (count_exp), 1, label);
18684 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18685 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18687 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Runtime size check: large blocks go to the library call.  */
18690 if (dynamic_check != -1)
18692 rtx hot_label = gen_label_rtx ();
18693 jump_around_label = gen_label_rtx ();
18694 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18695 LEU, 0, counter_mode (count_exp), 1, hot_label);
18696 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18697 set_storage_via_libcall (dst, count_exp, val_exp, false);
18698 emit_jump (jump_around_label);
18699 emit_label (hot_label);
18702 /* Step 2: Alignment prologue. */
18704 /* Do the expensive promotion once we branched off the small blocks. */
18706 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18707 desired_align, align);
18708 gcc_assert (desired_align >= 1 && align >= 1);
18710 if (desired_align > align)
18712 if (align_bytes == 0)
18714 /* Except for the first move in epilogue, we no longer know
18715 constant offset in aliasing info. It don't seems to worth
18716 the pain to maintain it for the first move, so throw away
18718 dst = change_address (dst, BLKmode, destreg);
18719 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18724 /* If we know how many bytes need to be stored before dst is
18725 sufficiently aligned, maintain aliasing info accurately. */
18726 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18727 desired_align, align_bytes);
18728 count_exp = plus_constant (count_exp, -align_bytes);
18729 count -= align_bytes;
18731 if (need_zero_guard
18732 && (count < (unsigned HOST_WIDE_INT) size_needed
18733 || (align_bytes == 0
18734 && count < ((unsigned HOST_WIDE_INT) size_needed
18735 + desired_align - align))))
18737 /* It is possible that we copied enough so the main loop will not
18739 gcc_assert (size_needed > 1);
18740 if (label == NULL_RTX)
18741 label = gen_label_rtx ();
18742 emit_cmp_and_jump_insns (count_exp,
18743 GEN_INT (size_needed),
18744 LTU, 0, counter_mode (count_exp), 1, label);
18745 if (expected_size == -1
18746 || expected_size < (desired_align - align) / 2 + size_needed)
18747 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18749 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18752 if (label && size_needed == 1)
18754 emit_label (label);
18755 LABEL_NUSES (label) = 1;
/* Byte-sized epilogue: the unpromoted value suffices.  */
18757 promoted_val = val_exp;
18758 epilogue_size_needed = 1;
18760 else if (label == NULL_RTX)
18761 epilogue_size_needed = size_needed;
18763 /* Step 3: Main loop. */
18769 gcc_unreachable ();
18771 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18772 count_exp, QImode, 1, expected_size);
18775 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18776 count_exp, Pmode, 1, expected_size);
18778 case unrolled_loop:
18779 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18780 count_exp, Pmode, 4, expected_size);
18782 case rep_prefix_8_byte:
18783 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18786 case rep_prefix_4_byte:
18787 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18790 case rep_prefix_1_byte:
18791 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18795 /* Adjust properly the offset of src and dest memory for aliasing. */
18796 if (CONST_INT_P (count_exp))
18797 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18798 (count / size_needed) * size_needed);
18800 dst = change_address (dst, BLKmode, destreg);
18802 /* Step 4: Epilogue to copy the remaining bytes. */
18806 /* When the main loop is done, COUNT_EXP might hold original count,
18807 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18808 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18809 bytes. Compensate if needed. */
18811 if (size_needed < epilogue_size_needed)
18814 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18815 GEN_INT (size_needed - 1), count_exp, 1,
18817 if (tmp != count_exp)
18818 emit_move_insn (count_exp, tmp);
18820 emit_label (label);
18821 LABEL_NUSES (label) = 1;
18824 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18826 if (force_loopy_epilogue)
18827 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18828 epilogue_size_needed);
18830 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18831 epilogue_size_needed);
18833 if (jump_around_label)
18834 emit_label (jump_around_label);
18838 /* Expand the appropriate insns for doing strlen if not just doing
18841 out = result, initialized with the start address
18842 align_rtx = alignment of the address.
18843 scratch = scratch register, initialized with the startaddress when
18844 not aligned, otherwise undefined
18846 This is just the body. It needs the initializations mentioned above and
18847 some address computing at the end. These things are done in i386.md. */
18850 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18854 rtx align_2_label = NULL_RTX;
18855 rtx align_3_label = NULL_RTX;
18856 rtx align_4_label = gen_label_rtx ();
18857 rtx end_0_label = gen_label_rtx ();
18859 rtx tmpreg = gen_reg_rtx (SImode);
18860 rtx scratch = gen_reg_rtx (SImode);
/* Use the alignment only when it is a compile-time constant.  */
18864 if (CONST_INT_P (align_rtx))
18865 align = INTVAL (align_rtx);
18867 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18869 /* Is there a known alignment and is it less than 4? */
18872 rtx scratch1 = gen_reg_rtx (Pmode);
18873 emit_move_insn (scratch1, out);
18874 /* Is there a known alignment and is it not 2? */
18877 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18878 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18880 /* Leave just the 3 lower bits. */
18881 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18882 NULL_RTX, 0, OPTAB_WIDEN)
/* Dispatch on (address & 3): 0 -> already word aligned; 2 -> two
   bytes left to check; >2 -> three; fall through for residue 1.  */
18884 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18885 Pmode, 1, align_4_label);
18886 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18887 Pmode, 1, align_2_label);
18888 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18889 Pmode, 1, align_3_label);
18893 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18894 check if is aligned to 4 - byte. */
18896 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18897 NULL_RTX, 0, OPTAB_WIDEN);
18899 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18900 Pmode, 1, align_4_label);
18903 mem = change_address (src, QImode, out);
18905 /* Now compare the bytes. */
18907 /* Compare the first n unaligned byte on a byte per byte basis. */
18908 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18909 QImode, 1, end_0_label);
18911 /* Increment the address. */
18912 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18914 /* Not needed with an alignment of 2 */
18917 emit_label (align_2_label);
18919 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18922 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18924 emit_label (align_3_label);
18927 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18930 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18933 /* Generate loop to check 4 bytes at a time. It is not a good idea to
18934 align this loop. It gives only huge programs, but does not help to
18936 emit_label (align_4_label);
18938 mem = change_address (src, SImode, out);
18939 emit_move_insn (scratch, mem);
18940 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18942 /* This formula yields a nonzero result iff one of the bytes is zero.
18943 This saves three branches inside loop and many cycles. */
/* tmpreg = (word - 0x01010101) & ~word & 0x80808080.  */
18945 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18946 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18947 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18948 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18949 gen_int_mode (0x80808080, SImode)));
18950 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found in the last word loaded; locate it.  */
18955 rtx reg = gen_reg_rtx (SImode);
18956 rtx reg2 = gen_reg_rtx (Pmode);
18957 emit_move_insn (reg, tmpreg);
18958 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18960 /* If zero is not in the first two bytes, move two bytes forward. */
18961 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18962 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18963 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18964 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18965 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18968 /* Emit lea manually to avoid clobbering of flags. */
18969 emit_insn (gen_rtx_SET (SImode, reg2,
18970 gen_rtx_PLUS (Pmode, out, const2_rtx)));
/* Conditionally advance OUT by 2 using a flags-based cmove.  */
18972 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18973 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18974 emit_insn (gen_rtx_SET (VOIDmode, out,
18975 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Fallback path (no conditional moves): explicit test and branch.  */
18982 rtx end_2_label = gen_label_rtx ();
18983 /* Is zero in the first two bytes? */
18985 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18986 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18987 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18988 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18989 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18991 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18992 JUMP_LABEL (tmp) = end_2_label;
18994 /* Not in the first two. Move two bytes forward. */
18995 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18996 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18998 emit_label (end_2_label);
19002 /* Avoid branch in fixing the byte. */
/* NOTE(review): the byte self-add below sets the carry flag, which
   the borrowing subtract then uses to pick the final adjustment of
   OUT without a branch -- confirm against addqi3_cc / sub3_carry
   semantics in i386.md.  */
19003 tmpreg = gen_lowpart (QImode, tmpreg);
19004 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19005 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19006 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
19008 emit_label (end_0_label);
19011 /* Expand strlen. */
/* OUT receives the string length, SRC is the string MEM, EOSCHAR the
   terminator byte, ALIGN the known alignment rtx.  NOTE(review): the
   return statements are not visible in this extract; presumably the
   function reports whether an inline expansion was emitted -- confirm
   against the strlen expander in i386.md.  */
19014 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19016 rtx addr, scratch1, scratch2, scratch3, scratch4;
19018 /* The generic case of strlen expander is long. Avoid it's
19019 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
19021 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19022 && !TARGET_INLINE_ALL_STRINGOPS
19023 && !optimize_insn_for_size_p ()
19024 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19027 addr = force_reg (Pmode, XEXP (src, 0));
19028 scratch1 = gen_reg_rtx (Pmode);
/* Unrolled byte/word scan; see ix86_expand_strlensi_unroll_1.  */
19030 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19031 && !optimize_insn_for_size_p ())
19033 /* Well it seems that some optimizer does not combine a call like
19034 foo(strlen(bar), strlen(bar));
19035 when the move and the subtraction is done here. It does calculate
19036 the length just once when these instructions are done inside of
19037 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
19038 often used and I use one fewer register for the lifetime of
19039 output_strlen_unroll() this is better. */
19041 emit_move_insn (out, addr);
19043 ix86_expand_strlensi_unroll_1 (out, src, align);
19045 /* strlensi_unroll_1 returns the address of the zero at the end of
19046 the string, like memchr(), so compute the length by subtracting
19047 the start address. */
19048 emit_insn ((*ix86_gen_sub3) (out, out, addr));
/* Otherwise fall back to the scas-based sequence below.  */
19054 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19055 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19058 scratch2 = gen_reg_rtx (Pmode);
19059 scratch3 = gen_reg_rtx (Pmode);
19060 scratch4 = force_reg (Pmode, constm1_rtx);
19062 emit_move_insn (scratch3, addr);
19063 eoschar = force_reg (QImode, eoschar);
19065 src = replace_equiv_address_nv (src, scratch3);
19067 /* If .md starts supporting :P, this can be done in .md. */
19068 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19069 scratch4), UNSPEC_SCAS);
19070 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* Derive the length from the scan result: complement, then add -1.  */
19071 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19072 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19077 /* For given symbol (function) construct code to compute address of it's PLT
19078 entry in large x86-64 PIC model. */
19080 construct_plt_address (rtx symbol)
19082 rtx tmp = gen_reg_rtx (Pmode);
19083 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
/* Only valid for SYMBOL_REFs under -mcmodel=large with PIC.  */
19085 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19086 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
/* tmp = <symbol>@PLTOFF + PIC register.  */
19088 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19089 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Emit the RTL for a call.  RETVAL is the value-receiving rtx or NULL,
   FNADDR the callee as a MEM, CALLARG1/CALLARG2 ABI-specific extra
   arguments, POP the number of bytes the callee pops, SIBCALL nonzero
   for sibling calls.  NOTE(review): CALLARG2's exact contract (the AL
   value loaded below / the -2 marker tested for MS_ABI) is inferred
   from the uses here -- confirm against the callers.  */
19094 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19096 rtx pop, int sibcall)
19098 rtx use = NULL, call;
19100 if (pop == const0_rtx)
19102 gcc_assert (!TARGET_64BIT || !pop);
19104 if (TARGET_MACHO && !TARGET_64BIT)
/* Darwin/32-bit: route PIC symbol calls through machopic stubs.  */
19107 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19108 fnaddr = machopic_indirect_call_target (fnaddr);
19113 /* Static functions and indirect calls don't need the pic register. */
19114 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19115 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19116 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19117 use_reg (&use, pic_offset_table_rtx);
/* 64-bit: load CALLARG2 into AL (presumably the vararg SSE register
   count per the x86-64 psABI -- confirm) and keep it live at the call.  */
19120 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19122 rtx al = gen_rtx_REG (QImode, AX_REG);
19123 emit_move_insn (al, callarg2);
19124 use_reg (&use, al);
/* Large PIC model: non-local symbol calls go through a computed PLT
   address (see construct_plt_address).  */
19127 if (ix86_cmodel == CM_LARGE_PIC
19128 && GET_CODE (fnaddr) == MEM
19129 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19130 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19131 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19132 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19134 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19135 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls to a non-constant address need the target in a
   register; R11 is used here.  */
19137 if (sibcall && TARGET_64BIT
19138 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19141 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19142 fnaddr = gen_rtx_REG (Pmode, R11_REG);
19143 emit_move_insn (fnaddr, addr);
19144 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19147 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19149 call = gen_rtx_SET (VOIDmode, retval, call);
/* Represent callee-popped bytes as a parallel stack-pointer bump.  */
19152 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19153 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19154 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19157 && ix86_cfun_abi () == MS_ABI
19158 && (!callarg2 || INTVAL (callarg2) != -2))
19160 /* We need to represent that SI and DI registers are clobbered
19162 static int clobbered_registers[] = {
19163 XMM6_REG, XMM7_REG, XMM8_REG,
19164 XMM9_REG, XMM10_REG, XMM11_REG,
19165 XMM12_REG, XMM13_REG, XMM14_REG,
19166 XMM15_REG, SI_REG, DI_REG
/* Pattern layout: [0] call, [1] unspec marker, [2..] clobbers.  */
19169 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19170 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19171 UNSPEC_MS_TO_SYSV_CALL);
19175 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19176 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19179 (SSE_REGNO_P (clobbered_registers[i])
19181 clobbered_registers[i]));
19183 call = gen_rtx_PARALLEL (VOIDmode,
19184 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19188 call = emit_call_insn (call);
19190 CALL_INSN_FUNCTION_USAGE (call) = use;
19194 /* Clear stack slot assignments remembered from previous functions.
19195 This is called from INIT_EXPANDERS once before RTL is emitted for each
19198 static struct machine_function *
19199 ix86_init_machine_status (void)
19201 struct machine_function *f;
/* Zero-allocate in GC-managed storage; -1 marks "not yet computed".  */
19203 f = GGC_CNEW (struct machine_function);
19204 f->use_fast_prologue_epilogue_nregs = -1;
19205 f->tls_descriptor_call_expanded_p = 0;
19206 f->call_abi = ix86_abi;
19211 /* Return a MEM corresponding to a stack slot with mode MODE.
19212 Allocate a new slot if necessary.
19214 The RTL for a function can have several slots available: N is
19215 which slot to use. */
19218 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19220 struct stack_local_entry *s;
19222 gcc_assert (n < MAX_386_STACK_LOCALS);
19224 /* Virtual slot is valid only before vregs are instantiated. */
19225 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse a previously allocated slot with the same mode and index;
   copy_rtx so each use gets its own rtx.  */
19227 for (s = ix86_stack_locals; s; s = s->next)
19228 if (s->mode == mode && s->n == n)
19229 return copy_rtx (s->rtl);
/* None found: allocate a fresh slot and link it into the list.  */
19231 s = (struct stack_local_entry *)
19232 ggc_alloc (sizeof (struct stack_local_entry));
19235 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19237 s->next = ix86_stack_locals;
19238 ix86_stack_locals = s;
19242 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19244 static GTY(()) rtx ix86_tls_symbol;
19246 ix86_tls_get_addr (void)
/* Created lazily and cached in the GC root above; the symbol name
   depends on whether GNU TLS conventions are in use.  */
19249 if (!ix86_tls_symbol)
19251 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19252 (TARGET_ANY_GNU_TLS
19254 ? "___tls_get_addr"
19255 : "__tls_get_addr");
19258 return ix86_tls_symbol;
19261 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19263 static GTY(()) rtx ix86_tls_module_base_symbol;
19265 ix86_tls_module_base (void)
/* Created lazily; the symbol is flagged as global-dynamic TLS so the
   rest of the compiler treats it like other TLS references.  */
19268 if (!ix86_tls_module_base_symbol)
19270 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19271 "_TLS_MODULE_BASE_");
19272 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19273 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19276 return ix86_tls_module_base_symbol;
19279 /* Calculate the length of the memory address in the instruction
19280 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19283 memory_address_length (rtx addr)
19285 struct ix86_address parts;
19286 rtx base, index, disp;
/* Autoincrement addresses encode as plain register indirect.  */
19290 if (GET_CODE (addr) == PRE_DEC
19291 || GET_CODE (addr) == POST_INC
19292 || GET_CODE (addr) == PRE_MODIFY
19293 || GET_CODE (addr) == POST_MODIFY)
19296 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the register identity checks below work.  */
19299 if (parts.base && GET_CODE (parts.base) == SUBREG)
19300 parts.base = SUBREG_REG (parts.base);
19301 if (parts.index && GET_CODE (parts.index) == SUBREG)
19302 parts.index = SUBREG_REG (parts.index);
19305 index = parts.index;
19310 - esp as the base always wants an index,
19311 - ebp as the base always wants a displacement. */
19313 /* Register Indirect. */
19314 if (base && !index && !disp)
19316 /* esp (for its index) and ebp (for its displacement) need
19317 the two-byte modrm form. */
19318 if (addr == stack_pointer_rtx
19319 || addr == arg_pointer_rtx
19320 || addr == frame_pointer_rtx
19321 || addr == hard_frame_pointer_rtx)
19325 /* Direct Addressing. */
19326 else if (disp && !base && !index)
19331 /* Find the length of the displacement constant. */
/* satisfies_constraint_K: displacement fits in a signed 8-bit byte.  */
19334 if (base && satisfies_constraint_K (disp))
19339 /* ebp always wants a displacement. */
19340 else if (base == hard_frame_pointer_rtx)
19343 /* An index requires the two-byte modrm form.... */
19345 /* ...like esp, which always wants an index. */
19346 || base == stack_pointer_rtx
19347 || base == arg_pointer_rtx
19348 || base == frame_pointer_rtx)
19355 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19356 is set, expect that insn have 8bit immediate alternative. */
19358 ix86_attr_length_immediate_default (rtx insn, int shortform)
/* Scan the recognized operands for a constant immediate.  */
19362 extract_insn_cached (insn);
19363 for (i = recog_data.n_operands - 1; i >= 0; --i)
19364 if (CONSTANT_P (recog_data.operand[i]))
/* With a short form, an 8-bit-representable immediate (constraint K)
   encodes in a single byte.  */
19367 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
/* Otherwise the immediate width follows the insn's mode attribute.  */
19371 switch (get_attr_mode (insn))
19382 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
19387 fatal_insn ("unknown insn mode", insn);
19393 /* Compute default value for "length_address" attribute. */
19395 ix86_attr_length_address_default (rtx insn)
/* LEA carries its address expression in its SET_SRC, not in a MEM.  */
19399 if (get_attr_type (insn) == TYPE_LEA)
19401 rtx set = PATTERN (insn);
19403 if (GET_CODE (set) == PARALLEL)
19404 set = XVECEXP (set, 0, 0);
19406 gcc_assert (GET_CODE (set) == SET);
19408 return memory_address_length (SET_SRC (set));
/* Otherwise measure the address of the first MEM operand, if any.  */
19411 extract_insn_cached (insn);
19412 for (i = recog_data.n_operands - 1; i >= 0; --i)
19413 if (MEM_P (recog_data.operand[i]))
19415 return memory_address_length (XEXP (recog_data.operand[i], 0));
19421 /* Compute default value for "length_vex" attribute. It includes
19422 2 or 3 byte VEX prefix and 1 opcode byte. */
19425 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19430 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19431 byte VEX prefix. */
19432 if (!has_0f_opcode || has_vex_w)
19435 /* We can always use 2 byte VEX prefix in 32bit. */
/* In 64-bit mode, scan the operands for conditions that force the
   3-byte VEX prefix.  */
19439 extract_insn_cached (insn);
19441 for (i = recog_data.n_operands - 1; i >= 0; --i)
19442 if (REG_P (recog_data.operand[i]))
19444 /* REX.W bit uses 3 byte VEX prefix. */
19445 if (GET_MODE (recog_data.operand[i]) == DImode)
19450 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19451 if (MEM_P (recog_data.operand[i])
19452 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19459 /* Return the maximum number of instructions a cpu can issue. */
19462 ix86_issue_rate (void)
/* Per-processor issue widths; the switch head (presumably on
   ix86_tune) is not visible in this extract -- confirm.  */
19466 case PROCESSOR_PENTIUM:
19467 case PROCESSOR_ATOM:
19471 case PROCESSOR_PENTIUMPRO:
19472 case PROCESSOR_PENTIUM4:
19473 case PROCESSOR_ATHLON:
19475 case PROCESSOR_AMDFAM10:
19476 case PROCESSOR_NOCONA:
19477 case PROCESSOR_GENERIC32:
19478 case PROCESSOR_GENERIC64:
19481 case PROCESSOR_CORE2:
19489 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19490 by DEP_INSN and nothing set by DEP_INSN. */
19493 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19497 /* Simplify the test for uninteresting insns. */
19498 if (insn_type != TYPE_SETCC
19499 && insn_type != TYPE_ICMOV
19500 && insn_type != TYPE_FCMOV
19501 && insn_type != TYPE_IBR)
/* Single-set dependency: the flags destination is the lone SET_DEST.  */
19504 if ((set = single_set (dep_insn)) != 0)
19506 set = SET_DEST (set);
/* Two-set PARALLEL (e.g. an arithmetic insn that also sets the flags):
   collect both destinations.  */
19509 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19510 && XVECLEN (PATTERN (dep_insn), 0) == 2
19511 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19512 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19514 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* Bug fix: SET2 must be the destination of the SECOND set (element 1).
   The original read element 0 twice, so the "nothing else set by
   DEP_INSN" check below always re-tested the same destination.  */
19515 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
/* Only flag-setting dependencies are of interest here.  */
19520 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19523 /* This test is true if the dependent insn reads the flags but
19524 not any other potentially set register. */
19525 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19528 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19534 /* Return true iff USE_INSN has a memory address with operands set by
19538 ix86_agi_dependent (rtx set_insn, rtx use_insn)
/* Scan USE_INSN's recognized operands for a MEM whose address reads
   something SET_INSN modifies (an address-generation interlock).  */
19541 extract_insn_cached (use_insn);
19542 for (i = recog_data.n_operands - 1; i >= 0; --i)
19543 if (MEM_P (recog_data.operand[i]))
19545 rtx addr = XEXP (recog_data.operand[i], 0);
19546 return modified_in_p (addr, set_insn) != 0;
/* Scheduler hook: adjust the latency COST of the dependency LINK
   between DEP_INSN (producer) and INSN (consumer), per tuned CPU.  */
19552 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19554 enum attr_type insn_type, dep_insn_type;
19555 enum attr_memory memory;
19557 int dep_insn_code_number;
19559 /* Anti and output dependencies have zero cost on all CPUs. */
19560 if (REG_NOTE_KIND (link) != 0)
19563 dep_insn_code_number = recog_memoized (dep_insn);
19565 /* If we can't recognize the insns, we can't really do anything. */
19566 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19569 insn_type = get_attr_type (insn);
19570 dep_insn_type = get_attr_type (dep_insn);
19574 case PROCESSOR_PENTIUM:
19575 /* Address Generation Interlock adds a cycle of latency. */
19576 if (insn_type == TYPE_LEA)
19578 rtx addr = PATTERN (insn);
19580 if (GET_CODE (addr) == PARALLEL)
19581 addr = XVECEXP (addr, 0, 0);
19583 gcc_assert (GET_CODE (addr) == SET);
19585 addr = SET_SRC (addr);
19586 if (modified_in_p (addr, dep_insn))
19589 else if (ix86_agi_dependent (dep_insn, insn))
19592 /* ??? Compares pair with jump/setcc. */
19593 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19596 /* Floating point stores require value to be ready one cycle earlier. */
19597 if (insn_type == TYPE_FMOV
19598 && get_attr_memory (insn) == MEMORY_STORE
19599 && !ix86_agi_dependent (dep_insn, insn))
19603 case PROCESSOR_PENTIUMPRO:
19604 memory = get_attr_memory (insn);
19606 /* INT->FP conversion is expensive. */
19607 if (get_attr_fp_int_src (dep_insn))
19610 /* There is one cycle extra latency between an FP op and a store. */
19611 if (insn_type == TYPE_FMOV
19612 && (set = single_set (dep_insn)) != NULL_RTX
19613 && (set2 = single_set (insn)) != NULL_RTX
19614 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19615 && MEM_P (SET_DEST (set2)))
19618 /* Show ability of reorder buffer to hide latency of load by executing
19619 in parallel with previous instruction in case
19620 previous instruction is not needed to compute the address. */
19621 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19622 && !ix86_agi_dependent (dep_insn, insn))
19624 /* Claim moves to take one cycle, as core can issue one load
19625 at time and the next load can start cycle later. */
19626 if (dep_insn_type == TYPE_IMOV
19627 || dep_insn_type == TYPE_FMOV)
/* (Case label elided in this extract; same pattern as above.)  */
19635 memory = get_attr_memory (insn);
19637 /* The esp dependency is resolved before the instruction is really
19639 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19640 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19643 /* INT->FP conversion is expensive. */
19644 if (get_attr_fp_int_src (dep_insn))
19647 /* Show ability of reorder buffer to hide latency of load by executing
19648 in parallel with previous instruction in case
19649 previous instruction is not needed to compute the address. */
19650 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19651 && !ix86_agi_dependent (dep_insn, insn))
19653 /* Claim moves to take one cycle, as core can issue one load
19654 at time and the next load can start cycle later. */
19655 if (dep_insn_type == TYPE_IMOV
19656 || dep_insn_type == TYPE_FMOV)
19665 case PROCESSOR_ATHLON:
19667 case PROCESSOR_AMDFAM10:
19668 case PROCESSOR_ATOM:
19669 case PROCESSOR_GENERIC32:
19670 case PROCESSOR_GENERIC64:
19671 memory = get_attr_memory (insn);
19673 /* Show ability of reorder buffer to hide latency of load by executing
19674 in parallel with previous instruction in case
19675 previous instruction is not needed to compute the address. */
19676 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19677 && !ix86_agi_dependent (dep_insn, insn))
19679 enum attr_unit unit = get_attr_unit (insn);
19682 /* Because of the difference between the length of integer and
19683 floating unit pipeline preparation stages, the memory operands
19684 for floating point are cheaper.
19686 ??? For Athlon it the difference is most probably 2. */
19687 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19690 loadcost = TARGET_ATHLON ? 2 : 0;
/* Deduct the hidden load latency, clamping at the elided floor.  */
19692 if (cost >= loadcost)
19705 /* How many alternative schedules to try. This should be as wide as the
19706 scheduling freedom in the DFA, but no wider. Making this value too
19707 large results extra work for the scheduler. */
19710 ia32_multipass_dfa_lookahead (void)
/* Lookahead depth varies by tuned processor (switch head elided in
   this extract; presumably on ix86_tune -- confirm).  */
19714 case PROCESSOR_PENTIUM:
19717 case PROCESSOR_PENTIUMPRO:
19727 /* Compute the alignment given to a constant that is being placed in memory.
19728 EXP is the constant and ALIGN is the alignment that the object would
19730 The value of this function is used instead of that alignment to align
/* Returns an alignment in bits, boosting doubles to 64 and 128-bit
   modes to 128 when the default is lower.  */
19734 ix86_constant_alignment (tree exp, int align)
19736 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19737 || TREE_CODE (exp) == INTEGER_CST)
19739 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19741 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants get word alignment (speed over size only).  */
19744 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19745 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19746 return BITS_PER_WORD;
19751 /* Compute the alignment for a static variable.
19752 TYPE is the data type, and ALIGN is the alignment that
19753 the object would ordinarily have. The value of this function is used
19754 instead of that alignment to align the object. */
19757 ix86_data_alignment (tree type, int align)
/* Cap boosted alignment: word-align when optimizing for size,
   otherwise up to 256 bits (bounded by MAX_OFILE_ALIGNMENT).  */
19759 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* TYPE_SIZE is in bits; a nonzero TREE_INT_CST_HIGH means "huge".  */
19761 if (AGGREGATE_TYPE_P (type)
19762 && TYPE_SIZE (type)
19763 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19764 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19765 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19766 && align < max_align)
19769 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19770 to 16byte boundary. */
19773 if (AGGREGATE_TYPE_P (type)
19774 && TYPE_SIZE (type)
19775 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19776 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19777 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-class boosts: DFmode to 64, 128-bit modes to 128.  */
19781 if (TREE_CODE (type) == ARRAY_TYPE)
19783 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19785 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19788 else if (TREE_CODE (type) == COMPLEX_TYPE)
19791 if (TYPE_MODE (type) == DCmode && align < 64)
19793 if ((TYPE_MODE (type) == XCmode
19794 || TYPE_MODE (type) == TCmode) && align < 128)
19797 else if ((TREE_CODE (type) == RECORD_TYPE
19798 || TREE_CODE (type) == UNION_TYPE
19799 || TREE_CODE (type) == QUAL_UNION_TYPE)
19800 && TYPE_FIELDS (type))
19802 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19804 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19807 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19808 || TREE_CODE (type) == INTEGER_TYPE)
19810 if (TYPE_MODE (type) == DFmode && align < 64)
19812 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19819 /* Compute the alignment for a local variable or a stack slot. EXP is
19820 the data type or decl itself, MODE is the widest mode available and
19821 ALIGN is the alignment that the object would ordinarily have. The
19822 value of this macro is used instead of that alignment to align the
19826 ix86_local_alignment (tree exp, enum machine_mode mode,
19827 unsigned int align)
/* EXP may be a decl or a type; normalize to the underlying type.  */
19831 if (exp && DECL_P (exp))
19833 type = TREE_TYPE (exp);
19842 /* Don't do dynamic stack realignment for long long objects with
19843 -mpreferred-stack-boundary=2. */
19846 && ix86_preferred_stack_boundary < 64
19847 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19848 && (!type || !TYPE_USER_ALIGN (type))
19849 && (!decl || !DECL_USER_ALIGN (decl)))
19852 /* If TYPE is NULL, we are allocating a stack slot for caller-save
19853 register in MODE. We will return the largest alignment of XF
19857 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19858 align = GET_MODE_ALIGNMENT (DFmode);
19862 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19863 to 16byte boundary. */
19866 if (AGGREGATE_TYPE_P (type)
19867 && TYPE_SIZE (type)
19868 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
/* NOTE(review): TYPE_SIZE is in bits, yet this compares against 16
   while the analogous check in ix86_data_alignment uses 128 -- this
   appears to 16-byte-align any aggregate of 2+ bytes; confirm the
   intended threshold.  */
19869 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19870 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-class boosts mirroring ix86_data_alignment.  */
19873 if (TREE_CODE (type) == ARRAY_TYPE)
19875 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19877 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19880 else if (TREE_CODE (type) == COMPLEX_TYPE)
19882 if (TYPE_MODE (type) == DCmode && align < 64)
19884 if ((TYPE_MODE (type) == XCmode
19885 || TYPE_MODE (type) == TCmode) && align < 128)
19888 else if ((TREE_CODE (type) == RECORD_TYPE
19889 || TREE_CODE (type) == UNION_TYPE
19890 || TREE_CODE (type) == QUAL_UNION_TYPE)
19891 && TYPE_FIELDS (type))
19893 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19895 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19898 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19899 || TREE_CODE (type) == INTEGER_TYPE)
19902 if (TYPE_MODE (type) == DFmode && align < 64)
19904 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19910 /* Emit RTL insns to initialize the variable parts of a trampoline.
19911 FNADDR is an RTX for the address of the function's pure code.
19912 CXT is an RTX for the static chain value for the function. */
19914 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19918 /* Compute offset from the end of the jmp to the target function. */
19919 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19920 plus_constant (tramp, 10),
19921 NULL_RTX, 1, OPTAB_DIRECT);
/* 32-bit layout: 0xb9 (mov $cxt, %ecx) imm32, then 0xe9 (jmp rel32)
   with the displacement computed above.  */
19922 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19923 gen_int_mode (0xb9, QImode));
19924 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19925 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19926 gen_int_mode (0xe9, QImode));
19927 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19932 /* Try to load address using shorter movl instead of movabs.
19933 We may want to support movq for kernel mode, but kernel does not use
19934 trampolines at the moment. */
19935 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19937 fnaddr = copy_to_mode_reg (DImode, fnaddr);
/* Bytes 41 bb (stored little-endian): movl $imm32, %r11d.  */
19938 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19939 gen_int_mode (0xbb41, HImode));
19940 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19941 gen_lowpart (SImode, fnaddr));
/* Bytes 49 bb: movabs $imm64, %r11 for a full 64-bit address.  */
19946 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19947 gen_int_mode (0xbb49, HImode));
19948 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19952 /* Load static chain using movabs to r10. */
19953 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19954 gen_int_mode (0xba49, HImode));
19955 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19958 /* Jump to the r11 */
19959 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19960 gen_int_mode (0xff49, HImode));
19961 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19962 gen_int_mode (0xe3, QImode));
19964 gcc_assert (offset <= TRAMPOLINE_SIZE);
19967 #ifdef ENABLE_EXECUTE_STACK
/* Targets that need it mark the trampoline's stack page executable.  */
19968 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19969 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19973 /* Codes for all the SSE/MMX builtins. */
19976 IX86_BUILTIN_ADDPS,
19977 IX86_BUILTIN_ADDSS,
19978 IX86_BUILTIN_DIVPS,
19979 IX86_BUILTIN_DIVSS,
19980 IX86_BUILTIN_MULPS,
19981 IX86_BUILTIN_MULSS,
19982 IX86_BUILTIN_SUBPS,
19983 IX86_BUILTIN_SUBSS,
19985 IX86_BUILTIN_CMPEQPS,
19986 IX86_BUILTIN_CMPLTPS,
19987 IX86_BUILTIN_CMPLEPS,
19988 IX86_BUILTIN_CMPGTPS,
19989 IX86_BUILTIN_CMPGEPS,
19990 IX86_BUILTIN_CMPNEQPS,
19991 IX86_BUILTIN_CMPNLTPS,
19992 IX86_BUILTIN_CMPNLEPS,
19993 IX86_BUILTIN_CMPNGTPS,
19994 IX86_BUILTIN_CMPNGEPS,
19995 IX86_BUILTIN_CMPORDPS,
19996 IX86_BUILTIN_CMPUNORDPS,
19997 IX86_BUILTIN_CMPEQSS,
19998 IX86_BUILTIN_CMPLTSS,
19999 IX86_BUILTIN_CMPLESS,
20000 IX86_BUILTIN_CMPNEQSS,
20001 IX86_BUILTIN_CMPNLTSS,
20002 IX86_BUILTIN_CMPNLESS,
20003 IX86_BUILTIN_CMPNGTSS,
20004 IX86_BUILTIN_CMPNGESS,
20005 IX86_BUILTIN_CMPORDSS,
20006 IX86_BUILTIN_CMPUNORDSS,
20008 IX86_BUILTIN_COMIEQSS,
20009 IX86_BUILTIN_COMILTSS,
20010 IX86_BUILTIN_COMILESS,
20011 IX86_BUILTIN_COMIGTSS,
20012 IX86_BUILTIN_COMIGESS,
20013 IX86_BUILTIN_COMINEQSS,
20014 IX86_BUILTIN_UCOMIEQSS,
20015 IX86_BUILTIN_UCOMILTSS,
20016 IX86_BUILTIN_UCOMILESS,
20017 IX86_BUILTIN_UCOMIGTSS,
20018 IX86_BUILTIN_UCOMIGESS,
20019 IX86_BUILTIN_UCOMINEQSS,
20021 IX86_BUILTIN_CVTPI2PS,
20022 IX86_BUILTIN_CVTPS2PI,
20023 IX86_BUILTIN_CVTSI2SS,
20024 IX86_BUILTIN_CVTSI642SS,
20025 IX86_BUILTIN_CVTSS2SI,
20026 IX86_BUILTIN_CVTSS2SI64,
20027 IX86_BUILTIN_CVTTPS2PI,
20028 IX86_BUILTIN_CVTTSS2SI,
20029 IX86_BUILTIN_CVTTSS2SI64,
20031 IX86_BUILTIN_MAXPS,
20032 IX86_BUILTIN_MAXSS,
20033 IX86_BUILTIN_MINPS,
20034 IX86_BUILTIN_MINSS,
20036 IX86_BUILTIN_LOADUPS,
20037 IX86_BUILTIN_STOREUPS,
20038 IX86_BUILTIN_MOVSS,
20040 IX86_BUILTIN_MOVHLPS,
20041 IX86_BUILTIN_MOVLHPS,
20042 IX86_BUILTIN_LOADHPS,
20043 IX86_BUILTIN_LOADLPS,
20044 IX86_BUILTIN_STOREHPS,
20045 IX86_BUILTIN_STORELPS,
20047 IX86_BUILTIN_MASKMOVQ,
20048 IX86_BUILTIN_MOVMSKPS,
20049 IX86_BUILTIN_PMOVMSKB,
20051 IX86_BUILTIN_MOVNTPS,
20052 IX86_BUILTIN_MOVNTQ,
20054 IX86_BUILTIN_LOADDQU,
20055 IX86_BUILTIN_STOREDQU,
20057 IX86_BUILTIN_PACKSSWB,
20058 IX86_BUILTIN_PACKSSDW,
20059 IX86_BUILTIN_PACKUSWB,
20061 IX86_BUILTIN_PADDB,
20062 IX86_BUILTIN_PADDW,
20063 IX86_BUILTIN_PADDD,
20064 IX86_BUILTIN_PADDQ,
20065 IX86_BUILTIN_PADDSB,
20066 IX86_BUILTIN_PADDSW,
20067 IX86_BUILTIN_PADDUSB,
20068 IX86_BUILTIN_PADDUSW,
20069 IX86_BUILTIN_PSUBB,
20070 IX86_BUILTIN_PSUBW,
20071 IX86_BUILTIN_PSUBD,
20072 IX86_BUILTIN_PSUBQ,
20073 IX86_BUILTIN_PSUBSB,
20074 IX86_BUILTIN_PSUBSW,
20075 IX86_BUILTIN_PSUBUSB,
20076 IX86_BUILTIN_PSUBUSW,
20079 IX86_BUILTIN_PANDN,
20083 IX86_BUILTIN_PAVGB,
20084 IX86_BUILTIN_PAVGW,
20086 IX86_BUILTIN_PCMPEQB,
20087 IX86_BUILTIN_PCMPEQW,
20088 IX86_BUILTIN_PCMPEQD,
20089 IX86_BUILTIN_PCMPGTB,
20090 IX86_BUILTIN_PCMPGTW,
20091 IX86_BUILTIN_PCMPGTD,
20093 IX86_BUILTIN_PMADDWD,
20095 IX86_BUILTIN_PMAXSW,
20096 IX86_BUILTIN_PMAXUB,
20097 IX86_BUILTIN_PMINSW,
20098 IX86_BUILTIN_PMINUB,
20100 IX86_BUILTIN_PMULHUW,
20101 IX86_BUILTIN_PMULHW,
20102 IX86_BUILTIN_PMULLW,
20104 IX86_BUILTIN_PSADBW,
20105 IX86_BUILTIN_PSHUFW,
20107 IX86_BUILTIN_PSLLW,
20108 IX86_BUILTIN_PSLLD,
20109 IX86_BUILTIN_PSLLQ,
20110 IX86_BUILTIN_PSRAW,
20111 IX86_BUILTIN_PSRAD,
20112 IX86_BUILTIN_PSRLW,
20113 IX86_BUILTIN_PSRLD,
20114 IX86_BUILTIN_PSRLQ,
20115 IX86_BUILTIN_PSLLWI,
20116 IX86_BUILTIN_PSLLDI,
20117 IX86_BUILTIN_PSLLQI,
20118 IX86_BUILTIN_PSRAWI,
20119 IX86_BUILTIN_PSRADI,
20120 IX86_BUILTIN_PSRLWI,
20121 IX86_BUILTIN_PSRLDI,
20122 IX86_BUILTIN_PSRLQI,
20124 IX86_BUILTIN_PUNPCKHBW,
20125 IX86_BUILTIN_PUNPCKHWD,
20126 IX86_BUILTIN_PUNPCKHDQ,
20127 IX86_BUILTIN_PUNPCKLBW,
20128 IX86_BUILTIN_PUNPCKLWD,
20129 IX86_BUILTIN_PUNPCKLDQ,
20131 IX86_BUILTIN_SHUFPS,
20133 IX86_BUILTIN_RCPPS,
20134 IX86_BUILTIN_RCPSS,
20135 IX86_BUILTIN_RSQRTPS,
20136 IX86_BUILTIN_RSQRTPS_NR,
20137 IX86_BUILTIN_RSQRTSS,
20138 IX86_BUILTIN_RSQRTF,
20139 IX86_BUILTIN_SQRTPS,
20140 IX86_BUILTIN_SQRTPS_NR,
20141 IX86_BUILTIN_SQRTSS,
20143 IX86_BUILTIN_UNPCKHPS,
20144 IX86_BUILTIN_UNPCKLPS,
20146 IX86_BUILTIN_ANDPS,
20147 IX86_BUILTIN_ANDNPS,
20149 IX86_BUILTIN_XORPS,
20152 IX86_BUILTIN_LDMXCSR,
20153 IX86_BUILTIN_STMXCSR,
20154 IX86_BUILTIN_SFENCE,
20156 /* 3DNow! Original */
20157 IX86_BUILTIN_FEMMS,
20158 IX86_BUILTIN_PAVGUSB,
20159 IX86_BUILTIN_PF2ID,
20160 IX86_BUILTIN_PFACC,
20161 IX86_BUILTIN_PFADD,
20162 IX86_BUILTIN_PFCMPEQ,
20163 IX86_BUILTIN_PFCMPGE,
20164 IX86_BUILTIN_PFCMPGT,
20165 IX86_BUILTIN_PFMAX,
20166 IX86_BUILTIN_PFMIN,
20167 IX86_BUILTIN_PFMUL,
20168 IX86_BUILTIN_PFRCP,
20169 IX86_BUILTIN_PFRCPIT1,
20170 IX86_BUILTIN_PFRCPIT2,
20171 IX86_BUILTIN_PFRSQIT1,
20172 IX86_BUILTIN_PFRSQRT,
20173 IX86_BUILTIN_PFSUB,
20174 IX86_BUILTIN_PFSUBR,
20175 IX86_BUILTIN_PI2FD,
20176 IX86_BUILTIN_PMULHRW,
20178 /* 3DNow! Athlon Extensions */
20179 IX86_BUILTIN_PF2IW,
20180 IX86_BUILTIN_PFNACC,
20181 IX86_BUILTIN_PFPNACC,
20182 IX86_BUILTIN_PI2FW,
20183 IX86_BUILTIN_PSWAPDSI,
20184 IX86_BUILTIN_PSWAPDSF,
20187 IX86_BUILTIN_ADDPD,
20188 IX86_BUILTIN_ADDSD,
20189 IX86_BUILTIN_DIVPD,
20190 IX86_BUILTIN_DIVSD,
20191 IX86_BUILTIN_MULPD,
20192 IX86_BUILTIN_MULSD,
20193 IX86_BUILTIN_SUBPD,
20194 IX86_BUILTIN_SUBSD,
20196 IX86_BUILTIN_CMPEQPD,
20197 IX86_BUILTIN_CMPLTPD,
20198 IX86_BUILTIN_CMPLEPD,
20199 IX86_BUILTIN_CMPGTPD,
20200 IX86_BUILTIN_CMPGEPD,
20201 IX86_BUILTIN_CMPNEQPD,
20202 IX86_BUILTIN_CMPNLTPD,
20203 IX86_BUILTIN_CMPNLEPD,
20204 IX86_BUILTIN_CMPNGTPD,
20205 IX86_BUILTIN_CMPNGEPD,
20206 IX86_BUILTIN_CMPORDPD,
20207 IX86_BUILTIN_CMPUNORDPD,
20208 IX86_BUILTIN_CMPEQSD,
20209 IX86_BUILTIN_CMPLTSD,
20210 IX86_BUILTIN_CMPLESD,
20211 IX86_BUILTIN_CMPNEQSD,
20212 IX86_BUILTIN_CMPNLTSD,
20213 IX86_BUILTIN_CMPNLESD,
20214 IX86_BUILTIN_CMPORDSD,
20215 IX86_BUILTIN_CMPUNORDSD,
20217 IX86_BUILTIN_COMIEQSD,
20218 IX86_BUILTIN_COMILTSD,
20219 IX86_BUILTIN_COMILESD,
20220 IX86_BUILTIN_COMIGTSD,
20221 IX86_BUILTIN_COMIGESD,
20222 IX86_BUILTIN_COMINEQSD,
20223 IX86_BUILTIN_UCOMIEQSD,
20224 IX86_BUILTIN_UCOMILTSD,
20225 IX86_BUILTIN_UCOMILESD,
20226 IX86_BUILTIN_UCOMIGTSD,
20227 IX86_BUILTIN_UCOMIGESD,
20228 IX86_BUILTIN_UCOMINEQSD,
20230 IX86_BUILTIN_MAXPD,
20231 IX86_BUILTIN_MAXSD,
20232 IX86_BUILTIN_MINPD,
20233 IX86_BUILTIN_MINSD,
20235 IX86_BUILTIN_ANDPD,
20236 IX86_BUILTIN_ANDNPD,
20238 IX86_BUILTIN_XORPD,
20240 IX86_BUILTIN_SQRTPD,
20241 IX86_BUILTIN_SQRTSD,
20243 IX86_BUILTIN_UNPCKHPD,
20244 IX86_BUILTIN_UNPCKLPD,
20246 IX86_BUILTIN_SHUFPD,
20248 IX86_BUILTIN_LOADUPD,
20249 IX86_BUILTIN_STOREUPD,
20250 IX86_BUILTIN_MOVSD,
20252 IX86_BUILTIN_LOADHPD,
20253 IX86_BUILTIN_LOADLPD,
20255 IX86_BUILTIN_CVTDQ2PD,
20256 IX86_BUILTIN_CVTDQ2PS,
20258 IX86_BUILTIN_CVTPD2DQ,
20259 IX86_BUILTIN_CVTPD2PI,
20260 IX86_BUILTIN_CVTPD2PS,
20261 IX86_BUILTIN_CVTTPD2DQ,
20262 IX86_BUILTIN_CVTTPD2PI,
20264 IX86_BUILTIN_CVTPI2PD,
20265 IX86_BUILTIN_CVTSI2SD,
20266 IX86_BUILTIN_CVTSI642SD,
20268 IX86_BUILTIN_CVTSD2SI,
20269 IX86_BUILTIN_CVTSD2SI64,
20270 IX86_BUILTIN_CVTSD2SS,
20271 IX86_BUILTIN_CVTSS2SD,
20272 IX86_BUILTIN_CVTTSD2SI,
20273 IX86_BUILTIN_CVTTSD2SI64,
20275 IX86_BUILTIN_CVTPS2DQ,
20276 IX86_BUILTIN_CVTPS2PD,
20277 IX86_BUILTIN_CVTTPS2DQ,
20279 IX86_BUILTIN_MOVNTI,
20280 IX86_BUILTIN_MOVNTPD,
20281 IX86_BUILTIN_MOVNTDQ,
20283 IX86_BUILTIN_MOVQ128,
20286 IX86_BUILTIN_MASKMOVDQU,
20287 IX86_BUILTIN_MOVMSKPD,
20288 IX86_BUILTIN_PMOVMSKB128,
20290 IX86_BUILTIN_PACKSSWB128,
20291 IX86_BUILTIN_PACKSSDW128,
20292 IX86_BUILTIN_PACKUSWB128,
20294 IX86_BUILTIN_PADDB128,
20295 IX86_BUILTIN_PADDW128,
20296 IX86_BUILTIN_PADDD128,
20297 IX86_BUILTIN_PADDQ128,
20298 IX86_BUILTIN_PADDSB128,
20299 IX86_BUILTIN_PADDSW128,
20300 IX86_BUILTIN_PADDUSB128,
20301 IX86_BUILTIN_PADDUSW128,
20302 IX86_BUILTIN_PSUBB128,
20303 IX86_BUILTIN_PSUBW128,
20304 IX86_BUILTIN_PSUBD128,
20305 IX86_BUILTIN_PSUBQ128,
20306 IX86_BUILTIN_PSUBSB128,
20307 IX86_BUILTIN_PSUBSW128,
20308 IX86_BUILTIN_PSUBUSB128,
20309 IX86_BUILTIN_PSUBUSW128,
20311 IX86_BUILTIN_PAND128,
20312 IX86_BUILTIN_PANDN128,
20313 IX86_BUILTIN_POR128,
20314 IX86_BUILTIN_PXOR128,
20316 IX86_BUILTIN_PAVGB128,
20317 IX86_BUILTIN_PAVGW128,
20319 IX86_BUILTIN_PCMPEQB128,
20320 IX86_BUILTIN_PCMPEQW128,
20321 IX86_BUILTIN_PCMPEQD128,
20322 IX86_BUILTIN_PCMPGTB128,
20323 IX86_BUILTIN_PCMPGTW128,
20324 IX86_BUILTIN_PCMPGTD128,
20326 IX86_BUILTIN_PMADDWD128,
20328 IX86_BUILTIN_PMAXSW128,
20329 IX86_BUILTIN_PMAXUB128,
20330 IX86_BUILTIN_PMINSW128,
20331 IX86_BUILTIN_PMINUB128,
20333 IX86_BUILTIN_PMULUDQ,
20334 IX86_BUILTIN_PMULUDQ128,
20335 IX86_BUILTIN_PMULHUW128,
20336 IX86_BUILTIN_PMULHW128,
20337 IX86_BUILTIN_PMULLW128,
20339 IX86_BUILTIN_PSADBW128,
20340 IX86_BUILTIN_PSHUFHW,
20341 IX86_BUILTIN_PSHUFLW,
20342 IX86_BUILTIN_PSHUFD,
20344 IX86_BUILTIN_PSLLDQI128,
20345 IX86_BUILTIN_PSLLWI128,
20346 IX86_BUILTIN_PSLLDI128,
20347 IX86_BUILTIN_PSLLQI128,
20348 IX86_BUILTIN_PSRAWI128,
20349 IX86_BUILTIN_PSRADI128,
20350 IX86_BUILTIN_PSRLDQI128,
20351 IX86_BUILTIN_PSRLWI128,
20352 IX86_BUILTIN_PSRLDI128,
20353 IX86_BUILTIN_PSRLQI128,
20355 IX86_BUILTIN_PSLLDQ128,
20356 IX86_BUILTIN_PSLLW128,
20357 IX86_BUILTIN_PSLLD128,
20358 IX86_BUILTIN_PSLLQ128,
20359 IX86_BUILTIN_PSRAW128,
20360 IX86_BUILTIN_PSRAD128,
20361 IX86_BUILTIN_PSRLW128,
20362 IX86_BUILTIN_PSRLD128,
20363 IX86_BUILTIN_PSRLQ128,
20365 IX86_BUILTIN_PUNPCKHBW128,
20366 IX86_BUILTIN_PUNPCKHWD128,
20367 IX86_BUILTIN_PUNPCKHDQ128,
20368 IX86_BUILTIN_PUNPCKHQDQ128,
20369 IX86_BUILTIN_PUNPCKLBW128,
20370 IX86_BUILTIN_PUNPCKLWD128,
20371 IX86_BUILTIN_PUNPCKLDQ128,
20372 IX86_BUILTIN_PUNPCKLQDQ128,
20374 IX86_BUILTIN_CLFLUSH,
20375 IX86_BUILTIN_MFENCE,
20376 IX86_BUILTIN_LFENCE,
20379 IX86_BUILTIN_ADDSUBPS,
20380 IX86_BUILTIN_HADDPS,
20381 IX86_BUILTIN_HSUBPS,
20382 IX86_BUILTIN_MOVSHDUP,
20383 IX86_BUILTIN_MOVSLDUP,
20384 IX86_BUILTIN_ADDSUBPD,
20385 IX86_BUILTIN_HADDPD,
20386 IX86_BUILTIN_HSUBPD,
20387 IX86_BUILTIN_LDDQU,
20389 IX86_BUILTIN_MONITOR,
20390 IX86_BUILTIN_MWAIT,
20393 IX86_BUILTIN_PHADDW,
20394 IX86_BUILTIN_PHADDD,
20395 IX86_BUILTIN_PHADDSW,
20396 IX86_BUILTIN_PHSUBW,
20397 IX86_BUILTIN_PHSUBD,
20398 IX86_BUILTIN_PHSUBSW,
20399 IX86_BUILTIN_PMADDUBSW,
20400 IX86_BUILTIN_PMULHRSW,
20401 IX86_BUILTIN_PSHUFB,
20402 IX86_BUILTIN_PSIGNB,
20403 IX86_BUILTIN_PSIGNW,
20404 IX86_BUILTIN_PSIGND,
20405 IX86_BUILTIN_PALIGNR,
20406 IX86_BUILTIN_PABSB,
20407 IX86_BUILTIN_PABSW,
20408 IX86_BUILTIN_PABSD,
20410 IX86_BUILTIN_PHADDW128,
20411 IX86_BUILTIN_PHADDD128,
20412 IX86_BUILTIN_PHADDSW128,
20413 IX86_BUILTIN_PHSUBW128,
20414 IX86_BUILTIN_PHSUBD128,
20415 IX86_BUILTIN_PHSUBSW128,
20416 IX86_BUILTIN_PMADDUBSW128,
20417 IX86_BUILTIN_PMULHRSW128,
20418 IX86_BUILTIN_PSHUFB128,
20419 IX86_BUILTIN_PSIGNB128,
20420 IX86_BUILTIN_PSIGNW128,
20421 IX86_BUILTIN_PSIGND128,
20422 IX86_BUILTIN_PALIGNR128,
20423 IX86_BUILTIN_PABSB128,
20424 IX86_BUILTIN_PABSW128,
20425 IX86_BUILTIN_PABSD128,
20427 /* AMDFAM10 - SSE4A New Instructions. */
20428 IX86_BUILTIN_MOVNTSD,
20429 IX86_BUILTIN_MOVNTSS,
20430 IX86_BUILTIN_EXTRQI,
20431 IX86_BUILTIN_EXTRQ,
20432 IX86_BUILTIN_INSERTQI,
20433 IX86_BUILTIN_INSERTQ,
20436 IX86_BUILTIN_BLENDPD,
20437 IX86_BUILTIN_BLENDPS,
20438 IX86_BUILTIN_BLENDVPD,
20439 IX86_BUILTIN_BLENDVPS,
20440 IX86_BUILTIN_PBLENDVB128,
20441 IX86_BUILTIN_PBLENDW128,
20446 IX86_BUILTIN_INSERTPS128,
20448 IX86_BUILTIN_MOVNTDQA,
20449 IX86_BUILTIN_MPSADBW128,
20450 IX86_BUILTIN_PACKUSDW128,
20451 IX86_BUILTIN_PCMPEQQ,
20452 IX86_BUILTIN_PHMINPOSUW128,
20454 IX86_BUILTIN_PMAXSB128,
20455 IX86_BUILTIN_PMAXSD128,
20456 IX86_BUILTIN_PMAXUD128,
20457 IX86_BUILTIN_PMAXUW128,
20459 IX86_BUILTIN_PMINSB128,
20460 IX86_BUILTIN_PMINSD128,
20461 IX86_BUILTIN_PMINUD128,
20462 IX86_BUILTIN_PMINUW128,
20464 IX86_BUILTIN_PMOVSXBW128,
20465 IX86_BUILTIN_PMOVSXBD128,
20466 IX86_BUILTIN_PMOVSXBQ128,
20467 IX86_BUILTIN_PMOVSXWD128,
20468 IX86_BUILTIN_PMOVSXWQ128,
20469 IX86_BUILTIN_PMOVSXDQ128,
20471 IX86_BUILTIN_PMOVZXBW128,
20472 IX86_BUILTIN_PMOVZXBD128,
20473 IX86_BUILTIN_PMOVZXBQ128,
20474 IX86_BUILTIN_PMOVZXWD128,
20475 IX86_BUILTIN_PMOVZXWQ128,
20476 IX86_BUILTIN_PMOVZXDQ128,
20478 IX86_BUILTIN_PMULDQ128,
20479 IX86_BUILTIN_PMULLD128,
20481 IX86_BUILTIN_ROUNDPD,
20482 IX86_BUILTIN_ROUNDPS,
20483 IX86_BUILTIN_ROUNDSD,
20484 IX86_BUILTIN_ROUNDSS,
20486 IX86_BUILTIN_PTESTZ,
20487 IX86_BUILTIN_PTESTC,
20488 IX86_BUILTIN_PTESTNZC,
20490 IX86_BUILTIN_VEC_INIT_V2SI,
20491 IX86_BUILTIN_VEC_INIT_V4HI,
20492 IX86_BUILTIN_VEC_INIT_V8QI,
20493 IX86_BUILTIN_VEC_EXT_V2DF,
20494 IX86_BUILTIN_VEC_EXT_V2DI,
20495 IX86_BUILTIN_VEC_EXT_V4SF,
20496 IX86_BUILTIN_VEC_EXT_V4SI,
20497 IX86_BUILTIN_VEC_EXT_V8HI,
20498 IX86_BUILTIN_VEC_EXT_V2SI,
20499 IX86_BUILTIN_VEC_EXT_V4HI,
20500 IX86_BUILTIN_VEC_EXT_V16QI,
20501 IX86_BUILTIN_VEC_SET_V2DI,
20502 IX86_BUILTIN_VEC_SET_V4SF,
20503 IX86_BUILTIN_VEC_SET_V4SI,
20504 IX86_BUILTIN_VEC_SET_V8HI,
20505 IX86_BUILTIN_VEC_SET_V4HI,
20506 IX86_BUILTIN_VEC_SET_V16QI,
20508 IX86_BUILTIN_VEC_PACK_SFIX,
20511 IX86_BUILTIN_CRC32QI,
20512 IX86_BUILTIN_CRC32HI,
20513 IX86_BUILTIN_CRC32SI,
20514 IX86_BUILTIN_CRC32DI,
20516 IX86_BUILTIN_PCMPESTRI128,
20517 IX86_BUILTIN_PCMPESTRM128,
20518 IX86_BUILTIN_PCMPESTRA128,
20519 IX86_BUILTIN_PCMPESTRC128,
20520 IX86_BUILTIN_PCMPESTRO128,
20521 IX86_BUILTIN_PCMPESTRS128,
20522 IX86_BUILTIN_PCMPESTRZ128,
20523 IX86_BUILTIN_PCMPISTRI128,
20524 IX86_BUILTIN_PCMPISTRM128,
20525 IX86_BUILTIN_PCMPISTRA128,
20526 IX86_BUILTIN_PCMPISTRC128,
20527 IX86_BUILTIN_PCMPISTRO128,
20528 IX86_BUILTIN_PCMPISTRS128,
20529 IX86_BUILTIN_PCMPISTRZ128,
20531 IX86_BUILTIN_PCMPGTQ,
20533 /* AES instructions */
20534 IX86_BUILTIN_AESENC128,
20535 IX86_BUILTIN_AESENCLAST128,
20536 IX86_BUILTIN_AESDEC128,
20537 IX86_BUILTIN_AESDECLAST128,
20538 IX86_BUILTIN_AESIMC128,
20539 IX86_BUILTIN_AESKEYGENASSIST128,
20541 /* PCLMUL instruction */
20542 IX86_BUILTIN_PCLMULQDQ128,
20545 IX86_BUILTIN_ADDPD256,
20546 IX86_BUILTIN_ADDPS256,
20547 IX86_BUILTIN_ADDSUBPD256,
20548 IX86_BUILTIN_ADDSUBPS256,
20549 IX86_BUILTIN_ANDPD256,
20550 IX86_BUILTIN_ANDPS256,
20551 IX86_BUILTIN_ANDNPD256,
20552 IX86_BUILTIN_ANDNPS256,
20553 IX86_BUILTIN_BLENDPD256,
20554 IX86_BUILTIN_BLENDPS256,
20555 IX86_BUILTIN_BLENDVPD256,
20556 IX86_BUILTIN_BLENDVPS256,
20557 IX86_BUILTIN_DIVPD256,
20558 IX86_BUILTIN_DIVPS256,
20559 IX86_BUILTIN_DPPS256,
20560 IX86_BUILTIN_HADDPD256,
20561 IX86_BUILTIN_HADDPS256,
20562 IX86_BUILTIN_HSUBPD256,
20563 IX86_BUILTIN_HSUBPS256,
20564 IX86_BUILTIN_MAXPD256,
20565 IX86_BUILTIN_MAXPS256,
20566 IX86_BUILTIN_MINPD256,
20567 IX86_BUILTIN_MINPS256,
20568 IX86_BUILTIN_MULPD256,
20569 IX86_BUILTIN_MULPS256,
20570 IX86_BUILTIN_ORPD256,
20571 IX86_BUILTIN_ORPS256,
20572 IX86_BUILTIN_SHUFPD256,
20573 IX86_BUILTIN_SHUFPS256,
20574 IX86_BUILTIN_SUBPD256,
20575 IX86_BUILTIN_SUBPS256,
20576 IX86_BUILTIN_XORPD256,
20577 IX86_BUILTIN_XORPS256,
20578 IX86_BUILTIN_CMPSD,
20579 IX86_BUILTIN_CMPSS,
20580 IX86_BUILTIN_CMPPD,
20581 IX86_BUILTIN_CMPPS,
20582 IX86_BUILTIN_CMPPD256,
20583 IX86_BUILTIN_CMPPS256,
20584 IX86_BUILTIN_CVTDQ2PD256,
20585 IX86_BUILTIN_CVTDQ2PS256,
20586 IX86_BUILTIN_CVTPD2PS256,
20587 IX86_BUILTIN_CVTPS2DQ256,
20588 IX86_BUILTIN_CVTPS2PD256,
20589 IX86_BUILTIN_CVTTPD2DQ256,
20590 IX86_BUILTIN_CVTPD2DQ256,
20591 IX86_BUILTIN_CVTTPS2DQ256,
20592 IX86_BUILTIN_EXTRACTF128PD256,
20593 IX86_BUILTIN_EXTRACTF128PS256,
20594 IX86_BUILTIN_EXTRACTF128SI256,
20595 IX86_BUILTIN_VZEROALL,
20596 IX86_BUILTIN_VZEROUPPER,
20597 IX86_BUILTIN_VZEROUPPER_REX64,
20598 IX86_BUILTIN_VPERMILVARPD,
20599 IX86_BUILTIN_VPERMILVARPS,
20600 IX86_BUILTIN_VPERMILVARPD256,
20601 IX86_BUILTIN_VPERMILVARPS256,
20602 IX86_BUILTIN_VPERMILPD,
20603 IX86_BUILTIN_VPERMILPS,
20604 IX86_BUILTIN_VPERMILPD256,
20605 IX86_BUILTIN_VPERMILPS256,
20606 IX86_BUILTIN_VPERM2F128PD256,
20607 IX86_BUILTIN_VPERM2F128PS256,
20608 IX86_BUILTIN_VPERM2F128SI256,
20609 IX86_BUILTIN_VBROADCASTSS,
20610 IX86_BUILTIN_VBROADCASTSD256,
20611 IX86_BUILTIN_VBROADCASTSS256,
20612 IX86_BUILTIN_VBROADCASTPD256,
20613 IX86_BUILTIN_VBROADCASTPS256,
20614 IX86_BUILTIN_VINSERTF128PD256,
20615 IX86_BUILTIN_VINSERTF128PS256,
20616 IX86_BUILTIN_VINSERTF128SI256,
20617 IX86_BUILTIN_LOADUPD256,
20618 IX86_BUILTIN_LOADUPS256,
20619 IX86_BUILTIN_STOREUPD256,
20620 IX86_BUILTIN_STOREUPS256,
20621 IX86_BUILTIN_LDDQU256,
20622 IX86_BUILTIN_MOVNTDQ256,
20623 IX86_BUILTIN_MOVNTPD256,
20624 IX86_BUILTIN_MOVNTPS256,
20625 IX86_BUILTIN_LOADDQU256,
20626 IX86_BUILTIN_STOREDQU256,
20627 IX86_BUILTIN_MASKLOADPD,
20628 IX86_BUILTIN_MASKLOADPS,
20629 IX86_BUILTIN_MASKSTOREPD,
20630 IX86_BUILTIN_MASKSTOREPS,
20631 IX86_BUILTIN_MASKLOADPD256,
20632 IX86_BUILTIN_MASKLOADPS256,
20633 IX86_BUILTIN_MASKSTOREPD256,
20634 IX86_BUILTIN_MASKSTOREPS256,
20635 IX86_BUILTIN_MOVSHDUP256,
20636 IX86_BUILTIN_MOVSLDUP256,
20637 IX86_BUILTIN_MOVDDUP256,
20639 IX86_BUILTIN_SQRTPD256,
20640 IX86_BUILTIN_SQRTPS256,
20641 IX86_BUILTIN_SQRTPS_NR256,
20642 IX86_BUILTIN_RSQRTPS256,
20643 IX86_BUILTIN_RSQRTPS_NR256,
20645 IX86_BUILTIN_RCPPS256,
20647 IX86_BUILTIN_ROUNDPD256,
20648 IX86_BUILTIN_ROUNDPS256,
20650 IX86_BUILTIN_UNPCKHPD256,
20651 IX86_BUILTIN_UNPCKLPD256,
20652 IX86_BUILTIN_UNPCKHPS256,
20653 IX86_BUILTIN_UNPCKLPS256,
20655 IX86_BUILTIN_SI256_SI,
20656 IX86_BUILTIN_PS256_PS,
20657 IX86_BUILTIN_PD256_PD,
20658 IX86_BUILTIN_SI_SI256,
20659 IX86_BUILTIN_PS_PS256,
20660 IX86_BUILTIN_PD_PD256,
20662 IX86_BUILTIN_VTESTZPD,
20663 IX86_BUILTIN_VTESTCPD,
20664 IX86_BUILTIN_VTESTNZCPD,
20665 IX86_BUILTIN_VTESTZPS,
20666 IX86_BUILTIN_VTESTCPS,
20667 IX86_BUILTIN_VTESTNZCPS,
20668 IX86_BUILTIN_VTESTZPD256,
20669 IX86_BUILTIN_VTESTCPD256,
20670 IX86_BUILTIN_VTESTNZCPD256,
20671 IX86_BUILTIN_VTESTZPS256,
20672 IX86_BUILTIN_VTESTCPS256,
20673 IX86_BUILTIN_VTESTNZCPS256,
20674 IX86_BUILTIN_PTESTZ256,
20675 IX86_BUILTIN_PTESTC256,
20676 IX86_BUILTIN_PTESTNZC256,
20678 IX86_BUILTIN_MOVMSKPD256,
20679 IX86_BUILTIN_MOVMSKPS256,
20681 /* TFmode support builtins. */
20683 IX86_BUILTIN_HUGE_VALQ,
20684 IX86_BUILTIN_FABSQ,
20685 IX86_BUILTIN_COPYSIGNQ,
20687 /* SSE5 instructions */
20688 IX86_BUILTIN_FMADDSS,
20689 IX86_BUILTIN_FMADDSD,
20690 IX86_BUILTIN_FMADDPS,
20691 IX86_BUILTIN_FMADDPD,
20692 IX86_BUILTIN_FMSUBSS,
20693 IX86_BUILTIN_FMSUBSD,
20694 IX86_BUILTIN_FMSUBPS,
20695 IX86_BUILTIN_FMSUBPD,
20696 IX86_BUILTIN_FNMADDSS,
20697 IX86_BUILTIN_FNMADDSD,
20698 IX86_BUILTIN_FNMADDPS,
20699 IX86_BUILTIN_FNMADDPD,
20700 IX86_BUILTIN_FNMSUBSS,
20701 IX86_BUILTIN_FNMSUBSD,
20702 IX86_BUILTIN_FNMSUBPS,
20703 IX86_BUILTIN_FNMSUBPD,
20704 IX86_BUILTIN_PCMOV,
20705 IX86_BUILTIN_PCMOV_V2DI,
20706 IX86_BUILTIN_PCMOV_V4SI,
20707 IX86_BUILTIN_PCMOV_V8HI,
20708 IX86_BUILTIN_PCMOV_V16QI,
20709 IX86_BUILTIN_PCMOV_V4SF,
20710 IX86_BUILTIN_PCMOV_V2DF,
20711 IX86_BUILTIN_PPERM,
20712 IX86_BUILTIN_PERMPS,
20713 IX86_BUILTIN_PERMPD,
20714 IX86_BUILTIN_PMACSSWW,
20715 IX86_BUILTIN_PMACSWW,
20716 IX86_BUILTIN_PMACSSWD,
20717 IX86_BUILTIN_PMACSWD,
20718 IX86_BUILTIN_PMACSSDD,
20719 IX86_BUILTIN_PMACSDD,
20720 IX86_BUILTIN_PMACSSDQL,
20721 IX86_BUILTIN_PMACSSDQH,
20722 IX86_BUILTIN_PMACSDQL,
20723 IX86_BUILTIN_PMACSDQH,
20724 IX86_BUILTIN_PMADCSSWD,
20725 IX86_BUILTIN_PMADCSWD,
20726 IX86_BUILTIN_PHADDBW,
20727 IX86_BUILTIN_PHADDBD,
20728 IX86_BUILTIN_PHADDBQ,
20729 IX86_BUILTIN_PHADDWD,
20730 IX86_BUILTIN_PHADDWQ,
20731 IX86_BUILTIN_PHADDDQ,
20732 IX86_BUILTIN_PHADDUBW,
20733 IX86_BUILTIN_PHADDUBD,
20734 IX86_BUILTIN_PHADDUBQ,
20735 IX86_BUILTIN_PHADDUWD,
20736 IX86_BUILTIN_PHADDUWQ,
20737 IX86_BUILTIN_PHADDUDQ,
20738 IX86_BUILTIN_PHSUBBW,
20739 IX86_BUILTIN_PHSUBWD,
20740 IX86_BUILTIN_PHSUBDQ,
20741 IX86_BUILTIN_PROTB,
20742 IX86_BUILTIN_PROTW,
20743 IX86_BUILTIN_PROTD,
20744 IX86_BUILTIN_PROTQ,
20745 IX86_BUILTIN_PROTB_IMM,
20746 IX86_BUILTIN_PROTW_IMM,
20747 IX86_BUILTIN_PROTD_IMM,
20748 IX86_BUILTIN_PROTQ_IMM,
20749 IX86_BUILTIN_PSHLB,
20750 IX86_BUILTIN_PSHLW,
20751 IX86_BUILTIN_PSHLD,
20752 IX86_BUILTIN_PSHLQ,
20753 IX86_BUILTIN_PSHAB,
20754 IX86_BUILTIN_PSHAW,
20755 IX86_BUILTIN_PSHAD,
20756 IX86_BUILTIN_PSHAQ,
20757 IX86_BUILTIN_FRCZSS,
20758 IX86_BUILTIN_FRCZSD,
20759 IX86_BUILTIN_FRCZPS,
20760 IX86_BUILTIN_FRCZPD,
20761 IX86_BUILTIN_CVTPH2PS,
20762 IX86_BUILTIN_CVTPS2PH,
20764 IX86_BUILTIN_COMEQSS,
20765 IX86_BUILTIN_COMNESS,
20766 IX86_BUILTIN_COMLTSS,
20767 IX86_BUILTIN_COMLESS,
20768 IX86_BUILTIN_COMGTSS,
20769 IX86_BUILTIN_COMGESS,
20770 IX86_BUILTIN_COMUEQSS,
20771 IX86_BUILTIN_COMUNESS,
20772 IX86_BUILTIN_COMULTSS,
20773 IX86_BUILTIN_COMULESS,
20774 IX86_BUILTIN_COMUGTSS,
20775 IX86_BUILTIN_COMUGESS,
20776 IX86_BUILTIN_COMORDSS,
20777 IX86_BUILTIN_COMUNORDSS,
20778 IX86_BUILTIN_COMFALSESS,
20779 IX86_BUILTIN_COMTRUESS,
20781 IX86_BUILTIN_COMEQSD,
20782 IX86_BUILTIN_COMNESD,
20783 IX86_BUILTIN_COMLTSD,
20784 IX86_BUILTIN_COMLESD,
20785 IX86_BUILTIN_COMGTSD,
20786 IX86_BUILTIN_COMGESD,
20787 IX86_BUILTIN_COMUEQSD,
20788 IX86_BUILTIN_COMUNESD,
20789 IX86_BUILTIN_COMULTSD,
20790 IX86_BUILTIN_COMULESD,
20791 IX86_BUILTIN_COMUGTSD,
20792 IX86_BUILTIN_COMUGESD,
20793 IX86_BUILTIN_COMORDSD,
20794 IX86_BUILTIN_COMUNORDSD,
20795 IX86_BUILTIN_COMFALSESD,
20796 IX86_BUILTIN_COMTRUESD,
20798 IX86_BUILTIN_COMEQPS,
20799 IX86_BUILTIN_COMNEPS,
20800 IX86_BUILTIN_COMLTPS,
20801 IX86_BUILTIN_COMLEPS,
20802 IX86_BUILTIN_COMGTPS,
20803 IX86_BUILTIN_COMGEPS,
20804 IX86_BUILTIN_COMUEQPS,
20805 IX86_BUILTIN_COMUNEPS,
20806 IX86_BUILTIN_COMULTPS,
20807 IX86_BUILTIN_COMULEPS,
20808 IX86_BUILTIN_COMUGTPS,
20809 IX86_BUILTIN_COMUGEPS,
20810 IX86_BUILTIN_COMORDPS,
20811 IX86_BUILTIN_COMUNORDPS,
20812 IX86_BUILTIN_COMFALSEPS,
20813 IX86_BUILTIN_COMTRUEPS,
20815 IX86_BUILTIN_COMEQPD,
20816 IX86_BUILTIN_COMNEPD,
20817 IX86_BUILTIN_COMLTPD,
20818 IX86_BUILTIN_COMLEPD,
20819 IX86_BUILTIN_COMGTPD,
20820 IX86_BUILTIN_COMGEPD,
20821 IX86_BUILTIN_COMUEQPD,
20822 IX86_BUILTIN_COMUNEPD,
20823 IX86_BUILTIN_COMULTPD,
20824 IX86_BUILTIN_COMULEPD,
20825 IX86_BUILTIN_COMUGTPD,
20826 IX86_BUILTIN_COMUGEPD,
20827 IX86_BUILTIN_COMORDPD,
20828 IX86_BUILTIN_COMUNORDPD,
20829 IX86_BUILTIN_COMFALSEPD,
20830 IX86_BUILTIN_COMTRUEPD,
20832 IX86_BUILTIN_PCOMEQUB,
20833 IX86_BUILTIN_PCOMNEUB,
20834 IX86_BUILTIN_PCOMLTUB,
20835 IX86_BUILTIN_PCOMLEUB,
20836 IX86_BUILTIN_PCOMGTUB,
20837 IX86_BUILTIN_PCOMGEUB,
20838 IX86_BUILTIN_PCOMFALSEUB,
20839 IX86_BUILTIN_PCOMTRUEUB,
20840 IX86_BUILTIN_PCOMEQUW,
20841 IX86_BUILTIN_PCOMNEUW,
20842 IX86_BUILTIN_PCOMLTUW,
20843 IX86_BUILTIN_PCOMLEUW,
20844 IX86_BUILTIN_PCOMGTUW,
20845 IX86_BUILTIN_PCOMGEUW,
20846 IX86_BUILTIN_PCOMFALSEUW,
20847 IX86_BUILTIN_PCOMTRUEUW,
20848 IX86_BUILTIN_PCOMEQUD,
20849 IX86_BUILTIN_PCOMNEUD,
20850 IX86_BUILTIN_PCOMLTUD,
20851 IX86_BUILTIN_PCOMLEUD,
20852 IX86_BUILTIN_PCOMGTUD,
20853 IX86_BUILTIN_PCOMGEUD,
20854 IX86_BUILTIN_PCOMFALSEUD,
20855 IX86_BUILTIN_PCOMTRUEUD,
20856 IX86_BUILTIN_PCOMEQUQ,
20857 IX86_BUILTIN_PCOMNEUQ,
20858 IX86_BUILTIN_PCOMLTUQ,
20859 IX86_BUILTIN_PCOMLEUQ,
20860 IX86_BUILTIN_PCOMGTUQ,
20861 IX86_BUILTIN_PCOMGEUQ,
20862 IX86_BUILTIN_PCOMFALSEUQ,
20863 IX86_BUILTIN_PCOMTRUEUQ,
20865 IX86_BUILTIN_PCOMEQB,
20866 IX86_BUILTIN_PCOMNEB,
20867 IX86_BUILTIN_PCOMLTB,
20868 IX86_BUILTIN_PCOMLEB,
20869 IX86_BUILTIN_PCOMGTB,
20870 IX86_BUILTIN_PCOMGEB,
20871 IX86_BUILTIN_PCOMFALSEB,
20872 IX86_BUILTIN_PCOMTRUEB,
20873 IX86_BUILTIN_PCOMEQW,
20874 IX86_BUILTIN_PCOMNEW,
20875 IX86_BUILTIN_PCOMLTW,
20876 IX86_BUILTIN_PCOMLEW,
20877 IX86_BUILTIN_PCOMGTW,
20878 IX86_BUILTIN_PCOMGEW,
20879 IX86_BUILTIN_PCOMFALSEW,
20880 IX86_BUILTIN_PCOMTRUEW,
20881 IX86_BUILTIN_PCOMEQD,
20882 IX86_BUILTIN_PCOMNED,
20883 IX86_BUILTIN_PCOMLTD,
20884 IX86_BUILTIN_PCOMLED,
20885 IX86_BUILTIN_PCOMGTD,
20886 IX86_BUILTIN_PCOMGED,
20887 IX86_BUILTIN_PCOMFALSED,
20888 IX86_BUILTIN_PCOMTRUED,
20889 IX86_BUILTIN_PCOMEQQ,
20890 IX86_BUILTIN_PCOMNEQ,
20891 IX86_BUILTIN_PCOMLTQ,
20892 IX86_BUILTIN_PCOMLEQ,
20893 IX86_BUILTIN_PCOMGTQ,
20894 IX86_BUILTIN_PCOMGEQ,
20895 IX86_BUILTIN_PCOMFALSEQ,
20896 IX86_BUILTIN_PCOMTRUEQ,
20901 /* Table for the ix86 builtin decls.  */
/* Indexed by enum ix86_builtins.  An entry is the FUNCTION_DECL created by
   def_builtin, or NULL_TREE when registration was deferred to
   ix86_add_new_builtins (see ix86_builtins_isa below).  GTY(()) registers
   the array as a GC root so the decls survive garbage collection.  */
20902 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20904 /* Table of all of the builtin functions that are possible with different ISA's
20905    but are waiting to be built until a function is declared to use that
/* Deferred-registration record for one builtin: everything def_builtin needs
   to create the decl later, once some function actually enables the ISA.  */
20907 struct GTY(()) builtin_isa {
20908   tree type;			/* builtin type to use in the declaration */
20909   const char *name;		/* function name */
20910   int isa;			/* isa_flags this builtin is defined for */
20911   bool const_p;			/* true if the declaration is constant */
/* One deferred-registration slot per builtin code, parallel to
   ix86_builtins; .type is reset to NULL_TREE once the decl is built.  */
20914 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20917 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
20918  * of which isa_flags to use in the ix86_builtins_isa array.  Stores the
20919  * function decl in the ix86_builtins array.  Returns the function decl or
20920  * NULL_TREE, if the builtin was not added.
20922  * If the front end has a special hook for builtin functions, delay adding
20923  * builtin functions that aren't in the current ISA until the ISA is changed
20924  * with function specific optimization.  Doing so, can save about 300K for the
20925  * default compiler.  When the builtin is expanded, check at that time whether
20928  * If the front end doesn't have a special hook, record all builtins, even if
20929  * it isn't an instruction set in the current ISA in case the user uses
20930  * function specific options for a different ISA, so that we don't get scope
20931  * errors if a builtin is added in the middle of a function scope.  */
20934 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20936   tree decl = NULL_TREE;
/* Skip 64-bit-only builtins when not targeting 64-bit; decl stays NULL_TREE.  */
20938   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
/* Always remember which ISA flags this builtin belongs to.  */
20940       ix86_builtins_isa[(int) code].isa = mask;
/* Build the decl now if the ISA is already enabled, or if the front end
   supports extended-scope registration (so deferral buys nothing).  */
20942       if ((mask & ix86_isa_flags) != 0
20943 	  || (lang_hooks.builtin_function
20944 	      == lang_hooks.builtin_function_ext_scope))
20947 	  decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20949 	  ix86_builtins[(int) code] = decl;
/* Mark as built: a NULL_TREE .type tells ix86_add_new_builtins to skip it.  */
20950 	  ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Otherwise defer: record NAME/TYPE so ix86_add_new_builtins can build the
   decl later if the ISA is enabled by function-specific options.
   NOTE(review): the else branch and enclosing braces are on lines not shown
   here.  */
20954 	  ix86_builtins[(int) code] = NULL_TREE;
20955 	  ix86_builtins_isa[(int) code].const_p = false;
20956 	  ix86_builtins_isa[(int) code].type = type;
20957 	  ix86_builtins_isa[(int) code].name = name;
20964 /* Like def_builtin, but also marks the function decl "const".  */
20967 def_builtin_const (int mask, const char *name, tree type,
20968 		   enum ix86_builtins code)
20970   tree decl = def_builtin (mask, name, type, code);
/* If the decl was built immediately, mark it TREE_READONLY ("const").  */
20972     TREE_READONLY (decl) = 1;
/* Otherwise record const_p so the deferred build in ix86_add_new_builtins
   applies TREE_READONLY when the decl is eventually created.  */
20974     ix86_builtins_isa[(int) code].const_p = true;
20979 /* Add any new builtin functions for a given ISA that may not have been
20980    declared.  This saves a bit of space compared to adding all of the
20981    declarations to the tree, even if we didn't use them.  */
/* ISA is the newly-enabled isa_flags mask; walk every builtin code and build
   the decls that were deferred by def_builtin and match ISA.  */
20984 ix86_add_new_builtins (int isa)
20989   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
/* A non-NULL .type marks a still-deferred builtin; check it belongs to the
   ISA being enabled.  */
20991       if ((ix86_builtins_isa[i].isa & isa) != 0
20992 	  && ix86_builtins_isa[i].type != NULL_TREE)
/* Extended-scope registration, so the builtin is visible even if we are in
   the middle of a function scope.  */
20994 	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20995 						 ix86_builtins_isa[i].type,
20996 						 i, BUILT_IN_MD, NULL,
20999 	  ix86_builtins[i] = decl;
/* Clear .type so the builtin is not registered a second time.  */
21000 	  ix86_builtins_isa[i].type = NULL_TREE;
/* Honor the "const" marking recorded by def_builtin_const.  */
21001 	  if (ix86_builtins_isa[i].const_p)
21002 	    TREE_READONLY (decl) = 1;
21007 /* Bits for builtin_description.flag.  */
21009 /* Set when we don't support the comparison natively, and should
21010    swap_comparison in order to support it.  */
/* Used as a bit flag, so additional BUILTIN_DESC_* bits could be OR-ed in.  */
21011 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the bdesc_* tables below: maps a builtin code to the insn
   pattern that implements it, plus the comparison code where relevant.  */
21013 struct builtin_description
21015   const unsigned int mask;	/* OPTION_MASK_ISA_* flags required */
21016   const enum insn_code icode;	/* CODE_FOR_* insn pattern to expand to */
21017   const char *const name;	/* user-visible __builtin_ia32_* name */
21018   const enum ix86_builtins code;	/* IX86_BUILTIN_* enumerator */
21019   const enum rtx_code comparison;	/* comparison code, or UNKNOWN */
/* SSE/SSE2 scalar ordered (comi) and unordered (ucomi) compare-and-branch
   builtins.  Note the rtx codes are the swapped/unordered forms (UNEQ, UNLT,
   UNLE, LTGT) needed to model comi/ucomi flag results.  */
21023 static const struct builtin_description bdesc_comi[] =
21025   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21026   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21027   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21028   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21029   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21030   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21031   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21032   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21033   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21034   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21035   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21036   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21037   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21038   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21039   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21040   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21041   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21042   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21043   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21044   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21045   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21046   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21047   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21048   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 explicit-length string-compare (pcmpestr*) builtins.  The flag
   field carries the CC mode used to read the EFLAGS bit for the *a/*c/*o/
   *s/*z variants; 0 for the index/mask-producing forms.  */
21051 static const struct builtin_description bdesc_pcmpestr[] =
21054   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21055   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21056   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21057   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21058   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21059   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21060   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 implicit-length string-compare (pcmpistr*) builtins; same layout
   conventions as bdesc_pcmpestr above.  */
21063 static const struct builtin_description bdesc_pcmpistr[] =
21066   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21067   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21068   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21069   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21070   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21071   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21072   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21075 /* Special builtin types */
/* Function-type codes for builtins with pointer arguments or void results
   (loads, stores, masked moves).  Naming: RETURN_FTYPE_ARG1_ARG2...; PC*
   means pointer-to-const, P* plain pointer.  */
21076 enum ix86_special_builtin_type
21078   SPECIAL_FTYPE_UNKNOWN,
21080   V32QI_FTYPE_PCCHAR,
21081   V16QI_FTYPE_PCCHAR,
21083   V8SF_FTYPE_PCFLOAT,
21085   V4DF_FTYPE_PCDOUBLE,
21086   V4SF_FTYPE_PCFLOAT,
21087   V2DF_FTYPE_PCDOUBLE,
21088   V8SF_FTYPE_PCV8SF_V8SF,
21089   V4DF_FTYPE_PCV4DF_V4DF,
21090   V4SF_FTYPE_V4SF_PCV2SF,
21091   V4SF_FTYPE_PCV4SF_V4SF,
21092   V2DF_FTYPE_V2DF_PCDOUBLE,
21093   V2DF_FTYPE_PCV2DF_V2DF,
/* void-returning store forms.  */
21095   VOID_FTYPE_PV2SF_V4SF,
21096   VOID_FTYPE_PV4DI_V4DI,
21097   VOID_FTYPE_PV2DI_V2DI,
21098   VOID_FTYPE_PCHAR_V32QI,
21099   VOID_FTYPE_PCHAR_V16QI,
21100   VOID_FTYPE_PFLOAT_V8SF,
21101   VOID_FTYPE_PFLOAT_V4SF,
21102   VOID_FTYPE_PDOUBLE_V4DF,
21103   VOID_FTYPE_PDOUBLE_V2DF,
21105   VOID_FTYPE_PINT_INT,
/* masked-store forms: destination pointer, mask vector, source vector.  */
21106   VOID_FTYPE_PV8SF_V8SF_V8SF,
21107   VOID_FTYPE_PV4DF_V4DF_V4DF,
21108   VOID_FTYPE_PV4SF_V4SF_V4SF,
21109   VOID_FTYPE_PV2DF_V2DF_V2DF
21112 /* Builtin types */
21113 enum ix86_builtin_type
/* Signatures for the ordinary (register-operand) builtins in bdesc_args.
   Naming is RET_FTYPE_ARGS as above.  Suffix conventions, grounded in the
   tables below:
     _PTEST     — ptest-style comparisons returning an int flag result.
     _VEC_MERGE — scalar op merged into the low element of the vector
                  (e.g. sqrtss/rsqrtss/rcpss entries).
     _COUNT     — second operand is a shift count (psll/psrl/psra entries);
                  the SI_COUNT forms take an immediate/int count, the
                  vector _COUNT forms take the count in a vector register.
     _SWAP      — operands are swapped before emitting the insn, used to
                  synthesize cmpgt/cmpge from lt/le comparisons
                  (see the cmpgtps/cmpgeps entries).
   NOTE(review): V2DI2TI / V1DI2DI presumably denote a mode-punning
   (TImode/DImode view) variant — TODO confirm in the expander that
   consumes these enum values.  */
21116 FLOAT128_FTYPE_FLOAT128,
21118 FLOAT128_FTYPE_FLOAT128_FLOAT128,
21119 INT_FTYPE_V8SF_V8SF_PTEST,
21120 INT_FTYPE_V4DI_V4DI_PTEST,
21121 INT_FTYPE_V4DF_V4DF_PTEST,
21122 INT_FTYPE_V4SF_V4SF_PTEST,
21123 INT_FTYPE_V2DI_V2DI_PTEST,
21124 INT_FTYPE_V2DF_V2DF_PTEST,
21156 V4SF_FTYPE_V4SF_VEC_MERGE,
21165 V2DF_FTYPE_V2DF_VEC_MERGE,
21176 V16QI_FTYPE_V16QI_V16QI,
21177 V16QI_FTYPE_V8HI_V8HI,
21178 V8QI_FTYPE_V8QI_V8QI,
21179 V8QI_FTYPE_V4HI_V4HI,
21180 V8HI_FTYPE_V8HI_V8HI,
21181 V8HI_FTYPE_V8HI_V8HI_COUNT,
21182 V8HI_FTYPE_V16QI_V16QI,
21183 V8HI_FTYPE_V4SI_V4SI,
21184 V8HI_FTYPE_V8HI_SI_COUNT,
21185 V8SF_FTYPE_V8SF_V8SF,
21186 V8SF_FTYPE_V8SF_V8SI,
21187 V4SI_FTYPE_V4SI_V4SI,
21188 V4SI_FTYPE_V4SI_V4SI_COUNT,
21189 V4SI_FTYPE_V8HI_V8HI,
21190 V4SI_FTYPE_V4SF_V4SF,
21191 V4SI_FTYPE_V2DF_V2DF,
21192 V4SI_FTYPE_V4SI_SI_COUNT,
21193 V4HI_FTYPE_V4HI_V4HI,
21194 V4HI_FTYPE_V4HI_V4HI_COUNT,
21195 V4HI_FTYPE_V8QI_V8QI,
21196 V4HI_FTYPE_V2SI_V2SI,
21197 V4HI_FTYPE_V4HI_SI_COUNT,
21198 V4DF_FTYPE_V4DF_V4DF,
21199 V4DF_FTYPE_V4DF_V4DI,
21200 V4SF_FTYPE_V4SF_V4SF,
21201 V4SF_FTYPE_V4SF_V4SF_SWAP,
21202 V4SF_FTYPE_V4SF_V4SI,
21203 V4SF_FTYPE_V4SF_V2SI,
21204 V4SF_FTYPE_V4SF_V2DF,
21205 V4SF_FTYPE_V4SF_DI,
21206 V4SF_FTYPE_V4SF_SI,
21207 V2DI_FTYPE_V2DI_V2DI,
21208 V2DI_FTYPE_V2DI_V2DI_COUNT,
21209 V2DI_FTYPE_V16QI_V16QI,
21210 V2DI_FTYPE_V4SI_V4SI,
21211 V2DI_FTYPE_V2DI_V16QI,
21212 V2DI_FTYPE_V2DF_V2DF,
21213 V2DI_FTYPE_V2DI_SI_COUNT,
21214 V2SI_FTYPE_V2SI_V2SI,
21215 V2SI_FTYPE_V2SI_V2SI_COUNT,
21216 V2SI_FTYPE_V4HI_V4HI,
21217 V2SI_FTYPE_V2SF_V2SF,
21218 V2SI_FTYPE_V2SI_SI_COUNT,
21219 V2DF_FTYPE_V2DF_V2DF,
21220 V2DF_FTYPE_V2DF_V2DF_SWAP,
21221 V2DF_FTYPE_V2DF_V4SF,
21222 V2DF_FTYPE_V2DF_V2DI,
21223 V2DF_FTYPE_V2DF_DI,
21224 V2DF_FTYPE_V2DF_SI,
21225 V2SF_FTYPE_V2SF_V2SF,
21226 V1DI_FTYPE_V1DI_V1DI,
21227 V1DI_FTYPE_V1DI_V1DI_COUNT,
21228 V1DI_FTYPE_V8QI_V8QI,
21229 V1DI_FTYPE_V2SI_V2SI,
21230 V1DI_FTYPE_V1DI_SI_COUNT,
21231 UINT64_FTYPE_UINT64_UINT64,
21232 UINT_FTYPE_UINT_UINT,
21233 UINT_FTYPE_UINT_USHORT,
21234 UINT_FTYPE_UINT_UCHAR,
/* Two-operand forms whose last operand is an integer immediate
   (shuffles, shifts-by-immediate, extract/insert selectors).  */
21235 V8HI_FTYPE_V8HI_INT,
21236 V4SI_FTYPE_V4SI_INT,
21237 V4HI_FTYPE_V4HI_INT,
21238 V8SF_FTYPE_V8SF_INT,
21239 V4SI_FTYPE_V8SI_INT,
21240 V4SF_FTYPE_V8SF_INT,
21241 V2DF_FTYPE_V4DF_INT,
21242 V4DF_FTYPE_V4DF_INT,
21243 V4SF_FTYPE_V4SF_INT,
21244 V2DI_FTYPE_V2DI_INT,
21245 V2DI2TI_FTYPE_V2DI_INT,
21246 V2DF_FTYPE_V2DF_INT,
21247 V16QI_FTYPE_V16QI_V16QI_V16QI,
21248 V8SF_FTYPE_V8SF_V8SF_V8SF,
21249 V4DF_FTYPE_V4DF_V4DF_V4DF,
21250 V4SF_FTYPE_V4SF_V4SF_V4SF,
21251 V2DF_FTYPE_V2DF_V2DF_V2DF,
21252 V16QI_FTYPE_V16QI_V16QI_INT,
21253 V8SI_FTYPE_V8SI_V8SI_INT,
21254 V8SI_FTYPE_V8SI_V4SI_INT,
21255 V8HI_FTYPE_V8HI_V8HI_INT,
21256 V8SF_FTYPE_V8SF_V8SF_INT,
21257 V8SF_FTYPE_V8SF_V4SF_INT,
21258 V4SI_FTYPE_V4SI_V4SI_INT,
21259 V4DF_FTYPE_V4DF_V4DF_INT,
21260 V4DF_FTYPE_V4DF_V2DF_INT,
21261 V4SF_FTYPE_V4SF_V4SF_INT,
21262 V2DI_FTYPE_V2DI_V2DI_INT,
21263 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21264 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21265 V2DF_FTYPE_V2DF_V2DF_INT,
21266 V2DI_FTYPE_V2DI_UINT_UINT,
21267 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21270 /* Special builtins with variable number of arguments. */
/* Each row is a struct builtin_description:
     { isa-mask, insn code, user-visible builtin name, IX86_BUILTIN_* code,
       rtx comparison code (UNKNOWN when unused), (int) function-type enum }.
   The isa-mask field OR-combines OPTION_MASK_ISA_* bits when the builtin is
   available under any of several ISAs (see the SSE|3DNOW_A rows).  The last
   field is an ix86_special_builtin_type value cast to int.
   NOTE(review): a few rows have a name of 0 (mfence, vzeroupper) —
   presumably those builtins are registered by name elsewhere and only the
   expansion data lives here; TODO confirm against the init code.  */
21271 static const struct builtin_description bdesc_special_args[] =
21274 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21277 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21289 /* SSE or 3DNow!A */
21290 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21291 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
21294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21295 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21308 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21311 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
21314 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21315 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21318 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21319 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21320 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21322 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21323 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21324 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21325 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21326 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21328 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21329 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21330 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21331 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21332 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21333 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21334 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21336 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21337 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21338 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21340 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21341 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21342 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21343 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21344 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21345 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21346 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21347 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21350 /* Builtins with variable number of arguments. */
21351 static const struct builtin_description bdesc_args[] =
21354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21359 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21365 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21371 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21378 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21379 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21380 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21381 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21382 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21385 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21386 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21387 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21388 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21389 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21390 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21392 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21393 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21394 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21396 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21398 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21399 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21400 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21401 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21402 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21403 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21405 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21406 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21407 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21408 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21409 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21410 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21412 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21413 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21414 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21415 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21418 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21419 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21420 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21421 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21423 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21424 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21425 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21426 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21427 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21428 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21429 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21430 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21431 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21432 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21433 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21434 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21435 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21436 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21437 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21440 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21441 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21442 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21443 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21444 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21445 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21448 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21449 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21450 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21451 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21452 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21453 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21454 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21456 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21458 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21459 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21463 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21464 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21465 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21482 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21483 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21484 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21485 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21486 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21487 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21488 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21489 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21490 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21491 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21492 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21493 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21495 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21496 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21497 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21498 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21500 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21501 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21502 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21503 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21505 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21506 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21507 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21508 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21509 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21511 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21512 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21513 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },  /* cast flag to int, matching every other bdesc_args entry */
21515 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21517 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21518 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21519 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21521 /* SSE MMX or 3Dnow!A */
21522 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21523 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21524 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21526 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21527 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21528 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21529 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21531 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21532 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21534 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21555 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21556 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21562 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21563 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21564 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21565 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21593 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21597 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21599 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21600 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21606 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21608 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21609 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21610 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21611 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21612 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21613 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21614 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21615 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21617 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21619 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21626 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21627 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21629 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21631 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21632 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21644 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21645 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21646 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21662 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21666 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21668 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21671 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21676 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21677 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21678 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21679 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21680 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21681 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21684 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21685 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21686 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21687 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21688 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21689 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21691 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21692 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21693 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21694 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21702 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21703 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21705 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21708 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21709 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21712 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21713 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21715 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21716 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21717 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21718 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21719 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21720 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21723 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21724 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21725 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21726 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21727 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21728 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21730 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21731 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21732 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21733 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21734 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21735 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21736 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21737 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21738 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21739 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21740 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21741 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21742 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21743 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21744 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21745 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21746 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21747 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21748 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21749 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21750 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21751 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21752 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21753 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21756 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21757 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21760 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21761 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21762 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21763 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21764 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21765 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21766 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21767 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21768 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21769 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21771 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21772 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21773 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21774 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21775 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21776 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21777 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21778 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21779 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21780 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21781 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21782 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21783 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21785 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21786 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21787 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21788 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21789 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21790 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21791 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21792 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21793 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
/* Tail of a builtin_description table (its declaration begins above this
   excerpt).  Each entry is:
     { ISA option mask, insn code, builtin name, builtin enum value,
       comparison/flag code, function-prototype code }.
   NOTE(review): each line carries a leading decimal number that looks like
   an artifact of an external line-numbering pass, not C source -- confirm
   against the pristine file.  */
21794 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21795 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21796 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21798 /* SSE4.1 and SSE5 */
/* These entries use OPTION_MASK_ISA_ROUND rather than a single-ISA mask;
   per the comment above they cover insns available under both SSE4.1 and
   SSE5.  */
21799 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21800 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21801 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21802 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
/* One insn pattern, three builtins: the comparison code selects which
   condition each ptest builtin returns (ptestz -> EQ, ptestc -> LTU,
   ptestnzc -> GTU).  NOTE(review): flag mapping inferred from the builtin
   names; confirm in the expander.  */
21804 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21805 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21806 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
/* SSE4.2 */
21809 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21810 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21811 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21812 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
/* The 64-bit crc32 form additionally requires a 64-bit target.  */
21813 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
/* SSE4A */
21816 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21817 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21818 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21819 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* AES.  NOTE(review): the name field is 0 for the AES and PCLMUL entries;
   presumably these builtins are registered by name elsewhere -- verify in
   the builtin-init code.  */
21822 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21823 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21825 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21826 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21827 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21828 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* PCLMUL */
21831 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
/* AVX */
21834 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21835 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21837 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21838 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21839 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21842 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21848 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21849 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21850 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21851 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21852 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21853 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21854 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21855 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21856 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21857 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21858 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21859 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21881 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21905 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21907 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21909 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21919 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
/* 128-bit <-> 256-bit widening/narrowing forms -- the prototype codes
   (e.g. V8SI_FTYPE_V4SI) show the direction of each.  */
21921 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21922 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21924 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21925 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21926 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
/* vtest/ptest family: as with sse4_1_ptest above, one pattern serves
   three builtins and the comparison code (EQ/LTU/GTU) picks which
   condition is returned.  */
21928 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21929 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21930 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21931 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21932 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21933 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21934 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21935 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21936 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21937 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21938 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21939 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21940 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21941 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21942 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21944 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21945 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
/* Classification codes for the argument/return signatures of the
   multi-operand (SSE5) builtins; stored in the last field of each
   bdesc_multi_arg entry.  Naming: MULTI_ARG_<operand count>_<mode>,
   with _IMM marking forms whose last operand is an immediate and _CMP
   marking comparison forms (used together with an rtx comparison code).
   NOTE(review): several enumerators and the closing brace fall outside
   this excerpt of the listing -- confirm against the full source.  */
21949 enum multi_arg_type {
21959 MULTI_ARG_3_PERMPS,
21960 MULTI_ARG_3_PERMPD,
/* Two-operand forms whose second operand is an immediate.  */
21967 MULTI_ARG_2_DI_IMM,
21968 MULTI_ARG_2_SI_IMM,
21969 MULTI_ARG_2_HI_IMM,
21970 MULTI_ARG_2_QI_IMM,
/* Two-operand comparison forms; the bdesc_multi_arg entry supplies the
   comparison code (EQ, LT, UNORDERED, ...).  */
21971 MULTI_ARG_2_SF_CMP,
21972 MULTI_ARG_2_DF_CMP,
21973 MULTI_ARG_2_DI_CMP,
21974 MULTI_ARG_2_SI_CMP,
21975 MULTI_ARG_2_HI_CMP,
21976 MULTI_ARG_2_QI_CMP,
/* Descriptor table for the multi-operand (SSE5) builtins:
   { ISA mask, insn code, builtin name, builtin enum value,
     comparison code, MULTI_ARG_* signature }.  The table continues
   beyond this excerpt.  */
21999 static const struct builtin_description bdesc_multi_arg[] =
/* Fused multiply-add/subtract, scalar (vm...) and packed forms.  */
22001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
/* pcmov: the plain "__builtin_ia32_pcmov" entry shares the v2di insn
   pattern with the _v2di variant.  */
22017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
22022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
/* Permutes.  */
22024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
22025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
/* Integer multiply-accumulate (pmacs*/pmadcs* mnemonics); mixed-mode
   signatures such as MULTI_ARG_3_HI_SI indicate widening forms.  */
22027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
/* Rotates: vrotl patterns take a variable count, rotl patterns an
   immediate (hence the _IMM signatures).  */
22039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
/* Shifts: psha* map to arithmetic-shift (ashl) patterns, pshl* to
   logical-shift (lshl) patterns.  */
22047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
/* Fraction extraction and half-precision conversion.  */
22055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
/* Horizontal add/sub; the mixed-mode signatures (QI_HI, QI_SI, ...)
   show the element widening each form performs.  */
22061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
/* Scalar SF compares.  One insn pattern for all; the rtx comparison
   code selects the predicate.  Note the deliberate alias pairs: e.g.
   "__builtin_ia32_comness" and "__builtin_ia32_comneqss" both map to
   IX86_BUILTIN_COMNESS -- two spellings of the same builtin.  */
22077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
/* Scalar DF compares (same alias scheme: comneqsd -> COMNESD, etc.).  */
22094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22108 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22109 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
/* Packed SF compares (same alias scheme: comneqps -> COMNEPS, etc.).  */
22111 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22112 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22113 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22115 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22117 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22118 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22119 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22120 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22121 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22123 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22125 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22126 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
/* Packed DF compares (table continues past this excerpt).  */
22128 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22129 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22132 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22133 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22134 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22135 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22136 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22137 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22141 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22145 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22153 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22194 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22196 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22197 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22198 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22199 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22201 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22202 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22203 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22204 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22205 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22206 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22207 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22209 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22210 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22212 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22215 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22216 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22218 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22219 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22220 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22221 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22222 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22223 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22224 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22225 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22227 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22228 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22229 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22230 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22231 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22232 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22233 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22234 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22237 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22238 in the current target ISA to allow the user to compile particular modules
22239 with different target specific options that differ from the command line
22242 ix86_init_mmx_sse_builtins (void)
22244 const struct builtin_description * d;
22247 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22248 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22249 tree V1DI_type_node
22250 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22251 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22252 tree V2DI_type_node
22253 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22254 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22255 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22256 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22257 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22258 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22259 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22261 tree pchar_type_node = build_pointer_type (char_type_node);
22262 tree pcchar_type_node
22263 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22264 tree pfloat_type_node = build_pointer_type (float_type_node);
22265 tree pcfloat_type_node
22266 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22267 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22268 tree pcv2sf_type_node
22269 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22270 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22271 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22274 tree int_ftype_v4sf_v4sf
22275 = build_function_type_list (integer_type_node,
22276 V4SF_type_node, V4SF_type_node, NULL_TREE);
22277 tree v4si_ftype_v4sf_v4sf
22278 = build_function_type_list (V4SI_type_node,
22279 V4SF_type_node, V4SF_type_node, NULL_TREE);
22280 /* MMX/SSE/integer conversions. */
22281 tree int_ftype_v4sf
22282 = build_function_type_list (integer_type_node,
22283 V4SF_type_node, NULL_TREE);
22284 tree int64_ftype_v4sf
22285 = build_function_type_list (long_long_integer_type_node,
22286 V4SF_type_node, NULL_TREE);
22287 tree int_ftype_v8qi
22288 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22289 tree v4sf_ftype_v4sf_int
22290 = build_function_type_list (V4SF_type_node,
22291 V4SF_type_node, integer_type_node, NULL_TREE);
22292 tree v4sf_ftype_v4sf_int64
22293 = build_function_type_list (V4SF_type_node,
22294 V4SF_type_node, long_long_integer_type_node,
22296 tree v4sf_ftype_v4sf_v2si
22297 = build_function_type_list (V4SF_type_node,
22298 V4SF_type_node, V2SI_type_node, NULL_TREE);
22300 /* Miscellaneous. */
22301 tree v8qi_ftype_v4hi_v4hi
22302 = build_function_type_list (V8QI_type_node,
22303 V4HI_type_node, V4HI_type_node, NULL_TREE);
22304 tree v4hi_ftype_v2si_v2si
22305 = build_function_type_list (V4HI_type_node,
22306 V2SI_type_node, V2SI_type_node, NULL_TREE);
22307 tree v4sf_ftype_v4sf_v4sf_int
22308 = build_function_type_list (V4SF_type_node,
22309 V4SF_type_node, V4SF_type_node,
22310 integer_type_node, NULL_TREE);
22311 tree v2si_ftype_v4hi_v4hi
22312 = build_function_type_list (V2SI_type_node,
22313 V4HI_type_node, V4HI_type_node, NULL_TREE);
22314 tree v4hi_ftype_v4hi_int
22315 = build_function_type_list (V4HI_type_node,
22316 V4HI_type_node, integer_type_node, NULL_TREE);
22317 tree v2si_ftype_v2si_int
22318 = build_function_type_list (V2SI_type_node,
22319 V2SI_type_node, integer_type_node, NULL_TREE);
22320 tree v1di_ftype_v1di_int
22321 = build_function_type_list (V1DI_type_node,
22322 V1DI_type_node, integer_type_node, NULL_TREE);
22324 tree void_ftype_void
22325 = build_function_type (void_type_node, void_list_node);
22326 tree void_ftype_unsigned
22327 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22328 tree void_ftype_unsigned_unsigned
22329 = build_function_type_list (void_type_node, unsigned_type_node,
22330 unsigned_type_node, NULL_TREE);
22331 tree void_ftype_pcvoid_unsigned_unsigned
22332 = build_function_type_list (void_type_node, const_ptr_type_node,
22333 unsigned_type_node, unsigned_type_node,
22335 tree unsigned_ftype_void
22336 = build_function_type (unsigned_type_node, void_list_node);
22337 tree v2si_ftype_v4sf
22338 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22339 /* Loads/stores. */
22340 tree void_ftype_v8qi_v8qi_pchar
22341 = build_function_type_list (void_type_node,
22342 V8QI_type_node, V8QI_type_node,
22343 pchar_type_node, NULL_TREE);
22344 tree v4sf_ftype_pcfloat
22345 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22346 tree v4sf_ftype_v4sf_pcv2sf
22347 = build_function_type_list (V4SF_type_node,
22348 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22349 tree void_ftype_pv2sf_v4sf
22350 = build_function_type_list (void_type_node,
22351 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22352 tree void_ftype_pfloat_v4sf
22353 = build_function_type_list (void_type_node,
22354 pfloat_type_node, V4SF_type_node, NULL_TREE);
22355 tree void_ftype_pdi_di
22356 = build_function_type_list (void_type_node,
22357 pdi_type_node, long_long_unsigned_type_node,
22359 tree void_ftype_pv2di_v2di
22360 = build_function_type_list (void_type_node,
22361 pv2di_type_node, V2DI_type_node, NULL_TREE);
22362 /* Normal vector unops. */
22363 tree v4sf_ftype_v4sf
22364 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22365 tree v16qi_ftype_v16qi
22366 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22367 tree v8hi_ftype_v8hi
22368 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22369 tree v4si_ftype_v4si
22370 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22371 tree v8qi_ftype_v8qi
22372 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22373 tree v4hi_ftype_v4hi
22374 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22376 /* Normal vector binops. */
22377 tree v4sf_ftype_v4sf_v4sf
22378 = build_function_type_list (V4SF_type_node,
22379 V4SF_type_node, V4SF_type_node, NULL_TREE);
22380 tree v8qi_ftype_v8qi_v8qi
22381 = build_function_type_list (V8QI_type_node,
22382 V8QI_type_node, V8QI_type_node, NULL_TREE);
22383 tree v4hi_ftype_v4hi_v4hi
22384 = build_function_type_list (V4HI_type_node,
22385 V4HI_type_node, V4HI_type_node, NULL_TREE);
22386 tree v2si_ftype_v2si_v2si
22387 = build_function_type_list (V2SI_type_node,
22388 V2SI_type_node, V2SI_type_node, NULL_TREE);
22389 tree v1di_ftype_v1di_v1di
22390 = build_function_type_list (V1DI_type_node,
22391 V1DI_type_node, V1DI_type_node, NULL_TREE);
22392 tree v1di_ftype_v1di_v1di_int
22393 = build_function_type_list (V1DI_type_node,
22394 V1DI_type_node, V1DI_type_node,
22395 integer_type_node, NULL_TREE);
22396 tree v2si_ftype_v2sf
22397 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22398 tree v2sf_ftype_v2si
22399 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22400 tree v2si_ftype_v2si
22401 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22402 tree v2sf_ftype_v2sf
22403 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22404 tree v2sf_ftype_v2sf_v2sf
22405 = build_function_type_list (V2SF_type_node,
22406 V2SF_type_node, V2SF_type_node, NULL_TREE);
22407 tree v2si_ftype_v2sf_v2sf
22408 = build_function_type_list (V2SI_type_node,
22409 V2SF_type_node, V2SF_type_node, NULL_TREE);
22410 tree pint_type_node = build_pointer_type (integer_type_node);
22411 tree pdouble_type_node = build_pointer_type (double_type_node);
22412 tree pcdouble_type_node = build_pointer_type (
22413 build_type_variant (double_type_node, 1, 0));
22414 tree int_ftype_v2df_v2df
22415 = build_function_type_list (integer_type_node,
22416 V2DF_type_node, V2DF_type_node, NULL_TREE);
22418 tree void_ftype_pcvoid
22419 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22420 tree v4sf_ftype_v4si
22421 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22422 tree v4si_ftype_v4sf
22423 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22424 tree v2df_ftype_v4si
22425 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22426 tree v4si_ftype_v2df
22427 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22428 tree v4si_ftype_v2df_v2df
22429 = build_function_type_list (V4SI_type_node,
22430 V2DF_type_node, V2DF_type_node, NULL_TREE);
22431 tree v2si_ftype_v2df
22432 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22433 tree v4sf_ftype_v2df
22434 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22435 tree v2df_ftype_v2si
22436 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22437 tree v2df_ftype_v4sf
22438 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22439 tree int_ftype_v2df
22440 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22441 tree int64_ftype_v2df
22442 = build_function_type_list (long_long_integer_type_node,
22443 V2DF_type_node, NULL_TREE);
22444 tree v2df_ftype_v2df_int
22445 = build_function_type_list (V2DF_type_node,
22446 V2DF_type_node, integer_type_node, NULL_TREE);
22447 tree v2df_ftype_v2df_int64
22448 = build_function_type_list (V2DF_type_node,
22449 V2DF_type_node, long_long_integer_type_node,
22451 tree v4sf_ftype_v4sf_v2df
22452 = build_function_type_list (V4SF_type_node,
22453 V4SF_type_node, V2DF_type_node, NULL_TREE);
22454 tree v2df_ftype_v2df_v4sf
22455 = build_function_type_list (V2DF_type_node,
22456 V2DF_type_node, V4SF_type_node, NULL_TREE);
22457 tree v2df_ftype_v2df_v2df_int
22458 = build_function_type_list (V2DF_type_node,
22459 V2DF_type_node, V2DF_type_node,
22462 tree v2df_ftype_v2df_pcdouble
22463 = build_function_type_list (V2DF_type_node,
22464 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22465 tree void_ftype_pdouble_v2df
22466 = build_function_type_list (void_type_node,
22467 pdouble_type_node, V2DF_type_node, NULL_TREE);
22468 tree void_ftype_pint_int
22469 = build_function_type_list (void_type_node,
22470 pint_type_node, integer_type_node, NULL_TREE);
22471 tree void_ftype_v16qi_v16qi_pchar
22472 = build_function_type_list (void_type_node,
22473 V16QI_type_node, V16QI_type_node,
22474 pchar_type_node, NULL_TREE);
22475 tree v2df_ftype_pcdouble
22476 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22477 tree v2df_ftype_v2df_v2df
22478 = build_function_type_list (V2DF_type_node,
22479 V2DF_type_node, V2DF_type_node, NULL_TREE);
22480 tree v16qi_ftype_v16qi_v16qi
22481 = build_function_type_list (V16QI_type_node,
22482 V16QI_type_node, V16QI_type_node, NULL_TREE);
22483 tree v8hi_ftype_v8hi_v8hi
22484 = build_function_type_list (V8HI_type_node,
22485 V8HI_type_node, V8HI_type_node, NULL_TREE);
22486 tree v4si_ftype_v4si_v4si
22487 = build_function_type_list (V4SI_type_node,
22488 V4SI_type_node, V4SI_type_node, NULL_TREE);
22489 tree v2di_ftype_v2di_v2di
22490 = build_function_type_list (V2DI_type_node,
22491 V2DI_type_node, V2DI_type_node, NULL_TREE);
22492 tree v2di_ftype_v2df_v2df
22493 = build_function_type_list (V2DI_type_node,
22494 V2DF_type_node, V2DF_type_node, NULL_TREE);
22495 tree v2df_ftype_v2df
22496 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22497 tree v2di_ftype_v2di_int
22498 = build_function_type_list (V2DI_type_node,
22499 V2DI_type_node, integer_type_node, NULL_TREE);
22500 tree v2di_ftype_v2di_v2di_int
22501 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22502 V2DI_type_node, integer_type_node, NULL_TREE);
22503 tree v4si_ftype_v4si_int
22504 = build_function_type_list (V4SI_type_node,
22505 V4SI_type_node, integer_type_node, NULL_TREE);
22506 tree v8hi_ftype_v8hi_int
22507 = build_function_type_list (V8HI_type_node,
22508 V8HI_type_node, integer_type_node, NULL_TREE);
22509 tree v4si_ftype_v8hi_v8hi
22510 = build_function_type_list (V4SI_type_node,
22511 V8HI_type_node, V8HI_type_node, NULL_TREE);
22512 tree v1di_ftype_v8qi_v8qi
22513 = build_function_type_list (V1DI_type_node,
22514 V8QI_type_node, V8QI_type_node, NULL_TREE);
22515 tree v1di_ftype_v2si_v2si
22516 = build_function_type_list (V1DI_type_node,
22517 V2SI_type_node, V2SI_type_node, NULL_TREE);
22518 tree v2di_ftype_v16qi_v16qi
22519 = build_function_type_list (V2DI_type_node,
22520 V16QI_type_node, V16QI_type_node, NULL_TREE);
22521 tree v2di_ftype_v4si_v4si
22522 = build_function_type_list (V2DI_type_node,
22523 V4SI_type_node, V4SI_type_node, NULL_TREE);
22524 tree int_ftype_v16qi
22525 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22526 tree v16qi_ftype_pcchar
22527 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22528 tree void_ftype_pchar_v16qi
22529 = build_function_type_list (void_type_node,
22530 pchar_type_node, V16QI_type_node, NULL_TREE);
22532 tree v2di_ftype_v2di_unsigned_unsigned
22533 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22534 unsigned_type_node, unsigned_type_node,
22536 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22537 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22538 unsigned_type_node, unsigned_type_node,
22540 tree v2di_ftype_v2di_v16qi
22541 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22543 tree v2df_ftype_v2df_v2df_v2df
22544 = build_function_type_list (V2DF_type_node,
22545 V2DF_type_node, V2DF_type_node,
22546 V2DF_type_node, NULL_TREE);
22547 tree v4sf_ftype_v4sf_v4sf_v4sf
22548 = build_function_type_list (V4SF_type_node,
22549 V4SF_type_node, V4SF_type_node,
22550 V4SF_type_node, NULL_TREE);
22551 tree v8hi_ftype_v16qi
22552 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22554 tree v4si_ftype_v16qi
22555 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22557 tree v2di_ftype_v16qi
22558 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22560 tree v4si_ftype_v8hi
22561 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22563 tree v2di_ftype_v8hi
22564 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22566 tree v2di_ftype_v4si
22567 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22569 tree v2di_ftype_pv2di
22570 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22572 tree v16qi_ftype_v16qi_v16qi_int
22573 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22574 V16QI_type_node, integer_type_node,
22576 tree v16qi_ftype_v16qi_v16qi_v16qi
22577 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22578 V16QI_type_node, V16QI_type_node,
22580 tree v8hi_ftype_v8hi_v8hi_int
22581 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22582 V8HI_type_node, integer_type_node,
22584 tree v4si_ftype_v4si_v4si_int
22585 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22586 V4SI_type_node, integer_type_node,
22588 tree int_ftype_v2di_v2di
22589 = build_function_type_list (integer_type_node,
22590 V2DI_type_node, V2DI_type_node,
22592 tree int_ftype_v16qi_int_v16qi_int_int
22593 = build_function_type_list (integer_type_node,
22600 tree v16qi_ftype_v16qi_int_v16qi_int_int
22601 = build_function_type_list (V16QI_type_node,
22608 tree int_ftype_v16qi_v16qi_int
22609 = build_function_type_list (integer_type_node,
22615 /* SSE5 instructions */
22616 tree v2di_ftype_v2di_v2di_v2di
22617 = build_function_type_list (V2DI_type_node,
22623 tree v4si_ftype_v4si_v4si_v4si
22624 = build_function_type_list (V4SI_type_node,
22630 tree v4si_ftype_v4si_v4si_v2di
22631 = build_function_type_list (V4SI_type_node,
22637 tree v8hi_ftype_v8hi_v8hi_v8hi
22638 = build_function_type_list (V8HI_type_node,
22644 tree v8hi_ftype_v8hi_v8hi_v4si
22645 = build_function_type_list (V8HI_type_node,
22651 tree v2df_ftype_v2df_v2df_v16qi
22652 = build_function_type_list (V2DF_type_node,
22658 tree v4sf_ftype_v4sf_v4sf_v16qi
22659 = build_function_type_list (V4SF_type_node,
22665 tree v2di_ftype_v2di_si
22666 = build_function_type_list (V2DI_type_node,
22671 tree v4si_ftype_v4si_si
22672 = build_function_type_list (V4SI_type_node,
22677 tree v8hi_ftype_v8hi_si
22678 = build_function_type_list (V8HI_type_node,
22683 tree v16qi_ftype_v16qi_si
22684 = build_function_type_list (V16QI_type_node,
22688 tree v4sf_ftype_v4hi
22689 = build_function_type_list (V4SF_type_node,
22693 tree v4hi_ftype_v4sf
22694 = build_function_type_list (V4HI_type_node,
22698 tree v2di_ftype_v2di
22699 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22701 tree v16qi_ftype_v8hi_v8hi
22702 = build_function_type_list (V16QI_type_node,
22703 V8HI_type_node, V8HI_type_node,
22705 tree v8hi_ftype_v4si_v4si
22706 = build_function_type_list (V8HI_type_node,
22707 V4SI_type_node, V4SI_type_node,
22709 tree v8hi_ftype_v16qi_v16qi
22710 = build_function_type_list (V8HI_type_node,
22711 V16QI_type_node, V16QI_type_node,
22713 tree v4hi_ftype_v8qi_v8qi
22714 = build_function_type_list (V4HI_type_node,
22715 V8QI_type_node, V8QI_type_node,
22717 tree unsigned_ftype_unsigned_uchar
22718 = build_function_type_list (unsigned_type_node,
22719 unsigned_type_node,
22720 unsigned_char_type_node,
22722 tree unsigned_ftype_unsigned_ushort
22723 = build_function_type_list (unsigned_type_node,
22724 unsigned_type_node,
22725 short_unsigned_type_node,
22727 tree unsigned_ftype_unsigned_unsigned
22728 = build_function_type_list (unsigned_type_node,
22729 unsigned_type_node,
22730 unsigned_type_node,
22732 tree uint64_ftype_uint64_uint64
22733 = build_function_type_list (long_long_unsigned_type_node,
22734 long_long_unsigned_type_node,
22735 long_long_unsigned_type_node,
22737 tree float_ftype_float
22738 = build_function_type_list (float_type_node,
22743 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22745 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22747 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22749 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22751 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22753 tree v8sf_ftype_v8sf
22754 = build_function_type_list (V8SF_type_node,
22757 tree v8si_ftype_v8sf
22758 = build_function_type_list (V8SI_type_node,
22761 tree v8sf_ftype_v8si
22762 = build_function_type_list (V8SF_type_node,
22765 tree v4si_ftype_v4df
22766 = build_function_type_list (V4SI_type_node,
22769 tree v4df_ftype_v4df
22770 = build_function_type_list (V4DF_type_node,
22773 tree v4df_ftype_v4si
22774 = build_function_type_list (V4DF_type_node,
22777 tree v4df_ftype_v4sf
22778 = build_function_type_list (V4DF_type_node,
22781 tree v4sf_ftype_v4df
22782 = build_function_type_list (V4SF_type_node,
22785 tree v8sf_ftype_v8sf_v8sf
22786 = build_function_type_list (V8SF_type_node,
22787 V8SF_type_node, V8SF_type_node,
22789 tree v4df_ftype_v4df_v4df
22790 = build_function_type_list (V4DF_type_node,
22791 V4DF_type_node, V4DF_type_node,
22793 tree v8sf_ftype_v8sf_int
22794 = build_function_type_list (V8SF_type_node,
22795 V8SF_type_node, integer_type_node,
22797 tree v4si_ftype_v8si_int
22798 = build_function_type_list (V4SI_type_node,
22799 V8SI_type_node, integer_type_node,
22801 tree v4df_ftype_v4df_int
22802 = build_function_type_list (V4DF_type_node,
22803 V4DF_type_node, integer_type_node,
22805 tree v4sf_ftype_v8sf_int
22806 = build_function_type_list (V4SF_type_node,
22807 V8SF_type_node, integer_type_node,
22809 tree v2df_ftype_v4df_int
22810 = build_function_type_list (V2DF_type_node,
22811 V4DF_type_node, integer_type_node,
22813 tree v8sf_ftype_v8sf_v8sf_int
22814 = build_function_type_list (V8SF_type_node,
22815 V8SF_type_node, V8SF_type_node,
22818 tree v8sf_ftype_v8sf_v8sf_v8sf
22819 = build_function_type_list (V8SF_type_node,
22820 V8SF_type_node, V8SF_type_node,
22823 tree v4df_ftype_v4df_v4df_v4df
22824 = build_function_type_list (V4DF_type_node,
22825 V4DF_type_node, V4DF_type_node,
22828 tree v8si_ftype_v8si_v8si_int
22829 = build_function_type_list (V8SI_type_node,
22830 V8SI_type_node, V8SI_type_node,
22833 tree v4df_ftype_v4df_v4df_int
22834 = build_function_type_list (V4DF_type_node,
22835 V4DF_type_node, V4DF_type_node,
22838 tree v8sf_ftype_pcfloat
22839 = build_function_type_list (V8SF_type_node,
22842 tree v4df_ftype_pcdouble
22843 = build_function_type_list (V4DF_type_node,
22844 pcdouble_type_node,
22846 tree pcv4sf_type_node
22847 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22848 tree pcv2df_type_node
22849 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22850 tree v8sf_ftype_pcv4sf
22851 = build_function_type_list (V8SF_type_node,
22854 tree v4df_ftype_pcv2df
22855 = build_function_type_list (V4DF_type_node,
22858 tree v32qi_ftype_pcchar
22859 = build_function_type_list (V32QI_type_node,
22862 tree void_ftype_pchar_v32qi
22863 = build_function_type_list (void_type_node,
22864 pchar_type_node, V32QI_type_node,
22866 tree v8si_ftype_v8si_v4si_int
22867 = build_function_type_list (V8SI_type_node,
22868 V8SI_type_node, V4SI_type_node,
22871 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22872 tree void_ftype_pv4di_v4di
22873 = build_function_type_list (void_type_node,
22874 pv4di_type_node, V4DI_type_node,
22876 tree v8sf_ftype_v8sf_v4sf_int
22877 = build_function_type_list (V8SF_type_node,
22878 V8SF_type_node, V4SF_type_node,
22881 tree v4df_ftype_v4df_v2df_int
22882 = build_function_type_list (V4DF_type_node,
22883 V4DF_type_node, V2DF_type_node,
22886 tree void_ftype_pfloat_v8sf
22887 = build_function_type_list (void_type_node,
22888 pfloat_type_node, V8SF_type_node,
22890 tree void_ftype_pdouble_v4df
22891 = build_function_type_list (void_type_node,
22892 pdouble_type_node, V4DF_type_node,
22894 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22895 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22896 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22897 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22898 tree pcv8sf_type_node
22899 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22900 tree pcv4df_type_node
22901 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22902 tree v8sf_ftype_pcv8sf_v8sf
22903 = build_function_type_list (V8SF_type_node,
22904 pcv8sf_type_node, V8SF_type_node,
22906 tree v4df_ftype_pcv4df_v4df
22907 = build_function_type_list (V4DF_type_node,
22908 pcv4df_type_node, V4DF_type_node,
22910 tree v4sf_ftype_pcv4sf_v4sf
22911 = build_function_type_list (V4SF_type_node,
22912 pcv4sf_type_node, V4SF_type_node,
22914 tree v2df_ftype_pcv2df_v2df
22915 = build_function_type_list (V2DF_type_node,
22916 pcv2df_type_node, V2DF_type_node,
22918 tree void_ftype_pv8sf_v8sf_v8sf
22919 = build_function_type_list (void_type_node,
22920 pv8sf_type_node, V8SF_type_node,
22923 tree void_ftype_pv4df_v4df_v4df
22924 = build_function_type_list (void_type_node,
22925 pv4df_type_node, V4DF_type_node,
22928 tree void_ftype_pv4sf_v4sf_v4sf
22929 = build_function_type_list (void_type_node,
22930 pv4sf_type_node, V4SF_type_node,
22933 tree void_ftype_pv2df_v2df_v2df
22934 = build_function_type_list (void_type_node,
22935 pv2df_type_node, V2DF_type_node,
22938 tree v4df_ftype_v2df
22939 = build_function_type_list (V4DF_type_node,
22942 tree v8sf_ftype_v4sf
22943 = build_function_type_list (V8SF_type_node,
22946 tree v8si_ftype_v4si
22947 = build_function_type_list (V8SI_type_node,
22950 tree v2df_ftype_v4df
22951 = build_function_type_list (V2DF_type_node,
22954 tree v4sf_ftype_v8sf
22955 = build_function_type_list (V4SF_type_node,
22958 tree v4si_ftype_v8si
22959 = build_function_type_list (V4SI_type_node,
22962 tree int_ftype_v4df
22963 = build_function_type_list (integer_type_node,
22966 tree int_ftype_v8sf
22967 = build_function_type_list (integer_type_node,
22970 tree int_ftype_v8sf_v8sf
22971 = build_function_type_list (integer_type_node,
22972 V8SF_type_node, V8SF_type_node,
22974 tree int_ftype_v4di_v4di
22975 = build_function_type_list (integer_type_node,
22976 V4DI_type_node, V4DI_type_node,
22978 tree int_ftype_v4df_v4df
22979 = build_function_type_list (integer_type_node,
22980 V4DF_type_node, V4DF_type_node,
22982 tree v8sf_ftype_v8sf_v8si
22983 = build_function_type_list (V8SF_type_node,
22984 V8SF_type_node, V8SI_type_node,
22986 tree v4df_ftype_v4df_v4di
22987 = build_function_type_list (V4DF_type_node,
22988 V4DF_type_node, V4DI_type_node,
22990 tree v4sf_ftype_v4sf_v4si
22991 = build_function_type_list (V4SF_type_node,
22992 V4SF_type_node, V4SI_type_node, NULL_TREE);
22993 tree v2df_ftype_v2df_v2di
22994 = build_function_type_list (V2DF_type_node,
22995 V2DF_type_node, V2DI_type_node, NULL_TREE);
22999 /* Add all special builtins with variable number of operands. */
23000 for (i = 0, d = bdesc_special_args;
23001 i < ARRAY_SIZE (bdesc_special_args);
23009 switch ((enum ix86_special_builtin_type) d->flag)
23011 case VOID_FTYPE_VOID:
23012 type = void_ftype_void;
23014 case V32QI_FTYPE_PCCHAR:
23015 type = v32qi_ftype_pcchar;
23017 case V16QI_FTYPE_PCCHAR:
23018 type = v16qi_ftype_pcchar;
23020 case V8SF_FTYPE_PCV4SF:
23021 type = v8sf_ftype_pcv4sf;
23023 case V8SF_FTYPE_PCFLOAT:
23024 type = v8sf_ftype_pcfloat;
23026 case V4DF_FTYPE_PCV2DF:
23027 type = v4df_ftype_pcv2df;
23029 case V4DF_FTYPE_PCDOUBLE:
23030 type = v4df_ftype_pcdouble;
23032 case V4SF_FTYPE_PCFLOAT:
23033 type = v4sf_ftype_pcfloat;
23035 case V2DI_FTYPE_PV2DI:
23036 type = v2di_ftype_pv2di;
23038 case V2DF_FTYPE_PCDOUBLE:
23039 type = v2df_ftype_pcdouble;
23041 case V8SF_FTYPE_PCV8SF_V8SF:
23042 type = v8sf_ftype_pcv8sf_v8sf;
23044 case V4DF_FTYPE_PCV4DF_V4DF:
23045 type = v4df_ftype_pcv4df_v4df;
23047 case V4SF_FTYPE_V4SF_PCV2SF:
23048 type = v4sf_ftype_v4sf_pcv2sf;
23050 case V4SF_FTYPE_PCV4SF_V4SF:
23051 type = v4sf_ftype_pcv4sf_v4sf;
23053 case V2DF_FTYPE_V2DF_PCDOUBLE:
23054 type = v2df_ftype_v2df_pcdouble;
23056 case V2DF_FTYPE_PCV2DF_V2DF:
23057 type = v2df_ftype_pcv2df_v2df;
23059 case VOID_FTYPE_PV2SF_V4SF:
23060 type = void_ftype_pv2sf_v4sf;
23062 case VOID_FTYPE_PV4DI_V4DI:
23063 type = void_ftype_pv4di_v4di;
23065 case VOID_FTYPE_PV2DI_V2DI:
23066 type = void_ftype_pv2di_v2di;
23068 case VOID_FTYPE_PCHAR_V32QI:
23069 type = void_ftype_pchar_v32qi;
23071 case VOID_FTYPE_PCHAR_V16QI:
23072 type = void_ftype_pchar_v16qi;
23074 case VOID_FTYPE_PFLOAT_V8SF:
23075 type = void_ftype_pfloat_v8sf;
23077 case VOID_FTYPE_PFLOAT_V4SF:
23078 type = void_ftype_pfloat_v4sf;
23080 case VOID_FTYPE_PDOUBLE_V4DF:
23081 type = void_ftype_pdouble_v4df;
23083 case VOID_FTYPE_PDOUBLE_V2DF:
23084 type = void_ftype_pdouble_v2df;
23086 case VOID_FTYPE_PDI_DI:
23087 type = void_ftype_pdi_di;
23089 case VOID_FTYPE_PINT_INT:
23090 type = void_ftype_pint_int;
23092 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23093 type = void_ftype_pv8sf_v8sf_v8sf;
23095 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23096 type = void_ftype_pv4df_v4df_v4df;
23098 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23099 type = void_ftype_pv4sf_v4sf_v4sf;
23101 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23102 type = void_ftype_pv2df_v2df_v2df;
23105 gcc_unreachable ();
23108 def_builtin (d->mask, d->name, type, d->code);
23111 /* Add all builtins with variable number of operands. */
23112 for (i = 0, d = bdesc_args;
23113 i < ARRAY_SIZE (bdesc_args);
23121 switch ((enum ix86_builtin_type) d->flag)
23123 case FLOAT_FTYPE_FLOAT:
23124 type = float_ftype_float;
23126 case INT_FTYPE_V8SF_V8SF_PTEST:
23127 type = int_ftype_v8sf_v8sf;
23129 case INT_FTYPE_V4DI_V4DI_PTEST:
23130 type = int_ftype_v4di_v4di;
23132 case INT_FTYPE_V4DF_V4DF_PTEST:
23133 type = int_ftype_v4df_v4df;
23135 case INT_FTYPE_V4SF_V4SF_PTEST:
23136 type = int_ftype_v4sf_v4sf;
23138 case INT_FTYPE_V2DI_V2DI_PTEST:
23139 type = int_ftype_v2di_v2di;
23141 case INT_FTYPE_V2DF_V2DF_PTEST:
23142 type = int_ftype_v2df_v2df;
23144 case INT64_FTYPE_V4SF:
23145 type = int64_ftype_v4sf;
23147 case INT64_FTYPE_V2DF:
23148 type = int64_ftype_v2df;
23150 case INT_FTYPE_V16QI:
23151 type = int_ftype_v16qi;
23153 case INT_FTYPE_V8QI:
23154 type = int_ftype_v8qi;
23156 case INT_FTYPE_V8SF:
23157 type = int_ftype_v8sf;
23159 case INT_FTYPE_V4DF:
23160 type = int_ftype_v4df;
23162 case INT_FTYPE_V4SF:
23163 type = int_ftype_v4sf;
23165 case INT_FTYPE_V2DF:
23166 type = int_ftype_v2df;
23168 case V16QI_FTYPE_V16QI:
23169 type = v16qi_ftype_v16qi;
23171 case V8SI_FTYPE_V8SF:
23172 type = v8si_ftype_v8sf;
23174 case V8SI_FTYPE_V4SI:
23175 type = v8si_ftype_v4si;
23177 case V8HI_FTYPE_V8HI:
23178 type = v8hi_ftype_v8hi;
23180 case V8HI_FTYPE_V16QI:
23181 type = v8hi_ftype_v16qi;
23183 case V8QI_FTYPE_V8QI:
23184 type = v8qi_ftype_v8qi;
23186 case V8SF_FTYPE_V8SF:
23187 type = v8sf_ftype_v8sf;
23189 case V8SF_FTYPE_V8SI:
23190 type = v8sf_ftype_v8si;
23192 case V8SF_FTYPE_V4SF:
23193 type = v8sf_ftype_v4sf;
23195 case V4SI_FTYPE_V4DF:
23196 type = v4si_ftype_v4df;
23198 case V4SI_FTYPE_V4SI:
23199 type = v4si_ftype_v4si;
23201 case V4SI_FTYPE_V16QI:
23202 type = v4si_ftype_v16qi;
23204 case V4SI_FTYPE_V8SI:
23205 type = v4si_ftype_v8si;
23207 case V4SI_FTYPE_V8HI:
23208 type = v4si_ftype_v8hi;
23210 case V4SI_FTYPE_V4SF:
23211 type = v4si_ftype_v4sf;
23213 case V4SI_FTYPE_V2DF:
23214 type = v4si_ftype_v2df;
23216 case V4HI_FTYPE_V4HI:
23217 type = v4hi_ftype_v4hi;
23219 case V4DF_FTYPE_V4DF:
23220 type = v4df_ftype_v4df;
23222 case V4DF_FTYPE_V4SI:
23223 type = v4df_ftype_v4si;
23225 case V4DF_FTYPE_V4SF:
23226 type = v4df_ftype_v4sf;
23228 case V4DF_FTYPE_V2DF:
23229 type = v4df_ftype_v2df;
23231 case V4SF_FTYPE_V4SF:
23232 case V4SF_FTYPE_V4SF_VEC_MERGE:
23233 type = v4sf_ftype_v4sf;
23235 case V4SF_FTYPE_V8SF:
23236 type = v4sf_ftype_v8sf;
23238 case V4SF_FTYPE_V4SI:
23239 type = v4sf_ftype_v4si;
23241 case V4SF_FTYPE_V4DF:
23242 type = v4sf_ftype_v4df;
23244 case V4SF_FTYPE_V2DF:
23245 type = v4sf_ftype_v2df;
23247 case V2DI_FTYPE_V2DI:
23248 type = v2di_ftype_v2di;
23250 case V2DI_FTYPE_V16QI:
23251 type = v2di_ftype_v16qi;
23253 case V2DI_FTYPE_V8HI:
23254 type = v2di_ftype_v8hi;
23256 case V2DI_FTYPE_V4SI:
23257 type = v2di_ftype_v4si;
23259 case V2SI_FTYPE_V2SI:
23260 type = v2si_ftype_v2si;
23262 case V2SI_FTYPE_V4SF:
23263 type = v2si_ftype_v4sf;
23265 case V2SI_FTYPE_V2DF:
23266 type = v2si_ftype_v2df;
23268 case V2SI_FTYPE_V2SF:
23269 type = v2si_ftype_v2sf;
23271 case V2DF_FTYPE_V4DF:
23272 type = v2df_ftype_v4df;
23274 case V2DF_FTYPE_V4SF:
23275 type = v2df_ftype_v4sf;
23277 case V2DF_FTYPE_V2DF:
23278 case V2DF_FTYPE_V2DF_VEC_MERGE:
23279 type = v2df_ftype_v2df;
23281 case V2DF_FTYPE_V2SI:
23282 type = v2df_ftype_v2si;
23284 case V2DF_FTYPE_V4SI:
23285 type = v2df_ftype_v4si;
23287 case V2SF_FTYPE_V2SF:
23288 type = v2sf_ftype_v2sf;
23290 case V2SF_FTYPE_V2SI:
23291 type = v2sf_ftype_v2si;
23293 case V16QI_FTYPE_V16QI_V16QI:
23294 type = v16qi_ftype_v16qi_v16qi;
23296 case V16QI_FTYPE_V8HI_V8HI:
23297 type = v16qi_ftype_v8hi_v8hi;
23299 case V8QI_FTYPE_V8QI_V8QI:
23300 type = v8qi_ftype_v8qi_v8qi;
23302 case V8QI_FTYPE_V4HI_V4HI:
23303 type = v8qi_ftype_v4hi_v4hi;
23305 case V8HI_FTYPE_V8HI_V8HI:
23306 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23307 type = v8hi_ftype_v8hi_v8hi;
23309 case V8HI_FTYPE_V16QI_V16QI:
23310 type = v8hi_ftype_v16qi_v16qi;
23312 case V8HI_FTYPE_V4SI_V4SI:
23313 type = v8hi_ftype_v4si_v4si;
23315 case V8HI_FTYPE_V8HI_SI_COUNT:
23316 type = v8hi_ftype_v8hi_int;
23318 case V8SF_FTYPE_V8SF_V8SF:
23319 type = v8sf_ftype_v8sf_v8sf;
23321 case V8SF_FTYPE_V8SF_V8SI:
23322 type = v8sf_ftype_v8sf_v8si;
23324 case V4SI_FTYPE_V4SI_V4SI:
23325 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23326 type = v4si_ftype_v4si_v4si;
23328 case V4SI_FTYPE_V8HI_V8HI:
23329 type = v4si_ftype_v8hi_v8hi;
23331 case V4SI_FTYPE_V4SF_V4SF:
23332 type = v4si_ftype_v4sf_v4sf;
23334 case V4SI_FTYPE_V2DF_V2DF:
23335 type = v4si_ftype_v2df_v2df;
23337 case V4SI_FTYPE_V4SI_SI_COUNT:
23338 type = v4si_ftype_v4si_int;
23340 case V4HI_FTYPE_V4HI_V4HI:
23341 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23342 type = v4hi_ftype_v4hi_v4hi;
23344 case V4HI_FTYPE_V8QI_V8QI:
23345 type = v4hi_ftype_v8qi_v8qi;
23347 case V4HI_FTYPE_V2SI_V2SI:
23348 type = v4hi_ftype_v2si_v2si;
23350 case V4HI_FTYPE_V4HI_SI_COUNT:
23351 type = v4hi_ftype_v4hi_int;
23353 case V4DF_FTYPE_V4DF_V4DF:
23354 type = v4df_ftype_v4df_v4df;
23356 case V4DF_FTYPE_V4DF_V4DI:
23357 type = v4df_ftype_v4df_v4di;
23359 case V4SF_FTYPE_V4SF_V4SF:
23360 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23361 type = v4sf_ftype_v4sf_v4sf;
23363 case V4SF_FTYPE_V4SF_V4SI:
23364 type = v4sf_ftype_v4sf_v4si;
23366 case V4SF_FTYPE_V4SF_V2SI:
23367 type = v4sf_ftype_v4sf_v2si;
23369 case V4SF_FTYPE_V4SF_V2DF:
23370 type = v4sf_ftype_v4sf_v2df;
23372 case V4SF_FTYPE_V4SF_DI:
23373 type = v4sf_ftype_v4sf_int64;
23375 case V4SF_FTYPE_V4SF_SI:
23376 type = v4sf_ftype_v4sf_int;
23378 case V2DI_FTYPE_V2DI_V2DI:
23379 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23380 type = v2di_ftype_v2di_v2di;
23382 case V2DI_FTYPE_V16QI_V16QI:
23383 type = v2di_ftype_v16qi_v16qi;
23385 case V2DI_FTYPE_V4SI_V4SI:
23386 type = v2di_ftype_v4si_v4si;
23388 case V2DI_FTYPE_V2DI_V16QI:
23389 type = v2di_ftype_v2di_v16qi;
23391 case V2DI_FTYPE_V2DF_V2DF:
23392 type = v2di_ftype_v2df_v2df;
23394 case V2DI_FTYPE_V2DI_SI_COUNT:
23395 type = v2di_ftype_v2di_int;
23397 case V2SI_FTYPE_V2SI_V2SI:
23398 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23399 type = v2si_ftype_v2si_v2si;
23401 case V2SI_FTYPE_V4HI_V4HI:
23402 type = v2si_ftype_v4hi_v4hi;
23404 case V2SI_FTYPE_V2SF_V2SF:
23405 type = v2si_ftype_v2sf_v2sf;
23407 case V2SI_FTYPE_V2SI_SI_COUNT:
23408 type = v2si_ftype_v2si_int;
23410 case V2DF_FTYPE_V2DF_V2DF:
23411 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23412 type = v2df_ftype_v2df_v2df;
23414 case V2DF_FTYPE_V2DF_V4SF:
23415 type = v2df_ftype_v2df_v4sf;
23417 case V2DF_FTYPE_V2DF_V2DI:
23418 type = v2df_ftype_v2df_v2di;
23420 case V2DF_FTYPE_V2DF_DI:
23421 type = v2df_ftype_v2df_int64;
23423 case V2DF_FTYPE_V2DF_SI:
23424 type = v2df_ftype_v2df_int;
23426 case V2SF_FTYPE_V2SF_V2SF:
23427 type = v2sf_ftype_v2sf_v2sf;
23429 case V1DI_FTYPE_V1DI_V1DI:
23430 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23431 type = v1di_ftype_v1di_v1di;
23433 case V1DI_FTYPE_V8QI_V8QI:
23434 type = v1di_ftype_v8qi_v8qi;
23436 case V1DI_FTYPE_V2SI_V2SI:
23437 type = v1di_ftype_v2si_v2si;
23439 case V1DI_FTYPE_V1DI_SI_COUNT:
23440 type = v1di_ftype_v1di_int;
23442 case UINT64_FTYPE_UINT64_UINT64:
23443 type = uint64_ftype_uint64_uint64;
23445 case UINT_FTYPE_UINT_UINT:
23446 type = unsigned_ftype_unsigned_unsigned;
23448 case UINT_FTYPE_UINT_USHORT:
23449 type = unsigned_ftype_unsigned_ushort;
23451 case UINT_FTYPE_UINT_UCHAR:
23452 type = unsigned_ftype_unsigned_uchar;
23454 case V8HI_FTYPE_V8HI_INT:
23455 type = v8hi_ftype_v8hi_int;
23457 case V8SF_FTYPE_V8SF_INT:
23458 type = v8sf_ftype_v8sf_int;
23460 case V4SI_FTYPE_V4SI_INT:
23461 type = v4si_ftype_v4si_int;
23463 case V4SI_FTYPE_V8SI_INT:
23464 type = v4si_ftype_v8si_int;
23466 case V4HI_FTYPE_V4HI_INT:
23467 type = v4hi_ftype_v4hi_int;
23469 case V4DF_FTYPE_V4DF_INT:
23470 type = v4df_ftype_v4df_int;
23472 case V4SF_FTYPE_V4SF_INT:
23473 type = v4sf_ftype_v4sf_int;
23475 case V4SF_FTYPE_V8SF_INT:
23476 type = v4sf_ftype_v8sf_int;
23478 case V2DI_FTYPE_V2DI_INT:
23479 case V2DI2TI_FTYPE_V2DI_INT:
23480 type = v2di_ftype_v2di_int;
23482 case V2DF_FTYPE_V2DF_INT:
23483 type = v2df_ftype_v2df_int;
23485 case V2DF_FTYPE_V4DF_INT:
23486 type = v2df_ftype_v4df_int;
23488 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23489 type = v16qi_ftype_v16qi_v16qi_v16qi;
23491 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23492 type = v8sf_ftype_v8sf_v8sf_v8sf;
23494 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23495 type = v4df_ftype_v4df_v4df_v4df;
23497 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23498 type = v4sf_ftype_v4sf_v4sf_v4sf;
23500 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23501 type = v2df_ftype_v2df_v2df_v2df;
23503 case V16QI_FTYPE_V16QI_V16QI_INT:
23504 type = v16qi_ftype_v16qi_v16qi_int;
23506 case V8SI_FTYPE_V8SI_V8SI_INT:
23507 type = v8si_ftype_v8si_v8si_int;
23509 case V8SI_FTYPE_V8SI_V4SI_INT:
23510 type = v8si_ftype_v8si_v4si_int;
23512 case V8HI_FTYPE_V8HI_V8HI_INT:
23513 type = v8hi_ftype_v8hi_v8hi_int;
23515 case V8SF_FTYPE_V8SF_V8SF_INT:
23516 type = v8sf_ftype_v8sf_v8sf_int;
23518 case V8SF_FTYPE_V8SF_V4SF_INT:
23519 type = v8sf_ftype_v8sf_v4sf_int;
23521 case V4SI_FTYPE_V4SI_V4SI_INT:
23522 type = v4si_ftype_v4si_v4si_int;
23524 case V4DF_FTYPE_V4DF_V4DF_INT:
23525 type = v4df_ftype_v4df_v4df_int;
23527 case V4DF_FTYPE_V4DF_V2DF_INT:
23528 type = v4df_ftype_v4df_v2df_int;
23530 case V4SF_FTYPE_V4SF_V4SF_INT:
23531 type = v4sf_ftype_v4sf_v4sf_int;
23533 case V2DI_FTYPE_V2DI_V2DI_INT:
23534 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23535 type = v2di_ftype_v2di_v2di_int;
23537 case V2DF_FTYPE_V2DF_V2DF_INT:
23538 type = v2df_ftype_v2df_v2df_int;
23540 case V2DI_FTYPE_V2DI_UINT_UINT:
23541 type = v2di_ftype_v2di_unsigned_unsigned;
23543 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23544 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23546 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23547 type = v1di_ftype_v1di_v1di_int;
23550 gcc_unreachable ();
23553 def_builtin_const (d->mask, d->name, type, d->code);
23556 /* pcmpestr[im] insns. */
23557 for (i = 0, d = bdesc_pcmpestr;
23558 i < ARRAY_SIZE (bdesc_pcmpestr);
23561 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23562 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23564 ftype = int_ftype_v16qi_int_v16qi_int_int;
23565 def_builtin_const (d->mask, d->name, ftype, d->code);
23568 /* pcmpistr[im] insns. */
23569 for (i = 0, d = bdesc_pcmpistr;
23570 i < ARRAY_SIZE (bdesc_pcmpistr);
23573 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23574 ftype = v16qi_ftype_v16qi_v16qi_int;
23576 ftype = int_ftype_v16qi_v16qi_int;
23577 def_builtin_const (d->mask, d->name, ftype, d->code);
23580 /* comi/ucomi insns. */
23581 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23582 if (d->mask == OPTION_MASK_ISA_SSE2)
23583 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23585 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23588 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23589 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23591 /* SSE or 3DNow!A */
23592 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23595 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23597 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23598 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23601 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23602 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23605 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23606 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23607 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23608 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23609 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23610 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23613 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23616 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23617 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23619 /* Access to the vec_init patterns. */
23620 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23621 integer_type_node, NULL_TREE);
23622 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23624 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23625 short_integer_type_node,
23626 short_integer_type_node,
23627 short_integer_type_node, NULL_TREE);
23628 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23630 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23631 char_type_node, char_type_node,
23632 char_type_node, char_type_node,
23633 char_type_node, char_type_node,
23634 char_type_node, NULL_TREE);
23635 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23637 /* Access to the vec_extract patterns. */
23638 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23639 integer_type_node, NULL_TREE);
23640 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23642 ftype = build_function_type_list (long_long_integer_type_node,
23643 V2DI_type_node, integer_type_node,
23645 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23647 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23648 integer_type_node, NULL_TREE);
23649 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23651 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23652 integer_type_node, NULL_TREE);
23653 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23655 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23656 integer_type_node, NULL_TREE);
23657 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23659 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23660 integer_type_node, NULL_TREE);
23661 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23663 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23664 integer_type_node, NULL_TREE);
23665 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23667 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23668 integer_type_node, NULL_TREE);
23669 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23671 /* Access to the vec_set patterns. */
23672 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23674 integer_type_node, NULL_TREE);
23675 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23677 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23679 integer_type_node, NULL_TREE);
23680 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23682 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23684 integer_type_node, NULL_TREE);
23685 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23687 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23689 integer_type_node, NULL_TREE);
23690 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23692 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23694 integer_type_node, NULL_TREE);
23695 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23697 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23699 integer_type_node, NULL_TREE);
23700 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23702 /* Add SSE5 multi-arg argument instructions */
23703 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23705 tree mtype = NULL_TREE;
23710 switch ((enum multi_arg_type)d->flag)
23712 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23713 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23714 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23715 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23716 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23717 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23718 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23719 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23720 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23721 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23722 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23723 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23724 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23725 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23726 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23727 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23728 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23729 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23730 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23731 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23732 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23733 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23734 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23735 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23736 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23737 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23738 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23739 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23740 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23741 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23742 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23743 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23744 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23745 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23746 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23747 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23748 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23749 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23750 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23751 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23752 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23753 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23754 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23755 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23756 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23757 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23758 case MULTI_ARG_UNKNOWN:
23760 gcc_unreachable ();
23764 def_builtin_const (d->mask, d->name, mtype, d->code);
23768 /* Internal method for ix86_init_builtins.  */
/* Register the __builtin_ms_va_{start,end,copy} and
   __builtin_sysv_va_{start,end,copy} builtins, so that 64-bit code can
   handle varargs under either calling convention.  Each builtin carries
   the matching "ms_abi"/"sysv_abi" function attribute so it is expanded
   with the right ABI.  */
23771 ix86_init_builtins_va_builtins_abi (void)
23773   tree ms_va_ref, sysv_va_ref;
23774   tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23775   tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23776   tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23777   tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists used to tag each builtin with its ABI.  */
23781   fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23782   fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* The va_list arguments are passed by reference.  */
23783   ms_va_ref = build_reference_type (ms_va_list_type_node);
23785     build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23788     build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23789   fnvoid_va_start_ms =
23790     build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23791   fnvoid_va_end_sysv =
23792     build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23793   fnvoid_va_start_sysv =
23794     build_varargs_function_type_list (void_type_node, sysv_va_ref,
/* va_copy takes the destination by reference and the source by value.  */
23796   fnvoid_va_copy_ms =
23797     build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23799   fnvoid_va_copy_sysv =
23800     build_function_type_list (void_type_node, sysv_va_ref,
23801 				       sysv_va_ref, NULL_TREE);
/* Register the six builtins; BUILT_IN_VA_* codes let the middle end
   recognize them as the standard va_* operations.  */
23803   add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23804   			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23805   add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23806   			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23807   add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23808 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23809   add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23810   			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23811   add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23812   			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23813   add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23814 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level builtin initialization for the i386 backend: registers the
   __float80/__float128 types, the TFmode math builtins (__builtin_infq,
   __builtin_fabsq, ...), the MMX/SSE builtins, and the per-ABI va_list
   builtins.  */
23818 ix86_init_builtins (void)
23820   tree float128_type_node = make_node (REAL_TYPE);
23823   /* The __float80 type.  */
23824   if (TYPE_MODE (long_double_type_node) == XFmode)
/* long double already has XFmode, so __float80 is just an alias.  */
23825     (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23829       /* The __float80 type.  */
/* Otherwise build a distinct 80-bit REAL_TYPE for __float80.  */
23830       tree float80_type_node = make_node (REAL_TYPE);
23832       TYPE_PRECISION (float80_type_node) = 80;
23833       layout_type (float80_type_node);
23834       (*lang_hooks.types.register_builtin_type) (float80_type_node,
23838   /* The __float128 type.  */
23839   TYPE_PRECISION (float128_type_node) = 128;
23840   layout_type (float128_type_node);
23841   (*lang_hooks.types.register_builtin_type) (float128_type_node,
23844   /* TFmode support builtins.  */
23845   ftype = build_function_type (float128_type_node, void_list_node);
23846   decl = add_builtin_function ("__builtin_infq", ftype,
23847 			       IX86_BUILTIN_INFQ, BUILT_IN_MD,
23849   ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23851   decl = add_builtin_function ("__builtin_huge_valq", ftype,
23852 			       IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
23854   ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
23856   /* We will expand them to normal call if SSE2 isn't available since
23857      they are used by libgcc.  */
23858   ftype = build_function_type_list (float128_type_node,
23859 				    float128_type_node,
23861   decl = add_builtin_function ("__builtin_fabsq", ftype,
23862 			       IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23863 			       "__fabstf2", NULL_TREE);
23864   ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
/* fabsq/copysignq have no side effects; mark them pure for the optimizers.  */
23865   TREE_READONLY (decl) = 1;
23867   ftype = build_function_type_list (float128_type_node,
23868 				    float128_type_node,
23869 				    float128_type_node,
23871   decl = add_builtin_function ("__builtin_copysignq", ftype,
23872 			       IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23873 			       "__copysigntf3", NULL_TREE);
23874   ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23875   TREE_READONLY (decl) = 1;
23877   ix86_init_mmx_sse_builtins ();
23879     ix86_init_builtins_va_builtins_abi ();
23882 /* Errors in the source file can cause expand_expr to return const0_rtx
23883    where we expect a vector.  To avoid crashing, use one of the vector
23884    clear instructions.  */
/* X: operand that should be a vector; MODE: its expected vector mode.
   Returns X unchanged unless it is const0_rtx, in which case the
   all-zero vector constant of MODE is substituted.  */
23886 safe_vector_operand (rtx x, enum machine_mode mode)
23888   if (x == const0_rtx)
23889     x = CONST0_RTX (mode);
23893 /* Subroutine of ix86_expand_builtin to take care of binop insns.  */
/* Expands the two call arguments of EXP, coerces them to the operand
   modes of ICODE, and emits the two-operand pattern into TARGET (a fresh
   register is allocated if TARGET is unsuitable).  */
23896 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23899   tree arg0 = CALL_EXPR_ARG (exp, 0);
23900   tree arg1 = CALL_EXPR_ARG (exp, 1);
23901   rtx op0 = expand_normal (arg0);
23902   rtx op1 = expand_normal (arg1);
23903   enum machine_mode tmode = insn_data[icode].operand[0].mode;
23904   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23905   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23907   if (VECTOR_MODE_P (mode0))
23908     op0 = safe_vector_operand (op0, mode0);
23909   if (VECTOR_MODE_P (mode1))
23910     op1 = safe_vector_operand (op1, mode1);
23912   if (optimize || !target
23913       || GET_MODE (target) != tmode
23914       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23915     target = gen_reg_rtx (tmode);
/* An SImode argument feeding a TImode operand: zero-extend it via a
   V4SI load and reinterpret the low part as TImode.  */
23917   if (GET_MODE (op1) == SImode && mode1 == TImode)
23919       rtx x = gen_reg_rtx (V4SImode);
23920       emit_insn (gen_sse2_loadd (x, op1));
23921       op1 = gen_lowpart (TImode, x);
/* Force operands into registers when the pattern's predicates reject
   their current form.  */
23924   if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23925     op0 = copy_to_mode_reg (mode0, op0);
23926   if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23927     op1 = copy_to_mode_reg (mode1, op1);
23929   pat = GEN_FCN (icode) (target, op0, op1);
23938 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */
/* Expands a multi-argument (SSE5/XOP-style) builtin call EXP using
   pattern ICODE.  M_TYPE selects the argument/operand layout (number of
   args, whether the last arg is an immediate, whether the insn is a
   comparison) and SUB_CODE supplies the rtx comparison code for the
   comparison forms.  */
23941 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23942 			       enum multi_arg_type m_type,
23943 			       enum rtx_code sub_code)
23948   bool comparison_p = false;
23950   bool last_arg_constant = false;
23951   int num_memory = 0;
23954       enum machine_mode mode;
23957   enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: three-operand forms.  */
23961     case MULTI_ARG_3_SF:
23962     case MULTI_ARG_3_DF:
23963     case MULTI_ARG_3_DI:
23964     case MULTI_ARG_3_SI:
23965     case MULTI_ARG_3_SI_DI:
23966     case MULTI_ARG_3_HI:
23967     case MULTI_ARG_3_HI_SI:
23968     case MULTI_ARG_3_QI:
23969     case MULTI_ARG_3_PERMPS:
23970     case MULTI_ARG_3_PERMPD:
/* Two-operand forms.  */
23974     case MULTI_ARG_2_SF:
23975     case MULTI_ARG_2_DF:
23976     case MULTI_ARG_2_DI:
23977     case MULTI_ARG_2_SI:
23978     case MULTI_ARG_2_HI:
23979     case MULTI_ARG_2_QI:
/* Two operands where the second must be an immediate.  */
23983     case MULTI_ARG_2_DI_IMM:
23984     case MULTI_ARG_2_SI_IMM:
23985     case MULTI_ARG_2_HI_IMM:
23986     case MULTI_ARG_2_QI_IMM:
23988       last_arg_constant = true;
/* One-operand forms (including widening/conversion variants).  */
23991     case MULTI_ARG_1_SF:
23992     case MULTI_ARG_1_DF:
23993     case MULTI_ARG_1_DI:
23994     case MULTI_ARG_1_SI:
23995     case MULTI_ARG_1_HI:
23996     case MULTI_ARG_1_QI:
23997     case MULTI_ARG_1_SI_DI:
23998     case MULTI_ARG_1_HI_DI:
23999     case MULTI_ARG_1_HI_SI:
24000     case MULTI_ARG_1_QI_DI:
24001     case MULTI_ARG_1_QI_SI:
24002     case MULTI_ARG_1_QI_HI:
24003     case MULTI_ARG_1_PH2PS:
24004     case MULTI_ARG_1_PS2PH:
/* Two-operand comparisons: SUB_CODE is embedded in the pattern.  */
24008     case MULTI_ARG_2_SF_CMP:
24009     case MULTI_ARG_2_DF_CMP:
24010     case MULTI_ARG_2_DI_CMP:
24011     case MULTI_ARG_2_SI_CMP:
24012     case MULTI_ARG_2_HI_CMP:
24013     case MULTI_ARG_2_QI_CMP:
24015       comparison_p = true;
/* Two-operand test forms; SUB_CODE is passed as an integer operand.  */
24018     case MULTI_ARG_2_SF_TF:
24019     case MULTI_ARG_2_DF_TF:
24020     case MULTI_ARG_2_DI_TF:
24021     case MULTI_ARG_2_SI_TF:
24022     case MULTI_ARG_2_HI_TF:
24023     case MULTI_ARG_2_QI_TF:
24028     case MULTI_ARG_UNKNOWN:
24030       gcc_unreachable ();
24033   if (optimize || !target
24034       || GET_MODE (target) != tmode
24035       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24036     target = gen_reg_rtx (tmode);
24038   gcc_assert (nargs <= 4);
/* Expand and legitimize each argument; a comparison form shifts the
   operand index by one because operand 1 is the comparison rtx.  */
24040   for (i = 0; i < nargs; i++)
24042       tree arg = CALL_EXPR_ARG (exp, i);
24043       rtx op = expand_normal (arg);
24044       int adjust = (comparison_p) ? 1 : 0;
24045       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24047       if (last_arg_constant && i == nargs-1)
24049 	  if (GET_CODE (op) != CONST_INT)
24051 	      error ("last argument must be an immediate");
/* Return a dummy register so expansion can continue after the error.  */
24052 	      return gen_reg_rtx (tmode);
24057 	  if (VECTOR_MODE_P (mode))
24058 	    op = safe_vector_operand (op, mode);
24060 	  /* If we aren't optimizing, only allow one memory operand to be
24062 	  if (memory_operand (op, mode))
24065 	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24068 	      || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24070 	    op = force_reg (mode, op);
24074       args[i].mode = mode;
/* Emit the pattern; the shape depends on arity and comparison-ness.  */
24080       pat = GEN_FCN (icode) (target, args[0].op);
24085 	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24086 			       GEN_INT ((int)sub_code));
24087       else if (! comparison_p)
24088 	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24091 	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24095 	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24100       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24104       gcc_unreachable ();
24114 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24115    insns with vec_merge.  */
/* Expands a one-argument builtin whose pattern is a scalar operation
   merged into a vector (operand 2 duplicates operand 1 as the merge
   source).  */
24118 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24122   tree arg0 = CALL_EXPR_ARG (exp, 0);
24123   rtx op1, op0 = expand_normal (arg0);
24124   enum machine_mode tmode = insn_data[icode].operand[0].mode;
24125   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24127   if (optimize || !target
24128       || GET_MODE (target) != tmode
24129       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24130     target = gen_reg_rtx (tmode);
24132   if (VECTOR_MODE_P (mode0))
24133     op0 = safe_vector_operand (op0, mode0);
24135   if ((optimize && !register_operand (op0, mode0))
24136       || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24137     op0 = copy_to_mode_reg (mode0, op0);
/* The merge operand shares mode0 with the input.  */
24140   if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24141     op1 = copy_to_mode_reg (mode0, op1);
24143   pat = GEN_FCN (icode) (target, op0, op1);
24150 /* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
/* Expands an SSE compare builtin described by D.  SWAP requests that
   the two operands be exchanged (used when the hardware only provides
   the reversed form of D->comparison).  */
24153 ix86_expand_sse_compare (const struct builtin_description *d,
24154 			 tree exp, rtx target, bool swap)
24157   tree arg0 = CALL_EXPR_ARG (exp, 0);
24158   tree arg1 = CALL_EXPR_ARG (exp, 1);
24159   rtx op0 = expand_normal (arg0);
24160   rtx op1 = expand_normal (arg1);
24162   enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24163   enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24164   enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24165   enum rtx_code comparison = d->comparison;
24167   if (VECTOR_MODE_P (mode0))
24168     op0 = safe_vector_operand (op0, mode0);
24169   if (VECTOR_MODE_P (mode1))
24170     op1 = safe_vector_operand (op1, mode1);
24172   /* Swap operands if we have a comparison that isn't available in
/* Copy op1 into a scratch so the swap doesn't clobber a shared rtx.  */
24176       rtx tmp = gen_reg_rtx (mode1);
24177       emit_move_insn (tmp, op1);
24182   if (optimize || !target
24183       || GET_MODE (target) != tmode
24184       || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24185     target = gen_reg_rtx (tmode);
24187   if ((optimize && !register_operand (op0, mode0))
24188       || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24189     op0 = copy_to_mode_reg (mode0, op0);
24190   if ((optimize && !register_operand (op1, mode1))
24191       || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24192     op1 = copy_to_mode_reg (mode1, op1);
/* Operand 3 of the pattern is the comparison rtx itself.  */
24194   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24195   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24202 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
/* Expands a comis*/ucomis* builtin: emits the flag-setting compare,
   then materializes D->comparison of the flags into the low byte of an
   SImode pseudo, which is returned.  */
24205 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24209   tree arg0 = CALL_EXPR_ARG (exp, 0);
24210   tree arg1 = CALL_EXPR_ARG (exp, 1);
24211   rtx op0 = expand_normal (arg0);
24212   rtx op1 = expand_normal (arg1);
24213   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24214   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24215   enum rtx_code comparison = d->comparison;
24217   if (VECTOR_MODE_P (mode0))
24218     op0 = safe_vector_operand (op0, mode0);
24219   if (VECTOR_MODE_P (mode1))
24220     op1 = safe_vector_operand (op1, mode1);
24222   /* Swap operands if we have a comparison that isn't available in
24224   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Result register: zero it, then set only the low byte via a QImode
   subreg so the full SImode value is the 0/1 comparison result.  */
24231   target = gen_reg_rtx (SImode);
24232   emit_move_insn (target, const0_rtx);
24233   target = gen_rtx_SUBREG (QImode, target, 0);
24235   if ((optimize && !register_operand (op0, mode0))
24236       || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24237     op0 = copy_to_mode_reg (mode0, op0);
24238   if ((optimize && !register_operand (op1, mode1))
24239       || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24240     op1 = copy_to_mode_reg (mode1, op1);
24242   pat = GEN_FCN (d->icode) (op0, op1);
24246   emit_insn (gen_rtx_SET (VOIDmode,
24247 			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24248 			  gen_rtx_fmt_ee (comparison, QImode,
24252   return SUBREG_REG (target);
24255 /* Subroutine of ix86_expand_builtin to take care of ptest insns.  */
/* Expands a ptest-style builtin: emits the flag-setting test of the two
   vector operands, then stores D->comparison of the flags into the low
   byte of a zeroed SImode pseudo, which is returned.  */
24258 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24262   tree arg0 = CALL_EXPR_ARG (exp, 0);
24263   tree arg1 = CALL_EXPR_ARG (exp, 1);
24264   rtx op0 = expand_normal (arg0);
24265   rtx op1 = expand_normal (arg1);
24266   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24267   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24268   enum rtx_code comparison = d->comparison;
24270   if (VECTOR_MODE_P (mode0))
24271     op0 = safe_vector_operand (op0, mode0);
24272   if (VECTOR_MODE_P (mode1))
24273     op1 = safe_vector_operand (op1, mode1);
/* Zero the SImode result, then write only its low byte below.  */
24275   target = gen_reg_rtx (SImode);
24276   emit_move_insn (target, const0_rtx);
24277   target = gen_rtx_SUBREG (QImode, target, 0);
24279   if ((optimize && !register_operand (op0, mode0))
24280       || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24281     op0 = copy_to_mode_reg (mode0, op0);
24282   if ((optimize && !register_operand (op1, mode1))
24283       || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24284     op1 = copy_to_mode_reg (mode1, op1);
24286   pat = GEN_FCN (d->icode) (op0, op1);
24290   emit_insn (gen_rtx_SET (VOIDmode,
24291 			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24292 			  gen_rtx_fmt_ee (comparison, QImode,
24296   return SUBREG_REG (target);
24299 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */
/* Expands a five-argument pcmpestri/pcmpestrm builtin.  The pattern has
   two outputs (index and mask); depending on D->code one of them is the
   real result and the other goes into a scratch, and for the flag-reading
   variants (EQ of the flag register selected by D->flag) a 0/1 SImode
   value is produced instead.  */
24302 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24303 			  tree exp, rtx target)
24306   tree arg0 = CALL_EXPR_ARG (exp, 0);
24307   tree arg1 = CALL_EXPR_ARG (exp, 1);
24308   tree arg2 = CALL_EXPR_ARG (exp, 2);
24309   tree arg3 = CALL_EXPR_ARG (exp, 3);
24310   tree arg4 = CALL_EXPR_ARG (exp, 4);
24311   rtx scratch0, scratch1;
24312   rtx op0 = expand_normal (arg0);
24313   rtx op1 = expand_normal (arg1);
24314   rtx op2 = expand_normal (arg2);
24315   rtx op3 = expand_normal (arg3);
24316   rtx op4 = expand_normal (arg4);
24317   enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24319   tmode0 = insn_data[d->icode].operand[0].mode;
24320   tmode1 = insn_data[d->icode].operand[1].mode;
24321   modev2 = insn_data[d->icode].operand[2].mode;
24322   modei3 = insn_data[d->icode].operand[3].mode;
24323   modev4 = insn_data[d->icode].operand[4].mode;
24324   modei5 = insn_data[d->icode].operand[5].mode;
24325   modeimm = insn_data[d->icode].operand[6].mode;
24327   if (VECTOR_MODE_P (modev2))
24328     op0 = safe_vector_operand (op0, modev2);
24329   if (VECTOR_MODE_P (modev4))
24330     op2 = safe_vector_operand (op2, modev4);
/* Legitimize the two vector operands and the two explicit lengths.  */
24332   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24333     op0 = copy_to_mode_reg (modev2, op0);
24334   if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24335     op1 = copy_to_mode_reg (modei3, op1);
24336   if ((optimize && !register_operand (op2, modev4))
24337       || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24338     op2 = copy_to_mode_reg (modev4, op2);
24339   if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24340     op3 = copy_to_mode_reg (modei5, op3);
24342   if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
/* Fixed diagnostic grammar: "a 8-bit" -> "an 8-bit".  */
24344       error ("the fifth argument must be an 8-bit immediate");
24348   if (d->code == IX86_BUILTIN_PCMPESTRI128)
/* pcmpestri: the index output is the result; mask goes to a scratch.  */
24350       if (optimize || !target
24351 	  || GET_MODE (target) != tmode0
24352 	  || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24353 	target = gen_reg_rtx (tmode0);
24355       scratch1 = gen_reg_rtx (tmode1);
24357       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24359   else if (d->code == IX86_BUILTIN_PCMPESTRM128)
/* pcmpestrm: the mask output is the result; index goes to a scratch.  */
24361       if (optimize || !target
24362 	  || GET_MODE (target) != tmode1
24363 	  || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24364 	target = gen_reg_rtx (tmode1);
24366       scratch0 = gen_reg_rtx (tmode0);
24368       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-reading variant: both insn outputs are scratches.  */
24372       gcc_assert (d->flag);
24374       scratch0 = gen_reg_rtx (tmode0);
24375       scratch1 = gen_reg_rtx (tmode1);
24377       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Extract the requested flag as a 0/1 value via the low byte of a
   zeroed SImode pseudo.  */
24387   target = gen_reg_rtx (SImode);
24388   emit_move_insn (target, const0_rtx);
24389   target = gen_rtx_SUBREG (QImode, target, 0);
24392     (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24393 		  gen_rtx_fmt_ee (EQ, QImode,
24394 				  gen_rtx_REG ((enum machine_mode) d->flag,
24397   return SUBREG_REG (target);
24404 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */
/* Expands a three-argument pcmpistri/pcmpistrm builtin.  Mirrors
   ix86_expand_sse_pcmpestr but without the explicit length operands:
   one of the two insn outputs (index or mask) is the result, or a flag
   is extracted for the flag-reading variants.  */
24407 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24408 			  tree exp, rtx target)
24411   tree arg0 = CALL_EXPR_ARG (exp, 0);
24412   tree arg1 = CALL_EXPR_ARG (exp, 1);
24413   tree arg2 = CALL_EXPR_ARG (exp, 2);
24414   rtx scratch0, scratch1;
24415   rtx op0 = expand_normal (arg0);
24416   rtx op1 = expand_normal (arg1);
24417   rtx op2 = expand_normal (arg2);
24418   enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24420   tmode0 = insn_data[d->icode].operand[0].mode;
24421   tmode1 = insn_data[d->icode].operand[1].mode;
24422   modev2 = insn_data[d->icode].operand[2].mode;
24423   modev3 = insn_data[d->icode].operand[3].mode;
24424   modeimm = insn_data[d->icode].operand[4].mode;
24426   if (VECTOR_MODE_P (modev2))
24427     op0 = safe_vector_operand (op0, modev2);
24428   if (VECTOR_MODE_P (modev3))
24429     op1 = safe_vector_operand (op1, modev3);
24431   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24432     op0 = copy_to_mode_reg (modev2, op0);
24433   if ((optimize && !register_operand (op1, modev3))
24434       || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24435     op1 = copy_to_mode_reg (modev3, op1);
24437   if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
/* Fixed diagnostic grammar: "a 8-bit" -> "an 8-bit".  */
24439       error ("the third argument must be an 8-bit immediate");
24443   if (d->code == IX86_BUILTIN_PCMPISTRI128)
/* pcmpistri: index output is the result; mask goes to a scratch.  */
24445       if (optimize || !target
24446 	  || GET_MODE (target) != tmode0
24447 	  || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24448 	target = gen_reg_rtx (tmode0);
24450       scratch1 = gen_reg_rtx (tmode1);
24452       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24454   else if (d->code == IX86_BUILTIN_PCMPISTRM128)
/* pcmpistrm: mask output is the result; index goes to a scratch.  */
24456       if (optimize || !target
24457 	  || GET_MODE (target) != tmode1
24458 	  || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24459 	target = gen_reg_rtx (tmode1);
24461       scratch0 = gen_reg_rtx (tmode0);
24463       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-reading variant: both insn outputs are scratches.  */
24467       gcc_assert (d->flag);
24469       scratch0 = gen_reg_rtx (tmode0);
24470       scratch1 = gen_reg_rtx (tmode1);
24472       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Extract the requested flag as a 0/1 value via the low byte of a
   zeroed SImode pseudo.  */
24482   target = gen_reg_rtx (SImode);
24483   emit_move_insn (target, const0_rtx);
24484   target = gen_rtx_SUBREG (QImode, target, 0);
24487     (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24488 		  gen_rtx_fmt_ee (EQ, QImode,
24489 				  gen_rtx_REG ((enum machine_mode) d->flag,
24492   return SUBREG_REG (target);
24498 /* Subroutine of ix86_expand_builtin to take care of insns with
24499    variable number of operands.  */
/* General expander: dispatches on D->flag (the builtin's function-type
   tag) to determine arity, which trailing arguments must be immediates,
   whether the insn is a count-shift, a compare, or a ptest, and then
   expands and legitimizes the arguments before emitting the pattern.  */
24502 ix86_expand_args_builtin (const struct builtin_description *d,
24503 			  tree exp, rtx target)
24505   rtx pat, real_target;
24506   unsigned int i, nargs;
24507   unsigned int nargs_constant = 0;
24508   int num_memory = 0;
24512       enum machine_mode mode;
24514   bool last_arg_count = false;
24515   enum insn_code icode = d->icode;
24516   const struct insn_data *insn_p = &insn_data[icode];
24517   enum machine_mode tmode = insn_p->operand[0].mode;
24518   enum machine_mode rmode = VOIDmode;
24520   enum rtx_code comparison = d->comparison;
24522   switch ((enum ix86_builtin_type) d->flag)
/* ptest forms are handled by a dedicated expander.  */
24524     case INT_FTYPE_V8SF_V8SF_PTEST:
24525     case INT_FTYPE_V4DI_V4DI_PTEST:
24526     case INT_FTYPE_V4DF_V4DF_PTEST:
24527     case INT_FTYPE_V4SF_V4SF_PTEST:
24528     case INT_FTYPE_V2DI_V2DI_PTEST:
24529     case INT_FTYPE_V2DF_V2DF_PTEST:
24530       return ix86_expand_sse_ptest (d, exp, target);
/* Unary forms.  */
24531     case FLOAT128_FTYPE_FLOAT128:
24532     case FLOAT_FTYPE_FLOAT:
24533     case INT64_FTYPE_V4SF:
24534     case INT64_FTYPE_V2DF:
24535     case INT_FTYPE_V16QI:
24536     case INT_FTYPE_V8QI:
24537     case INT_FTYPE_V8SF:
24538     case INT_FTYPE_V4DF:
24539     case INT_FTYPE_V4SF:
24540     case INT_FTYPE_V2DF:
24541     case V16QI_FTYPE_V16QI:
24542     case V8SI_FTYPE_V8SF:
24543     case V8SI_FTYPE_V4SI:
24544     case V8HI_FTYPE_V8HI:
24545     case V8HI_FTYPE_V16QI:
24546     case V8QI_FTYPE_V8QI:
24547     case V8SF_FTYPE_V8SF:
24548     case V8SF_FTYPE_V8SI:
24549     case V8SF_FTYPE_V4SF:
24550     case V4SI_FTYPE_V4SI:
24551     case V4SI_FTYPE_V16QI:
24552     case V4SI_FTYPE_V4SF:
24553     case V4SI_FTYPE_V8SI:
24554     case V4SI_FTYPE_V8HI:
24555     case V4SI_FTYPE_V4DF:
24556     case V4SI_FTYPE_V2DF:
24557     case V4HI_FTYPE_V4HI:
24558     case V4DF_FTYPE_V4DF:
24559     case V4DF_FTYPE_V4SI:
24560     case V4DF_FTYPE_V4SF:
24561     case V4DF_FTYPE_V2DF:
24562     case V4SF_FTYPE_V4SF:
24563     case V4SF_FTYPE_V4SI:
24564     case V4SF_FTYPE_V8SF:
24565     case V4SF_FTYPE_V4DF:
24566     case V4SF_FTYPE_V2DF:
24567     case V2DI_FTYPE_V2DI:
24568     case V2DI_FTYPE_V16QI:
24569     case V2DI_FTYPE_V8HI:
24570     case V2DI_FTYPE_V4SI:
24571     case V2DF_FTYPE_V2DF:
24572     case V2DF_FTYPE_V4SI:
24573     case V2DF_FTYPE_V4DF:
24574     case V2DF_FTYPE_V4SF:
24575     case V2DF_FTYPE_V2SI:
24576     case V2SI_FTYPE_V2SI:
24577     case V2SI_FTYPE_V4SF:
24578     case V2SI_FTYPE_V2SF:
24579     case V2SI_FTYPE_V2DF:
24580     case V2SF_FTYPE_V2SF:
24581     case V2SF_FTYPE_V2SI:
/* Scalar unops that merge into a vector destination.  */
24584     case V4SF_FTYPE_V4SF_VEC_MERGE:
24585     case V2DF_FTYPE_V2DF_VEC_MERGE:
24586       return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Plain binary forms; comparisons fall through to the compare path.  */
24587     case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24588     case V16QI_FTYPE_V16QI_V16QI:
24589     case V16QI_FTYPE_V8HI_V8HI:
24590     case V8QI_FTYPE_V8QI_V8QI:
24591     case V8QI_FTYPE_V4HI_V4HI:
24592     case V8HI_FTYPE_V8HI_V8HI:
24593     case V8HI_FTYPE_V16QI_V16QI:
24594     case V8HI_FTYPE_V4SI_V4SI:
24595     case V8SF_FTYPE_V8SF_V8SF:
24596     case V8SF_FTYPE_V8SF_V8SI:
24597     case V4SI_FTYPE_V4SI_V4SI:
24598     case V4SI_FTYPE_V8HI_V8HI:
24599     case V4SI_FTYPE_V4SF_V4SF:
24600     case V4SI_FTYPE_V2DF_V2DF:
24601     case V4HI_FTYPE_V4HI_V4HI:
24602     case V4HI_FTYPE_V8QI_V8QI:
24603     case V4HI_FTYPE_V2SI_V2SI:
24604     case V4DF_FTYPE_V4DF_V4DF:
24605     case V4DF_FTYPE_V4DF_V4DI:
24606     case V4SF_FTYPE_V4SF_V4SF:
24607     case V4SF_FTYPE_V4SF_V4SI:
24608     case V4SF_FTYPE_V4SF_V2SI:
24609     case V4SF_FTYPE_V4SF_V2DF:
24610     case V4SF_FTYPE_V4SF_DI:
24611     case V4SF_FTYPE_V4SF_SI:
24612     case V2DI_FTYPE_V2DI_V2DI:
24613     case V2DI_FTYPE_V16QI_V16QI:
24614     case V2DI_FTYPE_V4SI_V4SI:
24615     case V2DI_FTYPE_V2DI_V16QI:
24616     case V2DI_FTYPE_V2DF_V2DF:
24617     case V2SI_FTYPE_V2SI_V2SI:
24618     case V2SI_FTYPE_V4HI_V4HI:
24619     case V2SI_FTYPE_V2SF_V2SF:
24620     case V2DF_FTYPE_V2DF_V2DF:
24621     case V2DF_FTYPE_V2DF_V4SF:
24622     case V2DF_FTYPE_V2DF_V2DI:
24623     case V2DF_FTYPE_V2DF_DI:
24624     case V2DF_FTYPE_V2DF_SI:
24625     case V2SF_FTYPE_V2SF_V2SF:
24626     case V1DI_FTYPE_V1DI_V1DI:
24627     case V1DI_FTYPE_V8QI_V8QI:
24628     case V1DI_FTYPE_V2SI_V2SI:
24629       if (comparison == UNKNOWN)
24630 	return ix86_expand_binop_builtin (icode, exp, target);
/* Compare forms whose operands must be swapped.  */
24633     case V4SF_FTYPE_V4SF_V4SF_SWAP:
24634     case V2DF_FTYPE_V2DF_V2DF_SWAP:
24635       gcc_assert (comparison != UNKNOWN);
/* Shift forms whose last argument is a count.  */
24639     case V8HI_FTYPE_V8HI_V8HI_COUNT:
24640     case V8HI_FTYPE_V8HI_SI_COUNT:
24641     case V4SI_FTYPE_V4SI_V4SI_COUNT:
24642     case V4SI_FTYPE_V4SI_SI_COUNT:
24643     case V4HI_FTYPE_V4HI_V4HI_COUNT:
24644     case V4HI_FTYPE_V4HI_SI_COUNT:
24645     case V2DI_FTYPE_V2DI_V2DI_COUNT:
24646     case V2DI_FTYPE_V2DI_SI_COUNT:
24647     case V2SI_FTYPE_V2SI_V2SI_COUNT:
24648     case V2SI_FTYPE_V2SI_SI_COUNT:
24649     case V1DI_FTYPE_V1DI_V1DI_COUNT:
24650     case V1DI_FTYPE_V1DI_SI_COUNT:
24652       last_arg_count = true;
24654     case UINT64_FTYPE_UINT64_UINT64:
24655     case UINT_FTYPE_UINT_UINT:
24656     case UINT_FTYPE_UINT_USHORT:
24657     case UINT_FTYPE_UINT_UCHAR:
/* Forms whose result mode differs from the insn's (via subreg).  */
24660     case V2DI2TI_FTYPE_V2DI_INT:
24663       nargs_constant = 1;
/* Binary forms whose last argument must be an immediate.  */
24665     case V8HI_FTYPE_V8HI_INT:
24666     case V8SF_FTYPE_V8SF_INT:
24667     case V4SI_FTYPE_V4SI_INT:
24668     case V4SI_FTYPE_V8SI_INT:
24669     case V4HI_FTYPE_V4HI_INT:
24670     case V4DF_FTYPE_V4DF_INT:
24671     case V4SF_FTYPE_V4SF_INT:
24672     case V4SF_FTYPE_V8SF_INT:
24673     case V2DI_FTYPE_V2DI_INT:
24674     case V2DF_FTYPE_V2DF_INT:
24675     case V2DF_FTYPE_V4DF_INT:
24677       nargs_constant = 1;
/* Three-operand forms.  */
24679     case V16QI_FTYPE_V16QI_V16QI_V16QI:
24680     case V8SF_FTYPE_V8SF_V8SF_V8SF:
24681     case V4DF_FTYPE_V4DF_V4DF_V4DF:
24682     case V4SF_FTYPE_V4SF_V4SF_V4SF:
24683     case V2DF_FTYPE_V2DF_V2DF_V2DF:
/* Three-operand forms with a trailing immediate.  */
24686     case V16QI_FTYPE_V16QI_V16QI_INT:
24687     case V8HI_FTYPE_V8HI_V8HI_INT:
24688     case V8SI_FTYPE_V8SI_V8SI_INT:
24689     case V8SI_FTYPE_V8SI_V4SI_INT:
24690     case V8SF_FTYPE_V8SF_V8SF_INT:
24691     case V8SF_FTYPE_V8SF_V4SF_INT:
24692     case V4SI_FTYPE_V4SI_V4SI_INT:
24693     case V4DF_FTYPE_V4DF_V4DF_INT:
24694     case V4DF_FTYPE_V4DF_V2DF_INT:
24695     case V4SF_FTYPE_V4SF_V4SF_INT:
24696     case V2DI_FTYPE_V2DI_V2DI_INT:
24697     case V2DF_FTYPE_V2DF_V2DF_INT:
24699       nargs_constant = 1;
24701     case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24704       nargs_constant = 1;
24706     case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24709       nargs_constant = 1;
/* Forms with two trailing immediates.  */
24711     case V2DI_FTYPE_V2DI_UINT_UINT:
24713       nargs_constant = 2;
24715     case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24717       nargs_constant = 2;
24720       gcc_unreachable ();
24723   gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparisons are delegated to the dedicated compare expander.  */
24725   if (comparison != UNKNOWN)
24727       gcc_assert (nargs == 2);
24728       return ix86_expand_sse_compare (d, exp, target, swap);
/* When the result mode differs from the insn mode, emit into an rmode
   register and access it through a tmode subreg (real_target).  */
24731   if (rmode == VOIDmode || rmode == tmode)
24735 	  || GET_MODE (target) != tmode
24736 	  || ! (*insn_p->operand[0].predicate) (target, tmode))
24737 	target = gen_reg_rtx (tmode);
24738       real_target = target;
24742       target = gen_reg_rtx (rmode);
24743       real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24746   for (i = 0; i < nargs; i++)
24748       tree arg = CALL_EXPR_ARG (exp, i);
24749       rtx op = expand_normal (arg);
24750       enum machine_mode mode = insn_p->operand[i + 1].mode;
24751       bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24753       if (last_arg_count && (i + 1) == nargs)
24755 	  /* SIMD shift insns take either an 8-bit immediate or
24756 	     register as count.  But builtin functions take int as
24757 	     count.  If count doesn't match, we put it in register.  */
24760 	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24761 	      if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24762 		op = copy_to_reg (op);
24765       else if ((nargs - i) <= nargs_constant)
/* This argument must be an immediate; report the pattern-specific
   bit-width in the diagnostic.  */
24770 		case CODE_FOR_sse4_1_roundpd:
24771 		case CODE_FOR_sse4_1_roundps:
24772 		case CODE_FOR_sse4_1_roundsd:
24773 		case CODE_FOR_sse4_1_roundss:
24774 		case CODE_FOR_sse4_1_blendps:
24775 		case CODE_FOR_avx_blendpd256:
24776 		case CODE_FOR_avx_vpermilv4df:
24777 		case CODE_FOR_avx_roundpd256:
24778 		case CODE_FOR_avx_roundps256:
24779 		  error ("the last argument must be a 4-bit immediate");
24782 		case CODE_FOR_sse4_1_blendpd:
24783 		case CODE_FOR_avx_vpermilv2df:
24784 		  error ("the last argument must be a 2-bit immediate");
24787 		case CODE_FOR_avx_vextractf128v4df:
24788 		case CODE_FOR_avx_vextractf128v8sf:
24789 		case CODE_FOR_avx_vextractf128v8si:
24790 		case CODE_FOR_avx_vinsertf128v4df:
24791 		case CODE_FOR_avx_vinsertf128v8sf:
24792 		case CODE_FOR_avx_vinsertf128v8si:
24793 		  error ("the last argument must be a 1-bit immediate");
24796 		case CODE_FOR_avx_cmpsdv2df3:
24797 		case CODE_FOR_avx_cmpssv4sf3:
24798 		case CODE_FOR_avx_cmppdv2df3:
24799 		case CODE_FOR_avx_cmppsv4sf3:
24800 		case CODE_FOR_avx_cmppdv4df3:
24801 		case CODE_FOR_avx_cmppsv8sf3:
24802 		  error ("the last argument must be a 5-bit immediate");
24806 		  switch (nargs_constant)
24809 		      if ((nargs - i) == nargs_constant)
24811 			  error ("the next to last argument must be an 8-bit immediate");
24815 		      error ("the last argument must be an 8-bit immediate");
24818 		      gcc_unreachable ();
24825 	  if (VECTOR_MODE_P (mode))
24826 	    op = safe_vector_operand (op, mode);
24828 	  /* If we aren't optimizing, only allow one memory operand to
24830 	  if (memory_operand (op, mode))
24833 	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24835 	      if (optimize || !match || num_memory > 1)
24836 		op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy to a register and take a paradoxical subreg.  */
24840 	      op = copy_to_reg (op);
24841 	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24846       args[i].mode = mode;
/* Emit the pattern with the appropriate arity.  */
24852       pat = GEN_FCN (icode) (real_target, args[0].op);
24855       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24858       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24862       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24863 			     args[2].op, args[3].op);
24866       gcc_unreachable ();
24876 /* Subroutine of ix86_expand_builtin to take care of special insns
24877    with variable number of operands.  */
/* Handles builtins with memory semantics (loads, stores, non-temporal
   moves).  D->flag classifies the builtin as a load or a store and
   tells which operand is the memory reference; stores return 0, loads
   return the value in TARGET.  */
24880 ix86_expand_special_args_builtin (const struct builtin_description *d,
24881 				  tree exp, rtx target)
24885   unsigned int i, nargs, arg_adjust, memory;
24889       enum machine_mode mode;
24891   enum insn_code icode = d->icode;
24892   bool last_arg_constant = false;
24893   const struct insn_data *insn_p = &insn_data[icode];
24894   enum machine_mode tmode = insn_p->operand[0].mode;
24895   enum { load, store } klass;
24897   switch ((enum ix86_special_builtin_type) d->flag)
24899     case VOID_FTYPE_VOID:
/* No operands at all (e.g. fences): emit and return.  */
24900       emit_insn (GEN_FCN (icode) (target));
/* Loads: one pointer argument, memory is the source.  */
24902     case V2DI_FTYPE_PV2DI:
24903     case V32QI_FTYPE_PCCHAR:
24904     case V16QI_FTYPE_PCCHAR:
24905     case V8SF_FTYPE_PCV4SF:
24906     case V8SF_FTYPE_PCFLOAT:
24907     case V4SF_FTYPE_PCFLOAT:
24908     case V4DF_FTYPE_PCV2DF:
24909     case V4DF_FTYPE_PCDOUBLE:
24910     case V2DF_FTYPE_PCDOUBLE:
/* Stores: pointer destination plus a value argument.  */
24915     case VOID_FTYPE_PV2SF_V4SF:
24916     case VOID_FTYPE_PV4DI_V4DI:
24917     case VOID_FTYPE_PV2DI_V2DI:
24918     case VOID_FTYPE_PCHAR_V32QI:
24919     case VOID_FTYPE_PCHAR_V16QI:
24920     case VOID_FTYPE_PFLOAT_V8SF:
24921     case VOID_FTYPE_PFLOAT_V4SF:
24922     case VOID_FTYPE_PDOUBLE_V4DF:
24923     case VOID_FTYPE_PDOUBLE_V2DF:
24924     case VOID_FTYPE_PDI_DI:
24925     case VOID_FTYPE_PINT_INT:
24928       /* Reserve memory operand for target.  */
24929       memory = ARRAY_SIZE (args);
/* Loads combining a register operand with a memory operand.  */
24931     case V4SF_FTYPE_V4SF_PCV2SF:
24932     case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Masked loads: memory first, mask second.  */
24937     case V8SF_FTYPE_PCV8SF_V8SF:
24938     case V4DF_FTYPE_PCV4DF_V4DF:
24939     case V4SF_FTYPE_PCV4SF_V4SF:
24940     case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked stores: memory destination plus mask and value.  */
24945     case VOID_FTYPE_PV8SF_V8SF_V8SF:
24946     case VOID_FTYPE_PV4DF_V4DF_V4DF:
24947     case VOID_FTYPE_PV4SF_V4SF_V4SF:
24948     case VOID_FTYPE_PV2DF_V2DF_V2DF:
24951       /* Reserve memory operand for target.  */
24952       memory = ARRAY_SIZE (args);
24955       gcc_unreachable ();
24958   gcc_assert (nargs <= ARRAY_SIZE (args));
24960   if (klass == store)
/* A store's destination is built from the first call argument; no
   caller-supplied TARGET is expected.  */
24962       arg = CALL_EXPR_ARG (exp, 0);
24963       op = expand_normal (arg);
24964       gcc_assert (target == 0);
24965       target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24973 	  || GET_MODE (target) != tmode
24974 	  || ! (*insn_p->operand[0].predicate) (target, tmode))
24975 	target = gen_reg_rtx (tmode);
24978   for (i = 0; i < nargs; i++)
24980       enum machine_mode mode = insn_p->operand[i + 1].mode;
24983       arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24984       op = expand_normal (arg);
24985       match = (*insn_p->operand[i + 1].predicate) (op, mode);
24987       if (last_arg_constant && (i + 1) == nargs)
24993 	      error ("the last argument must be an 8-bit immediate");
25001 	      /* This must be the memory operand.  */
25002 	      op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25003 	      gcc_assert (GET_MODE (op) == mode
25004 			  || GET_MODE (op) == VOIDmode);
25008 	      /* This must be register.  */
25009 	      if (VECTOR_MODE_P (mode))
25010 		op = safe_vector_operand (op, mode);
25012 	      gcc_assert (GET_MODE (op) == mode
25013 			  || GET_MODE (op) == VOIDmode);
25014 	      op = copy_to_mode_reg (mode, op);
25019       args[i].mode = mode;
25025       pat = GEN_FCN (icode) (target, args[0].op);
25028       pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25031       gcc_unreachable ();
/* Stores produce no value; loads return the loaded register.  */
25037   return klass == store ? 0 : target;
25040 /* Return the integer constant in ARG. Constrain it to be in the range
25041 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): this span is an extraction with elided lines (original
   numbering jumps); the success return path lies outside what is visible.  */
25044 get_element_number (tree vec_type, tree arg)
25046 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Accept only host-representable unsigned integer constants <= MAX;
   the comma expression both extracts ELT and range-checks it.  */
25048 if (!host_integerp (arg, 1)
25049 || (elt = tree_low_cst (arg, 1), elt > max))
25051 error ("selector must be an integer constant in the range 0..%wi", max);
25058 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25059 ix86_expand_vector_init. We DO have language-level syntax for this, in
25060 the form of (type){ init-list }. Except that since we can't place emms
25061 instructions from inside the compiler, we can't allow the use of MMX
25062 registers unless the user explicitly asks for it. So we do *not* define
25063 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25064 we have builtins invoked by mmintrin.h that gives us license to emit
25065 these sorts of instructions. */
25068 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25070 enum machine_mode tmode = TYPE_MODE (type);
25071 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25072 int i, n_elt = GET_MODE_NUNITS (tmode);
25073 rtvec v = rtvec_alloc (n_elt);
/* The builtin must supply exactly one scalar argument per vector element.  */
25075 gcc_assert (VECTOR_MODE_P (tmode));
25076 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each argument and narrow it to the vector's element mode.  */
25078 for (i = 0; i < n_elt; ++i)
25080 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25081 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
/* Fall back to a fresh pseudo when the caller's TARGET is unusable.  */
25084 if (!target || !register_operand (target, tmode))
25085 target = gen_reg_rtx (tmode);
25087 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25091 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25092 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25093 had a language-level syntax for referencing vector elements. */
25096 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25098 enum machine_mode tmode, mode0;
25103 arg0 = CALL_EXPR_ARG (exp, 0);
25104 arg1 = CALL_EXPR_ARG (exp, 1);
/* ARG0 is the source vector, ARG1 the constant element selector.  */
25106 op0 = expand_normal (arg0);
25107 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode, MODE0 the full vector mode.  */
25109 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25110 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25111 gcc_assert (VECTOR_MODE_P (mode0));
25113 op0 = force_reg (mode0, op0);
25115 if (optimize || !target || !register_operand (target, tmode))
25116 target = gen_reg_rtx (tmode);
25118 ix86_expand_vector_extract (true, target, op0, elt);
25123 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25124 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25125 a language-level syntax for referencing vector elements. */
25128 ix86_expand_vec_set_builtin (tree exp)
25130 enum machine_mode tmode, mode1;
25131 tree arg0, arg1, arg2;
25133 rtx op0, op1, target;
/* ARG0: source vector, ARG1: scalar value to insert, ARG2: element index.  */
25135 arg0 = CALL_EXPR_ARG (exp, 0);
25136 arg1 = CALL_EXPR_ARG (exp, 1);
25137 arg2 = CALL_EXPR_ARG (exp, 2);
25139 tmode = TYPE_MODE (TREE_TYPE (arg0));
25140 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25141 gcc_assert (VECTOR_MODE_P (tmode));
25143 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25144 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25145 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Widen/narrow the scalar to the element mode if expansion chose another.  */
25147 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25148 op1 = convert_modes (mode1, GET_MODE (op1), op1, true)
25150 op0 = force_reg (tmode, op0);
25151 op1 = force_reg (mode1, op1);
25153 /* OP0 is the source of these builtin functions and shouldn't be
25154 modified. Create a copy, use it and return it as target. */
25155 target = gen_reg_rtx (tmode);
25156 emit_move_insn (target, op0);
25157 ix86_expand_vector_set (true, target, op1, elt);
25162 /* Expand an expression EXP that calls a built-in function,
25163 with result going to TARGET if that's convenient
25164 (and in mode MODE if that's convenient).
25165 SUBTARGET may be used as the target for computing one of EXP's operands.
25166 IGNORE is nonzero if the value is to be ignored. */
25169 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25170 enum machine_mode mode ATTRIBUTE_UNUSED,
25171 int ignore ATTRIBUTE_UNUSED)
25173 const struct builtin_description *d;
25175 enum insn_code icode;
25176 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25177 tree arg0, arg1, arg2;
25178 rtx op0, op1, op2, pat;
25179 enum machine_mode mode0, mode1, mode2;
25180 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25182 /* Determine whether the builtin function is available under the current ISA.
25183 Originally the builtin was not created if it wasn't applicable to the
25184 current ISA based on the command line switches. With function specific
25185 options, we need to check in the context of the function making the call
25186 whether it is supported. */
25187 if (ix86_builtins_isa[fcode].isa
25188 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
/* Report which -m option the user needs; OPTS is NULL when the ISA bits
   don't map to a known option string.  */
25190 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25191 NULL, NULL, false);
25194 error ("%qE needs unknown isa option", fndecl);
25197 gcc_assert (opts != NULL);
25198 error ("%qE needs isa option %s", fndecl, opts);
/* Builtins that need hand-written expansion rather than the generic
   table-driven helpers below.  */
25206 case IX86_BUILTIN_MASKMOVQ:
25207 case IX86_BUILTIN_MASKMOVDQU:
25208 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25209 ? CODE_FOR_mmx_maskmovq
25210 : CODE_FOR_sse2_maskmovdqu);
25211 /* Note the arg order is different from the operand order. */
25212 arg1 = CALL_EXPR_ARG (exp, 0);
25213 arg2 = CALL_EXPR_ARG (exp, 1);
25214 arg0 = CALL_EXPR_ARG (exp, 2);
25215 op0 = expand_normal (arg0);
25216 op1 = expand_normal (arg1);
25217 op2 = expand_normal (arg2);
25218 mode0 = insn_data[icode].operand[0].mode;
25219 mode1 = insn_data[icode].operand[1].mode;
25220 mode2 = insn_data[icode].operand[2].mode;
/* The destination address (OP0) becomes a MEM operand.  */
25222 op0 = force_reg (Pmode, op0);
25223 op0 = gen_rtx_MEM (mode1, op0);
25225 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25226 op0 = copy_to_mode_reg (mode0, op0);
25227 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25228 op1 = copy_to_mode_reg (mode1, op1);
25229 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25230 op2 = copy_to_mode_reg (mode2, op2);
25231 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a stack slot since the insns take a MEM.  */
25237 case IX86_BUILTIN_LDMXCSR:
25238 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25239 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25240 emit_move_insn (target, op0);
25241 emit_insn (gen_sse_ldmxcsr (target));
25244 case IX86_BUILTIN_STMXCSR:
25245 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25246 emit_insn (gen_sse_stmxcsr (target));
25247 return copy_to_mode_reg (SImode, target);
25249 case IX86_BUILTIN_CLFLUSH:
25250 arg0 = CALL_EXPR_ARG (exp, 0);
25251 op0 = expand_normal (arg0);
25252 icode = CODE_FOR_sse2_clflush;
25253 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25254 op0 = copy_to_mode_reg (Pmode, op0);
25256 emit_insn (gen_sse2_clflush (op0));
25259 case IX86_BUILTIN_MONITOR:
25260 arg0 = CALL_EXPR_ARG (exp, 0)
25261 arg1 = CALL_EXPR_ARG (exp, 1);
25262 arg2 = CALL_EXPR_ARG (exp, 2);
25263 op0 = expand_normal (arg0);
25264 op1 = expand_normal (arg1);
25265 op2 = expand_normal (arg2);
25267 op0 = copy_to_mode_reg (Pmode, op0);
25269 op1 = copy_to_mode_reg (SImode, op1);
25271 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor selects the 32- or 64-bit monitor pattern.  */
25272 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25275 case IX86_BUILTIN_MWAIT:
25276 arg0 = CALL_EXPR_ARG (exp, 0);
25277 arg1 = CALL_EXPR_ARG (exp, 1);
25278 op0 = expand_normal (arg0);
25279 op1 = expand_normal (arg1);
25281 op0 = copy_to_mode_reg (SImode, op0);
25283 op1 = copy_to_mode_reg (SImode, op1);
25284 emit_insn (gen_sse3_mwait (op0, op1));
/* Vector init/extract/set builtins delegate to the dedicated helpers.  */
25287 case IX86_BUILTIN_VEC_INIT_V2SI:
25288 case IX86_BUILTIN_VEC_INIT_V4HI:
25289 case IX86_BUILTIN_VEC_INIT_V8QI:
25290 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25292 case IX86_BUILTIN_VEC_EXT_V2DF:
25293 case IX86_BUILTIN_VEC_EXT_V2DI:
25294 case IX86_BUILTIN_VEC_EXT_V4SF:
25295 case IX86_BUILTIN_VEC_EXT_V4SI:
25296 case IX86_BUILTIN_VEC_EXT_V8HI:
25297 case IX86_BUILTIN_VEC_EXT_V2SI:
25298 case IX86_BUILTIN_VEC_EXT_V4HI:
25299 case IX86_BUILTIN_VEC_EXT_V16QI:
25300 return ix86_expand_vec_ext_builtin (exp, target);
25302 case IX86_BUILTIN_VEC_SET_V2DI:
25303 case IX86_BUILTIN_VEC_SET_V4SF:
25304 case IX86_BUILTIN_VEC_SET_V4SI:
25305 case IX86_BUILTIN_VEC_SET_V8HI:
25306 case IX86_BUILTIN_VEC_SET_V4HI:
25307 case IX86_BUILTIN_VEC_SET_V16QI:
25308 return ix86_expand_vec_set_builtin (exp);
/* __float128 infinity: materialize the constant through the pool.  */
25310 case IX86_BUILTIN_INFQ:
25311 case IX86_BUILTIN_HUGE_VALQ:
25313 REAL_VALUE_TYPE inf;
25317 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25319 tmp = validize_mem (force_const_mem (mode, tmp));
25322 target = gen_reg_rtx (mode);
25324 emit_move_insn (target, tmp);
/* Not a special case: search the table-driven builtin descriptions.  */
25332 for (i = 0, d = bdesc_special_args;
25333 i < ARRAY_SIZE (bdesc_special_args);
25335 if (d->code == fcode)
25336 return ix86_expand_special_args_builtin (d, exp, target);
25338 for (i = 0, d = bdesc_args;
25339 i < ARRAY_SIZE (bdesc_args);
25341 if (d->code == fcode)
25344 case IX86_BUILTIN_FABSQ:
25345 case IX86_BUILTIN_COPYSIGNQ:
25347 /* Emit a normal call if SSE2 isn't available. */
25348 return expand_call (exp, target, ignore);
25350 return ix86_expand_args_builtin (d, exp, target);
25353 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25354 if (d->code == fcode)
25355 return ix86_expand_sse_comi (d, exp, target);
25357 for (i = 0, d = bdesc_pcmpestr;
25358 i < ARRAY_SIZE (bdesc_pcmpestr);
25360 if (d->code == fcode)
25361 return ix86_expand_sse_pcmpestr (d, exp, target);
25363 for (i = 0, d = bdesc_pcmpistr;
25364 i < ARRAY_SIZE (bdesc_pcmpistr);
25366 if (d->code == fcode)
25367 return ix86_expand_sse_pcmpistr (d, exp, target);
25369 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25370 if (d->code == fcode)
25371 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25372 (enum multi_arg_type)d->flag,
/* Every valid FCODE must have been handled above.  */
25375 gcc_unreachable ();
25378 /* Returns a function decl for a vectorized version of the builtin function
25379 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25380 if it is not available. */
25383 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25386 enum machine_mode in_mode, out_mode;
/* Both sides of the mapping must be vector types.  */
25389 if (TREE_CODE (type_out) != VECTOR_TYPE
25390 || TREE_CODE (type_in) != VECTOR_TYPE)
25393 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25394 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25395 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25396 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Match each scalar builtin to its SSE vector counterpart when the
   element modes and lane counts line up.  */
25400 case BUILT_IN_SQRT:
25401 if (out_mode == DFmode && out_n == 2
25402 && in_mode == DFmode && in_n == 2)
25403 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25406 case BUILT_IN_SQRTF:
25407 if (out_mode == SFmode && out_n == 4
25408 && in_mode == SFmode && in_n == 4)
25409 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25412 case BUILT_IN_LRINT:
25413 if (out_mode == SImode && out_n == 4
25414 && in_mode == DFmode && in_n == 2)
25415 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25418 case BUILT_IN_LRINTF:
25419 if (out_mode == SImode && out_n == 4
25420 && in_mode == SFmode && in_n == 4)
25421 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25428 /* Dispatch to a handler for a vectorization library. */
25429 if (ix86_veclib_handler)
25430 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25436 /* Handler for an SVML-style interface to
25437 a library with vectorized intrinsics. */
25440 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25443 tree fntype, new_fndecl, args;
25446 enum machine_mode el_mode, in_mode;
25449 /* The SVML is suitable for unsafe math only. */
25450 if (!flag_unsafe_math_optimizations)
25453 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25454 n = TYPE_VECTOR_SUBPARTS (type_out);
25455 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25456 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25457 if (el_mode != in_mode
/* Double-precision math functions: SVML provides 2-lane DF versions.  */
25465 case BUILT_IN_LOG10:
25467 case BUILT_IN_TANH:
25469 case BUILT_IN_ATAN:
25470 case BUILT_IN_ATAN2:
25471 case BUILT_IN_ATANH:
25472 case BUILT_IN_CBRT:
25473 case BUILT_IN_SINH:
25475 case BUILT_IN_ASINH:
25476 case BUILT_IN_ASIN:
25477 case BUILT_IN_COSH:
25479 case BUILT_IN_ACOSH:
25480 case BUILT_IN_ACOS:
25481 if (el_mode != DFmode || n != 2)
/* Single-precision math functions: SVML provides 4-lane SF versions.  */
25485 case BUILT_IN_EXPF:
25486 case BUILT_IN_LOGF:
25487 case BUILT_IN_LOG10F:
25488 case BUILT_IN_POWF:
25489 case BUILT_IN_TANHF:
25490 case BUILT_IN_TANF:
25491 case BUILT_IN_ATANF:
25492 case BUILT_IN_ATAN2F:
25493 case BUILT_IN_ATANHF:
25494 case BUILT_IN_CBRTF:
25495 case BUILT_IN_SINHF:
25496 case BUILT_IN_SINF:
25497 case BUILT_IN_ASINHF:
25498 case BUILT_IN_ASINF:
25499 case BUILT_IN_COSHF:
25500 case BUILT_IN_COSF:
25501 case BUILT_IN_ACOSHF:
25502 case BUILT_IN_ACOSF:
25503 if (el_mode != SFmode || n != 4)
/* Build the SVML entry-point name from the scalar builtin's name; the
   "__builtin_" prefix (10 chars) is stripped via BNAME+10.  Log gets a
   hand-spelled name because SVML calls it "Ln".  */
25511 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25513 if (fn == BUILT_IN_LOGF)
25514 strcpy (name, "vmlsLn4");
25515 else if (fn == BUILT_IN_LOG)
25516 strcpy (name, "vmldLn2");
25519 sprintf (name, "vmls%s", bname+10);
25520 name[strlen (name)-1] = '4';
25523 sprintf (name, "vmld%s2", bname+10);
25525 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to decide between a unary and a
   binary vector function type.  */
25529 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25530 args = TREE_CHAIN (args))
25534 fntype = build_function_type_list (type_out, type_in, NULL);
25536 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25538 /* Build a function declaration for the vectorized function. */
25539 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25540 TREE_PUBLIC (new_fndecl) = 1;
25541 DECL_EXTERNAL (new_fndecl) = 1;
25542 DECL_IS_NOVOPS (new_fndecl) = 1;
25543 TREE_READONLY (new_fndecl) = 1;
25548 /* Handler for an ACML-style interface to
25549 a library with vectorized intrinsics. */
25552 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template: the ".." at offsets 4-5 is later overwritten with the
   scalar function name (see the sprintf at name + 7 below).  */
25554 char name[20] = "__vr.._";
25555 tree fntype, new_fndecl, args;
25558 enum machine_mode el_mode, in_mode;
25561 /* The ACML is 64bits only and suitable for unsafe math only as
25562 it does not correctly support parts of IEEE with the required
25563 precision such as denormals. */
25565 || !flag_unsafe_math_optimizations)
25568 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25569 n = TYPE_VECTOR_SUBPARTS (type_out);
25570 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25571 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25572 if (el_mode != in_mode
/* Double-precision functions ACML provides in vector form.  */
25582 case BUILT_IN_LOG2:
25583 case BUILT_IN_LOG10:
25586 if (el_mode != DFmode
/* Single-precision functions ACML provides in vector form.  */
25591 case BUILT_IN_SINF:
25592 case BUILT_IN_COSF:
25593 case BUILT_IN_EXPF:
25594 case BUILT_IN_POWF:
25595 case BUILT_IN_LOGF:
25596 case BUILT_IN_LOG2F:
25597 case BUILT_IN_LOG10F:
25600 if (el_mode != SFmode
/* Splice the scalar builtin's name (minus the 10-char "__builtin_"
   prefix) after the "__vr.._" template.  */
25609 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25610 sprintf (name + 7, "%s", bname+10);
25613 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25614 args = TREE_CHAIN (args))
25618 fntype = build_function_type_list (type_out, type_in, NULL);
25620 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25622 /* Build a function declaration for the vectorized function. */
25623 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25624 TREE_PUBLIC (new_fndecl) = 1;
25625 DECL_EXTERNAL (new_fndecl) = 1;
25626 DECL_IS_NOVOPS (new_fndecl) = 1;
25627 TREE_READONLY (new_fndecl) = 1;
25633 /* Returns a decl of a function that implements conversion of an integer vector
25634 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25635 side of the conversion.
25636 Return NULL_TREE if it is not available. */
25639 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25641 if (TREE_CODE (type) != VECTOR_TYPE
25642 /* There are only conversions from/to signed integers. */
25643 || TYPE_UNSIGNED (TREE_TYPE (type)))
/* int -> float: CVTDQ2PS handles V4SI -> V4SF.  */
25649 switch (TYPE_MODE (type))
25652 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int with truncation: CVTTPS2DQ handles V4SF -> V4SI.  */
25657 case FIX_TRUNC_EXPR:
25658 switch (TYPE_MODE (type))
25661 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25671 /* Returns a code for a target-specific builtin that implements
25672 reciprocal of the function, or NULL_TREE if not available. */
25675 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25676 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only valid under -mrecip with SSE math
   and relaxed IEEE semantics, and only when optimizing for speed.  */
25678 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25679 && flag_finite_math_only && !flag_trapping_math
25680 && flag_unsafe_math_optimizations))
25684 /* Machine dependent builtins. */
25687 /* Vectorized version of sqrt to rsqrt conversion. */
25688 case IX86_BUILTIN_SQRTPS_NR:
25689 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25695 /* Normal builtins. */
25698 /* Sqrt to rsqrt conversion. */
25699 case BUILT_IN_SQRTF:
25700 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25707 /* Store OPERAND to the memory after reload is completed. This means
25708 that we can't easily use assign_stack_local. */
25710 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25714 gcc_assert (reload_completed);
/* With a red zone we can store below the stack pointer without
   adjusting it.  */
25715 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25717 result = gen_rtx_MEM (mode,
25718 gen_rtx_PLUS (Pmode,
25720 GEN_INT (-RED_ZONE_SIZE)));
25721 emit_move_insn (result, operand);
/* No red zone on 64-bit: push via a pre-decrement of the stack pointer.  */
25723 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25729 operand = gen_lowpart (DImode, operand);
25733 gen_rtx_SET (VOIDmode,
25734 gen_rtx_MEM (DImode,
25735 gen_rtx_PRE_DEC (DImode,
25736 stack_pointer_rtx)),
25740 gcc_unreachable ();
25742 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit wide values are pushed as two SImode halves, high half first.  */
25751 split_di (&operand, 1, operands, operands + 1);
25753 gen_rtx_SET (VOIDmode,
25754 gen_rtx_MEM (SImode,
25755 gen_rtx_PRE_DEC (Pmode,
25756 stack_pointer_rtx)),
25759 gen_rtx_SET (VOIDmode,
25760 gen_rtx_MEM (SImode,
25761 gen_rtx_PRE_DEC (Pmode,
25762 stack_pointer_rtx)),
25767 /* Store HImodes as SImodes. */
25768 operand = gen_lowpart (SImode, operand);
25772 gen_rtx_SET (VOIDmode,
25773 gen_rtx_MEM (GET_MODE (operand),
25774 gen_rtx_PRE_DEC (SImode,
25775 stack_pointer_rtx)),
25779 gcc_unreachable ();
25781 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25786 /* Free operand from the memory. */
25788 ix86_free_from_memory (enum machine_mode mode)
/* Only needed when ix86_force_to_memory actually pushed (no red zone).  */
25790 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25794 if (mode == DImode || TARGET_64BIT)
25798 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25799 to pop or add instruction if registers are available. */
25800 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25801 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25806 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25807 QImode must go into class Q_REGS.
25808 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25809 movdf to do mem-to-mem moves through integer regs. */
25811 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25813 enum machine_mode mode = GET_MODE (x);
25815 /* We're only allowed to return a subclass of CLASS. Many of the
25816 following checks fail for NO_REGS, so eliminate that early. */
25817 if (regclass == NO_REGS)
25820 /* All classes can load zeros. */
25821 if (x == CONST0_RTX (mode))
25824 /* Force constants into memory if we are loading a (nonzero) constant into
25825 an MMX or SSE register. This is because there are no MMX/SSE instructions
25826 to load from a constant. */
25828 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25831 /* Prefer SSE regs only, if we can use them for math. */
25832 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25833 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25835 /* Floating-point constants need more complex checks. */
25836 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25838 /* General regs can load everything. */
25839 if (reg_class_subset_p (regclass, GENERAL_REGS))
25842 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25843 zero above. We only want to wind up preferring 80387 registers if
25844 we plan on doing computation with them. */
25846 && standard_80387_constant_p (x))
25848 /* Limit class to non-sse. */
25849 if (regclass == FLOAT_SSE_REGS)
25851 if (regclass == FP_TOP_SSE_REGS)
25853 if (regclass == FP_SECOND_SSE_REGS)
25854 return FP_SECOND_REG;
25855 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25862 /* Generally when we see PLUS here, it's the function invariant
25863 (plus soft-fp const_int). Which can only be computed into general
25865 if (GET_CODE (x) == PLUS)
25866 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25868 /* QImode constants are easy to load, but non-constant QImode data
25869 must go into Q_REGS. */
25870 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25872 if (reg_class_subset_p (regclass, Q_REGS))
25874 if (reg_class_subset_p (Q_REGS, regclass))
25882 /* Discourage putting floating-point values in SSE registers unless
25883 SSE math is being used, and likewise for the 387 registers. */
25885 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25887 enum machine_mode mode = GET_MODE (x);
25889 /* Restrict the output reload class to the register bank that we are doing
25890 math on. If we would like not to return a subset of CLASS, reject this
25891 alternative: if reload cannot do this, it will still use its choice. */
25892 mode = GET_MODE (x);
25893 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25894 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* For x87 float modes, narrow mixed SSE/x87 classes to their x87 part.  */
25896 if (X87_FLOAT_MODE_P (mode))
25898 if (regclass == FP_TOP_SSE_REGS)
25900 else if (regclass == FP_SECOND_SSE_REGS)
25901 return FP_SECOND_REG;
25903 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Target hook TARGET_SECONDARY_RELOAD for i386.  */
25909 static enum reg_class
25910 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25911 enum machine_mode mode,
25912 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25914 /* QImode spills from non-QI registers require
25915 intermediate register on 32bit targets. */
25916 if (!in_p && mode == QImode && !TARGET_64BIT
25917 && (rclass == GENERAL_REGS
25918 || rclass == LEGACY_REGS
25919 || rclass == INDEX_REGS))
/* Look through SUBREGs/pseudos to find the underlying hard register.  */
25928 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25929 regno = true_regnum (x);
25931 /* Return Q_REGS if the operand is in memory. */
25939 /* If we are copying between general and FP registers, we need a memory
25940 location. The same is true for SSE and MMX registers.
25942 To optimize register_move_cost performance, allow inline variant.
25944 The macro can't work reliably when one of the CLASSES is class containing
25945 registers from multiple units (SSE, MMX, integer). We avoid this by never
25946 combining those units in single alternative in the machine description.
25947 Ensure that this constraint holds to avoid unexpected surprises.
25949 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25950 enforce these sanity checks. */
25953 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25954 enum machine_mode mode, int strict)
/* Reject (or merely flag, when !STRICT) classes that mix register units.  */
25956 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25957 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25958 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25959 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25960 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25961 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25963 gcc_assert (!strict);
25967 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25970 /* ??? This is a lie. We do have moves between mmx/general, and for
25971 mmx/sse2. But by saying we need secondary memory we discourage the
25972 register allocator from using the mmx registers unless needed. */
25973 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25976 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25978 /* SSE1 doesn't have any direct moves from other classes. */
25982 /* If the target says that inter-unit moves are more expensive
25983 than moving through memory, then don't generate them. */
25984 if (!TARGET_INTER_UNIT_MOVES)
25987 /* Between SSE and general, we have moves no larger than word size. */
25988 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed; this is the
   entry point used by the SECONDARY_MEMORY_NEEDED macro.  */
25996 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25997 enum machine_mode mode, int strict)
25999 return inline_secondary_memory_needed (class1, class2, mode, strict);
26002 /* Return true if the registers in CLASS cannot represent the change from
26003 modes FROM to TO. */
26006 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26007 enum reg_class regclass)
26012 /* x87 registers can't do subreg at all, as all values are reformatted
26013 to extended precision. */
26014 if (MAYBE_FLOAT_CLASS_P (regclass))
26017 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26019 /* Vector registers do not support QI or HImode loads. If we don't
26020 disallow a change to these modes, reload will assume it's ok to
26021 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26022 the vec_dupv4hi pattern. */
26023 if (GET_MODE_SIZE (from) < 4)
26026 /* Vector registers do not support subreg with nonzero offsets, which
26027 are otherwise valid for integer registers. Since we can't see
26028 whether we have a nonzero offset from here, prohibit all
26029 nonparadoxical subregs changing size. */
26030 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26037 /* Return the cost of moving data of mode M between a
26038 register and memory. A value of 2 is the default; this cost is
26039 relative to those in `REGISTER_MOVE_COST'.
26041 This function is used extensively by register_move_cost that is used to
26042 build tables at startup. Make it inline in this case.
26043 When IN is 2, return maximum of in and out move cost.
26045 If moving between registers and memory is more expensive than
26046 between two registers, you should define this macro to express the
26049 Model also increased moving costs of QImode registers in non
26053 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87 classes: cost comes from the fp_load/fp_store cost tables.  */
26057 if (FLOAT_CLASS_P (regclass))
26075 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26076 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: index the sse_load/sse_store tables by operand size.  */
26078 if (SSE_CLASS_P (regclass))
26081 switch (GET_MODE_SIZE (mode))
26096 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26097 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: index the mmx_load/mmx_store tables by operand size.  */
26099 if (MMX_CLASS_P (regclass))
26102 switch (GET_MODE_SIZE (mode))
26114 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26115 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers: byte accesses are penalized when partial register
   dependencies matter and the register isn't QI-addressable.  */
26117 switch (GET_MODE_SIZE (mode))
26120 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26123 return ix86_cost->int_store[0];
26124 if (TARGET_PARTIAL_REG_DEPENDENCY
26125 && optimize_function_for_speed_p (cfun))
26126 cost = ix86_cost->movzbl_load;
26128 cost = ix86_cost->int_load[0];
26130 return MAX (cost, ix86_cost->int_store[0]);
26136 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26138 return ix86_cost->movzbl_load;
26140 return ix86_cost->int_store[0] + 4;
26145 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26146 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26148 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26149 if (mode == TFmode)
26152 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26154 cost = ix86_cost->int_load[2];
26156 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
26157 return (cost * (((int) GET_MODE_SIZE (mode)
26158 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper around inline_memory_move_cost; entry point for the
   MEMORY_MOVE_COST macro.  */
26163 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26165 return inline_memory_move_cost (mode, regclass, in);
26169 /* Return the cost of moving data from a register in class CLASS1 to
26170 one in class CLASS2.
26172 It is not required that the cost always equal 2 when FROM is the same as TO;
26173 on some machines it is expensive to move between registers if they are not
26174 general registers. */
26177 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26178 enum reg_class class2)
26180 /* In case we require secondary memory, compute cost of the store followed
26181 by load. In order to avoid bad register allocation choices, we need
26182 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26184 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN argument of 2 asks for max(load, store) from each side.  */
26188 cost += inline_memory_move_cost (mode, class1, 2);
26189 cost += inline_memory_move_cost (mode, class2, 2);
26191 /* In case of copying from general_purpose_register we may emit multiple
26192 stores followed by single load causing memory size mismatch stall.
26193 Count this as arbitrarily high cost of 20. */
26194 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26197 /* In the case of FP/MMX moves, the registers actually overlap, and we
26198 have to switch modes in order to treat them differently. */
26199 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26200 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26206 /* Moves between SSE/MMX and integer unit are expensive. */
26207 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26208 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26210 /* ??? By keeping returned value relatively high, we limit the number
26211 of moves between integer and MMX/SSE registers for all targets.
26212 Additionally, high value prevents problem with x86_modes_tieable_p(),
26213 where integer modes in MMX/SSE registers are not tieable
26214 because of missing QImode and HImode moves to, from or between
26215 MMX/SSE registers. */
26216 return MAX (8, ix86_cost->mmxsse_to_integer);
26218 if (MAYBE_FLOAT_CLASS_P (class1))
26219 return ix86_cost->fp_move;
26220 if (MAYBE_SSE_CLASS_P (class1))
26221 return ix86_cost->sse_move;
26222 if (MAYBE_MMX_CLASS_P (class1))
26223 return ix86_cost->mmx_move;
26227 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26230 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26232 /* Flags and only flags can only hold CCmode values. */
26233 if (CC_REGNO_P (regno))
26234 return GET_MODE_CLASS (mode) == MODE_CC;
26235 if (GET_MODE_CLASS (mode) == MODE_CC
26236 || GET_MODE_CLASS (mode) == MODE_RANDOM
26237 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26239 if (FP_REGNO_P (regno))
26240 return VALID_FP_MODE_P (mode);
26241 if (SSE_REGNO_P (regno))
26243 /* We implement the move patterns for all vector modes into and
26244 out of SSE registers, even when no operation instructions
26245 are available. OImode move is available only when AVX is
26247 return ((TARGET_AVX && mode == OImode)
26248 || VALID_AVX256_REG_MODE (mode)
26249 || VALID_SSE_REG_MODE (mode)
26250 || VALID_SSE2_REG_MODE (mode)
26251 || VALID_MMX_REG_MODE (mode)
26252 || VALID_MMX_REG_MODE_3DNOW (mode));
26254 if (MMX_REGNO_P (regno))
26256 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26257 so if the register is available at all, then we can move data of
26258 the given mode into or out of it. */
26259 return (VALID_MMX_REG_MODE (mode)
26260 || VALID_MMX_REG_MODE_3DNOW (mode));
26263 if (mode == QImode)
26265 /* Take care for QImode values - they can be in non-QI regs,
26266 but then they do cause partial register stalls. */
/* ax/bx/cx/dx (and everything in 64-bit mode) are QI-addressable.  */
26267 if (regno <= BX_REG || TARGET_64BIT)
26269 if (!TARGET_PARTIAL_REG_STALL)
26271 return reload_in_progress || reload_completed;
26273 /* We handle both integer and floats in the general purpose registers. */
26274 else if (VALID_INT_MODE_P (mode))
26276 else if (VALID_FP_MODE_P (mode))
26278 else if (VALID_DFP_MODE_P (mode))
26280 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26281 on to use that value in smaller contexts, this can easily force a
26282 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26283 supporting DImode, allow it. */
26284 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26290 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26291 tieable integer mode. */
/* NOTE(review): elided listing -- the switch/case scaffolding that selects
   between the two return expressions below is missing from this view.  */
26294 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* Narrow modes tie except where partial-register stalls make it costly.  */
26303 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
/* Some wider mode (case label elided) ties only on 64-bit targets.  */
26306 return TARGET_64BIT;
26313 /* Return true if MODE1 is accessible in a register that can hold MODE2
26314 without copying. That is, all register classes that can hold MODE2
26315 can also hold MODE1. */
/* NOTE(review): elided listing -- "return true" lines and the final default
   return are not visible between the numbered lines below.  */
26318 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26320 if (mode1 == mode2)
26323 if (ix86_tieable_integer_mode_p (mode1)
26324 && ix86_tieable_integer_mode_p (mode2))
26327 /* MODE2 being XFmode implies fp stack or general regs, which means we
26328 can tie any smaller floating point modes to it. Note that we do not
26329 tie this with TFmode. */
26330 if (mode2 == XFmode)
26331 return mode1 == SFmode || mode1 == DFmode;
26333 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26334 that we can tie it with SFmode. */
26335 if (mode2 == DFmode)
26336 return mode1 == SFmode;
26338 /* If MODE2 is only appropriate for an SSE register, then tie with
26339 any other mode acceptable to SSE registers. */
26340 if (GET_MODE_SIZE (mode2) == 16
26341 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26342 return (GET_MODE_SIZE (mode1) == 16
26343 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26345 /* If MODE2 is appropriate for an MMX register, then tie
26346 with any other mode acceptable to MMX registers. */
26347 if (GET_MODE_SIZE (mode2) == 8
26348 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26349 return (GET_MODE_SIZE (mode1) == 8
26350 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26355 /* Compute a (partial) cost for rtx X. Return true if the complete
26356 cost has been computed, and false if subexpressions should be
26357 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): elided listing -- the outer switch on CODE, its case labels,
   braces and many return statements are missing between the numbered lines.
   The visible fragments correspond to the cost cases for constants,
   extensions, shifts, multiplies, divides, PLUS/LEA, MINUS, NEG, compares,
   FP ops, sqrt and UNSPEC_TP.  Only comments added; code untouched.  */
26360 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26362 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26363 enum machine_mode mode = GET_MODE (x);
/* SPEED selects the tuning cost table; otherwise optimize for size.  */
26364 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26372 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26374 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
/* NOTE(review): "!GET_CODE (x) != LABEL_REF" on the next numbered line is a
   suspicious double negation (it evaluates a boolean, not the rtx code);
   it is present in historical GCC sources and was fixed upstream later.
   Left untouched here since the enclosing lines are elided -- verify
   against the full file before changing.  */
26376 else if (flag_pic && SYMBOLIC_CONST (x)
26378 || (!GET_CODE (x) != LABEL_REF
26379 && (GET_CODE (x) != SYMBOL_REF
26380 || !SYMBOL_REF_LOCAL_P (x)))))
26387 if (mode == VOIDmode)
/* x87 constants loadable by fld1/fldz etc. get special costs.  */
26390 switch (standard_80387_constant_p (x))
26395 default: /* Other constants */
26400 /* Start with (MEM (SYMBOL_REF)), since that's where
26401 it'll probably end up. Add a penalty for size. */
26402 *total = (COSTS_N_INSNS (1)
26403 + (flag_pic != 0 && !TARGET_64BIT)
26404 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26410 /* The zero extensions is often completely free on x86_64, so make
26411 it as cheap as possible. */
26412 if (TARGET_64BIT && mode == DImode
26413 && GET_MODE (XEXP (x, 0)) == SImode)
26415 else if (TARGET_ZERO_EXTEND_WITH_AND)
26416 *total = cost->add;
26418 *total = cost->movzx;
26422 *total = cost->movsx;
/* Shift by constant: shifts of 1 cost like an add; small shifts may be
   cheaper as an LEA.  */
26426 if (CONST_INT_P (XEXP (x, 1))
26427 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26429 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26432 *total = cost->add;
26435 if ((value == 2 || value == 3)
26436 && cost->lea <= cost->shift_const)
26438 *total = cost->lea;
/* 32-bit target shifting DImode: done as a two-insn (or worse) sequence.  */
26448 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26450 if (CONST_INT_P (XEXP (x, 1)))
26452 if (INTVAL (XEXP (x, 1)) > 32)
26453 *total = cost->shift_const + COSTS_N_INSNS (2);
26455 *total = cost->shift_const * 2;
26459 if (GET_CODE (XEXP (x, 1)) == AND)
26460 *total = cost->shift_var * 2;
26462 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26467 if (CONST_INT_P (XEXP (x, 1)))
26468 *total = cost->shift_const;
26470 *total = cost->shift_var;
/* Multiply: scalar SSE, x87 and vector FP all use fmul cost for now.  */
26475 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26477 /* ??? SSE scalar cost should be used here. */
26478 *total = cost->fmul;
26481 else if (X87_FLOAT_MODE_P (mode))
26483 *total = cost->fmul;
26486 else if (FLOAT_MODE_P (mode))
26488 /* ??? SSE vector cost should be used here. */
26489 *total = cost->fmul;
26494 rtx op0 = XEXP (x, 0);
26495 rtx op1 = XEXP (x, 1);
/* nbits = popcount of the constant multiplier; feeds mult_bit cost.  */
26497 if (CONST_INT_P (XEXP (x, 1)))
26499 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26500 for (nbits = 0; value != 0; value &= value - 1)
26504 /* This is arbitrary. */
26507 /* Compute costs correctly for widening multiplication. */
26508 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26509 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26510 == GET_MODE_SIZE (mode))
26512 int is_mulwiden = 0;
26513 enum machine_mode inner_mode = GET_MODE (op0);
26515 if (GET_CODE (op0) == GET_CODE (op1))
26516 is_mulwiden = 1, op1 = XEXP (op1, 0);
26517 else if (CONST_INT_P (op1))
26519 if (GET_CODE (op0) == SIGN_EXTEND)
26520 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26523 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
/* When the operation is really a widening multiply, cost it in the
   narrower mode.  */
26527 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26530 *total = (cost->mult_init[MODE_INDEX (mode)]
26531 + nbits * cost->mult_bit
26532 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26541 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26542 /* ??? SSE cost should be used here. */
26543 *total = cost->fdiv;
26544 else if (X87_FLOAT_MODE_P (mode))
26545 *total = cost->fdiv;
26546 else if (FLOAT_MODE_P (mode))
26547 /* ??? SSE vector cost should be used here. */
26548 *total = cost->fdiv;
26550 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize the address forms an LEA can compute (base + index*scale
   + disp) and charge lea cost plus operand costs instead.  */
26554 if (GET_MODE_CLASS (mode) == MODE_INT
26555 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26557 if (GET_CODE (XEXP (x, 0)) == PLUS
26558 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26559 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26560 && CONSTANT_P (XEXP (x, 1)))
26562 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26563 if (val == 2 || val == 4 || val == 8)
26565 *total = cost->lea;
26566 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26567 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26568 outer_code, speed);
26569 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26573 else if (GET_CODE (XEXP (x, 0)) == MULT
26574 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26576 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26577 if (val == 2 || val == 4 || val == 8)
26579 *total = cost->lea;
26580 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26581 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26585 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26587 *total = cost->lea;
26588 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26589 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26590 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26597 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26599 /* ??? SSE cost should be used here. */
26600 *total = cost->fadd;
26603 else if (X87_FLOAT_MODE_P (mode))
26605 *total = cost->fadd;
26608 else if (FLOAT_MODE_P (mode))
26610 /* ??? SSE vector cost should be used here. */
26611 *total = cost->fadd;
/* DImode add/sub on 32-bit targets costs two adds; the shift doubles the
   subexpression cost when an operand is not already DImode.  */
26619 if (!TARGET_64BIT && mode == DImode)
26621 *total = (cost->add * 2
26622 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26623 << (GET_MODE (XEXP (x, 0)) != DImode))
26624 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26625 << (GET_MODE (XEXP (x, 1)) != DImode)));
26631 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26633 /* ??? SSE cost should be used here. */
26634 *total = cost->fchs;
26637 else if (X87_FLOAT_MODE_P (mode))
26639 *total = cost->fchs;
26642 else if (FLOAT_MODE_P (mode))
26644 /* ??? SSE vector cost should be used here. */
26645 *total = cost->fchs;
26651 if (!TARGET_64BIT && mode == DImode)
26652 *total = cost->add * 2;
26654 *total = cost->add;
/* Single-bit compare: (zero_extract x 1 n) == 0 maps to test[bwl].  */
26658 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26659 && XEXP (XEXP (x, 0), 1) == const1_rtx
26660 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26661 && XEXP (x, 1) == const0_rtx)
26663 /* This kind of construct is implemented using test[bwl].
26664 Treat it as if we had an AND. */
26665 *total = (cost->add
26666 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26667 + rtx_cost (const1_rtx, outer_code, speed)));
26673 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26678 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26679 /* ??? SSE cost should be used here. */
26680 *total = cost->fabs;
26681 else if (X87_FLOAT_MODE_P (mode))
26682 *total = cost->fabs;
26683 else if (FLOAT_MODE_P (mode))
26684 /* ??? SSE vector cost should be used here. */
26685 *total = cost->fabs;
26689 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26690 /* ??? SSE cost should be used here. */
26691 *total = cost->fsqrt;
26692 else if (X87_FLOAT_MODE_P (mode))
26693 *total = cost->fsqrt;
26694 else if (FLOAT_MODE_P (mode))
26695 /* ??? SSE vector cost should be used here. */
26696 *total = cost->fsqrt;
/* Thread-pointer UNSPEC is essentially free (action line elided).  */
26700 if (XINT (x, 1) == UNSPEC_TP)
/* Counter for unique Mach-O lazy-pointer stub labels (L%d$lz, LPC$%d).  */
26711 static int current_machopic_label_num;
26713 /* Given a symbol name and its associated stub, write out the
26714 definition of the stub. */
/* NOTE(review): elided listing -- the #if/#else lines that choose between
   the PIC (pc-relative via popl %eax) and non-PIC stub flavors are missing
   from this view; both variants' fprintf bodies appear interleaved below.
   32-bit Darwin only (see the gcc_assert).  */
26717 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26719 unsigned int length;
26720 char *binder_name, *symbol_name, lazy_ptr_name[32];
26721 int label = ++current_machopic_label_num;
26723 /* For 64-bit we shouldn't get here. */
26724 gcc_assert (!TARGET_64BIT);
26726 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26727 symb = (*targetm.strip_name_encoding) (symb);
26729 length = strlen (stub);
26730 binder_name = XALLOCAVEC (char, length + 32);
26731 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26733 length = strlen (symb);
26734 symbol_name = XALLOCAVEC (char, length + 32);
26735 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26737 sprintf (lazy_ptr_name, "L%d$lz", label);
26740 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26742 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26744 fprintf (file, "%s:\n", stub);
26745 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC flavor: materialize PC in %eax, load lazy pointer, jump through it.  */
26749 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26750 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26751 fprintf (file, "\tjmp\t*%%edx\n");
26754 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder trampoline: push lazy-pointer address, enter dyld's binder.  */
26756 fprintf (file, "%s:\n", binder_name);
26760 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26761 fprintf (file, "\tpushl\t%%eax\n");
26764 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26766 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer: initially points at the binder; dyld patches it.  */
26768 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26769 fprintf (file, "%s:\n", lazy_ptr_name);
26770 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26771 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin/x86: delegate to the generic Darwin
   end-of-file handling.  (Function header/braces elided in this listing.)  */
26775 darwin_x86_file_end (void)
26777 darwin_file_end ();
26780 #endif /* TARGET_MACHO */
26782 /* Order the registers for register allocator. */
/* Allocation preference: call-clobbered GPRs, call-saved GPRs, then x87
   before SSE when doing x87 FP math (and after SSE otherwise), SSE and
   REX-SSE regs, MMX regs, and finally zero-fill for unallocated slots.
   NOTE(review): declarations of i/pos are elided in this listing.  */
26785 x86_order_regs_for_local_alloc (void)
26790 /* First allocate the local general purpose registers. */
26791 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26792 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26793 reg_alloc_order [pos++] = i;
26795 /* Global general purpose registers. */
26796 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26797 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26798 reg_alloc_order [pos++] = i;
26800 /* x87 registers come first in case we are doing FP math
26802 if (!TARGET_SSE_MATH)
26803 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26804 reg_alloc_order [pos++] = i;
26806 /* SSE registers. */
26807 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26808 reg_alloc_order [pos++] = i;
26809 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26810 reg_alloc_order [pos++] = i;
26812 /* x87 registers. */
26813 if (TARGET_SSE_MATH)
26814 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26815 reg_alloc_order [pos++] = i;
26817 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26818 reg_alloc_order [pos++] = i;
26820 /* Initialize the rest of array as we do not allocate some registers
26822 while (pos < FIRST_PSEUDO_REGISTER)
26823 reg_alloc_order [pos++] = 0;
26826 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26827 struct attribute_spec.handler. */
/* Validates attribute placement (function/method types only), rejects use
   on 32-bit targets, and diagnoses ms_abi+sysv_abi conflicts.  NOTE(review):
   elided listing -- return statements and the !TARGET_64BIT guard preceding
   the "only available for 64-bit" warning are not visible here.  */
26829 ix86_handle_abi_attribute (tree *node, tree name,
26830 tree args ATTRIBUTE_UNUSED,
26831 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26833 if (TREE_CODE (*node) != FUNCTION_TYPE
26834 && TREE_CODE (*node) != METHOD_TYPE
26835 && TREE_CODE (*node) != FIELD_DECL
26836 && TREE_CODE (*node) != TYPE_DECL)
26838 warning (OPT_Wattributes, "%qE attribute only applies to functions",
26840 *no_add_attrs = true;
26845 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
26847 *no_add_attrs = true;
26851 /* Can combine regparm with all attributes but fastcall. */
26852 if (is_attribute_p ("ms_abi", name))
26854 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26856 error ("ms_abi and sysv_abi attributes are not compatible");
26861 else if (is_attribute_p ("sysv_abi", name))
26863 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26865 error ("ms_abi and sysv_abi attributes are not compatible");
26874 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26875 struct attribute_spec.handler. */
/* Resolves *node to the underlying type, warns and drops the attribute
   unless it lands on a record/union, and rejects ms_struct+gcc_struct
   combinations.  NOTE(review): elided listing -- the non-TYPE_DECL branch
   assigning "type" and the return are not visible here.  */
26877 ix86_handle_struct_attribute (tree *node, tree name,
26878 tree args ATTRIBUTE_UNUSED,
26879 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26882 if (DECL_P (*node))
26884 if (TREE_CODE (*node) == TYPE_DECL)
26885 type = &TREE_TYPE (*node);
26890 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26891 || TREE_CODE (*type) == UNION_TYPE)))
26893 warning (OPT_Wattributes, "%qE attribute ignored",
26895 *no_add_attrs = true;
26898 else if ((is_attribute_p ("ms_struct", name)
26899 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26900 || ((is_attribute_p ("gcc_struct", name)
26901 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26903 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
26905 *no_add_attrs = true;
/* Return whether RECORD_TYPE should use MS bit-field layout: either the
   target default says so (and "gcc_struct" does not override it), or the
   type carries an explicit "ms_struct" attribute.  */
26912 ix86_ms_bitfield_layout_p (const_tree record_type)
26914 return (TARGET_MS_BITFIELD_LAYOUT &&
26915 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26916 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26919 /* Returns an expression indicating where the this parameter is
26920 located on entry to the FUNCTION. */
/* 64-bit: "this" is in the first (or, for aggregate-return, second)
   integer parameter register of the applicable ABI.  32-bit: in a regparm/
   fastcall register when available, otherwise on the stack at 4(%esp)
   (8 when an aggregate-return pointer precedes it).  NOTE(review): elided
   listing -- the TARGET_64BIT test and several branch lines are missing.  */
26923 x86_this_parameter (tree function)
26925 tree type = TREE_TYPE (function);
26926 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26931 const int *parm_regs;
26933 if (ix86_function_type_abi (type) == MS_ABI)
26934 parm_regs = x86_64_ms_abi_int_parameter_registers;
26936 parm_regs = x86_64_int_parameter_registers;
/* aggr skips past the hidden return-slot pointer in reg 0.  */
26937 return gen_rtx_REG (DImode, parm_regs[aggr]);
26940 nregs = ix86_function_regparm (type, function);
26942 if (nregs > 0 && !stdarg_p (type))
26946 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26947 regno = aggr ? DX_REG : CX_REG;
26955 return gen_rtx_MEM (SImode,
26956 plus_constant (stack_pointer_rtx, 4));
26959 return gen_rtx_REG (SImode, regno);
26962 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26965 /* Determine whether x86_output_mi_thunk can succeed. */
/* NOTE(review): elided listing -- the "return true/false" action lines for
   each condition are missing here.  The visible logic: 64-bit always works;
   32-bit needs a scratch register free of regparm use, and extra scratch
   for vcall_offset and for PIC GOT references.  */
26968 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26969 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26970 HOST_WIDE_INT vcall_offset, const_tree function)
26972 /* 64-bit can handle anything. */
26976 /* For 32-bit, everything's fine if we have one free register. */
26977 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26980 /* Need a free register for vcall_offset. */
26984 /* Need a free register for GOT references. */
26985 if (flag_pic && !(*targetm.binds_local_p) (function))
26988 /* Otherwise ok. */
26992 /* Output the assembler code for a thunk function. THUNK_DECL is the
26993 declaration for the thunk function itself, FUNCTION is the decl for
26994 the target function. DELTA is an immediate constant offset to be
26995 added to THIS. If VCALL_OFFSET is nonzero, the word at
26996 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): elided listing -- TARGET_64BIT / TARGET_MACHO conditionals,
   several braces and the xops declaration are missing between numbered
   lines; both the 64-bit and 32-bit code paths appear interleaved below.
   Steps: (1) get THIS into a register if needed, (2) add DELTA,
   (3) optionally add the vtable word at vcall_offset, (4) spill THIS back,
   (5) emit the tail jump to FUNCTION (direct, GOTPCREL, Darwin stub, or
   via GOT with a fresh PIC register in %ecx).  */
26999 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27000 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27001 HOST_WIDE_INT vcall_offset, tree function)
27004 rtx this_param = x86_this_parameter (function);
27007 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27008 pull it in now and let DELTA benefit. */
27009 if (REG_P (this_param))
27010 this_reg = this_param;
27011 else if (vcall_offset)
27013 /* Put the this parameter into %eax. */
27014 xops[0] = this_param;
27015 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27016 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27019 this_reg = NULL_RTX;
27021 /* Adjust the this parameter by a fixed constant. */
27024 xops[0] = GEN_INT (delta);
27025 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit immediates that don't fit add's imm32 go via scratch %r10.  */
27028 if (!x86_64_general_operand (xops[0], DImode))
27030 tmp = gen_rtx_REG (DImode, R10_REG);
27032 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27034 xops[1] = this_param;
27036 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27039 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27042 /* Adjust the this parameter by a value stored in the vtable. */
27046 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit: pick a scratch not used for argument passing (fastcall uses
   %ecx for THIS, so fall back to %eax there).  */
27049 int tmp_regno = CX_REG;
27050 if (lookup_attribute ("fastcall",
27051 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27052 tmp_regno = AX_REG;
27053 tmp = gen_rtx_REG (SImode, tmp_regno);
27056 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27058 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27060 /* Adjust the this parameter. */
27061 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27062 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27064 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27065 xops[0] = GEN_INT (vcall_offset);
27067 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27068 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27070 xops[1] = this_reg;
27071 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27074 /* If necessary, drop THIS back to its stack slot. */
27075 if (this_reg && this_reg != this_param)
27077 xops[0] = this_reg;
27078 xops[1] = this_param;
27079 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27082 xops[0] = XEXP (DECL_RTL (function), 0);
27085 if (!flag_pic || (*targetm.binds_local_p) (function))
27086 output_asm_insn ("jmp\t%P0", xops);
27087 /* All thunks should be in the same object as their target,
27088 and thus binds_local_p should be true. */
27089 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27090 gcc_unreachable ();
27093 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27094 tmp = gen_rtx_CONST (Pmode, tmp);
27095 tmp = gen_rtx_MEM (QImode, tmp);
27097 output_asm_insn ("jmp\t%A0", xops);
27102 if (!flag_pic || (*targetm.binds_local_p) (function))
27103 output_asm_insn ("jmp\t%P0", xops);
/* Darwin PIC: jump through the machopic stub for the symbol.  */
27108 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27109 tmp = (gen_rtx_SYMBOL_REF
27111 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27112 tmp = gen_rtx_MEM (QImode, tmp);
27114 output_asm_insn ("jmp\t%0", xops);
27117 #endif /* TARGET_MACHO */
/* Generic 32-bit PIC: set up GOT pointer in %ecx, jump via GOT slot.  */
27119 tmp = gen_rtx_REG (SImode, CX_REG);
27120 output_set_got (tmp, NULL_RTX);
27123 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27124 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: emit default prologue, Darwin-specific
   prologue when applicable (the guard is elided here), optional .version
   and __fltused directives, and Intel-syntax mode if selected.  */
27130 x86_file_start (void)
27132 default_file_start ();
27134 darwin_file_start ();
27136 if (X86_FILE_START_VERSION_DIRECTIVE)
27137 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27138 if (X86_FILE_START_FLTUSED)
27139 fputs ("\t.global\t__fltused\n", asm_out_file);
27140 if (ix86_asm_dialect == ASM_INTEL)
27141 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: on 32-bit targets without -malign-double,
   cap the alignment of double/integer-class fields at 32 bits (i386 ABI).
   NOTE(review): elided listing -- the early "return computed" for
   TARGET_64BIT/TARGET_ALIGN_DOUBLE and the final return are missing.  */
27145 x86_field_alignment (tree field, int computed)
27147 enum machine_mode mode;
27148 tree type = TREE_TYPE (field);
27150 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27152 mode = TYPE_MODE (strip_array_types (type));
27153 if (mode == DFmode || mode == DCmode
27154 || GET_MODE_CLASS (mode) == MODE_INT
27155 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27156 return MIN (32, computed);
27160 /* Output assembler code to FILE to increment profiler label # LABELNO
27161 for profiling a function entry. */
/* NOTE(review): elided listing -- the TARGET_64BIT / flag_pic conditionals
   selecting among the three call sequences (64-bit, 32-bit PIC via GOT,
   32-bit non-PIC) are missing between the numbered lines.  */
27163 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27167 #ifndef NO_PROFILE_COUNTERS
27168 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27171 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27172 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27174 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27178 #ifndef NO_PROFILE_COUNTERS
27179 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27180 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27182 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
27186 #ifndef NO_PROFILE_COUNTERS
27187 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27188 PROFILE_COUNT_REGISTER);
27190 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27194 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27195 /* We don't have exact information about the insn sizes, but we may assume
27196 quite safely that we are informed about all 1 byte insns and memory
27197 address sizes. This is enough to eliminate unnecessary padding in
/* Return a conservative lower bound on INSN's encoded size in bytes.
   NOTE(review): elided listing -- the numeric return values for each case
   (e.g. the 5-byte call case) and some guards are missing from view.  */
27201 min_insn_size (rtx insn)
27205 if (!INSN_P (insn) || !active_insn_p (insn))
27208 /* Discard alignments we've emit and jump instructions. */
27209 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27210 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27213 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
27214 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
27217 /* Important case - calls are always 5 bytes.
27218 It is common to have many calls in the row. */
27220 && symbolic_reference_mentioned_p (PATTERN (insn))
27221 && !SIBLING_CALL_P (insn))
27223 if (get_attr_length (insn) <= 1)
27226 /* For normal instructions we may rely on the sizes of addresses
27227 and the presence of symbol to require 4 bytes of encoding.
27228 This is not the case for jumps where references are PC relative. */
27229 if (!JUMP_P (insn))
27231 l = get_attr_length_address (insn);
27232 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27241 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Pass over the insn stream maintaining a sliding window [START, INSN]
   of estimated byte size NBYTES and jump count NJUMPS; when a 4th jump
   would land in the same 16-byte fetch window, emit a pad before it.
   NOTE(review): elided listing -- loop braces, some increments of njumps
   and a few guard lines are missing between the numbered lines.  */
27245 ix86_avoid_jump_mispredicts (void)
27247 rtx insn, start = get_insns ();
27248 int nbytes = 0, njumps = 0;
27251 /* Look for all minimal intervals of instructions containing 4 jumps.
27252 The intervals are bounded by START and INSN. NBYTES is the total
27253 size of instructions in the interval including INSN and not including
27254 START. When the NBYTES is smaller than 16 bytes, it is possible
27255 that the end of START and INSN ends up in the same 16byte page.
27257 The smallest offset in the page INSN can start is the case where START
27258 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27259 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
27261 for (insn = start; insn; insn = NEXT_INSN (insn))
27265 if (GET_CODE (insn) == CODE_LABEL)
27267 int align = label_to_alignment (insn);
27268 int max_skip = label_to_max_skip (insn);
27272 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27273 already in the current 16 byte page, because otherwise
27274 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27275 bytes to reach 16 byte boundary. */
27277 || (align <= 3 && max_skip != (1 << align) - 1))
27280 fprintf (dump_file, "Label %i with max_skip %i\n",
27281 INSN_UID (insn), max_skip);
/* Shrink the window from the front until the label alignment can no
   longer place INSN in the same 16-byte page as START.  */
27284 while (nbytes + max_skip >= 16)
27286 start = NEXT_INSN (start);
27287 if ((JUMP_P (start)
27288 && GET_CODE (PATTERN (start)) != ADDR_VEC
27289 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27291 njumps--, isjump = 1;
27294 nbytes -= min_insn_size (start);
27300 min_size = min_insn_size (insn);
27301 nbytes += min_size;
27303 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27304 INSN_UID (insn), min_size);
/* Real (non-tablejump) jumps count toward the 4-jump limit.  */
27306 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27307 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27315 start = NEXT_INSN (start);
27316 if ((JUMP_P (start)
27317 && GET_CODE (PATTERN (start)) != ADDR_VEC
27318 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27320 njumps--, isjump = 1;
27323 nbytes -= min_insn_size (start);
27325 gcc_assert (njumps >= 0);
27327 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27328 INSN_UID (start), INSN_UID (insn), nbytes);
27330 if (njumps == 3 && isjump && nbytes < 16)
27332 int padsize = 15 - nbytes + min_insn_size (insn);
27335 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27336 INSN_UID (insn), padsize);
27337 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27343 /* AMD Athlon works faster
27344 when RET is not destination of conditional jump or directly preceded
27345 by other jump instruction. We avoid the penalty by inserting NOP just
27346 before the RET instructions in such cases. */
/* Walk predecessors of the exit block; when a RETURN is reached by a jump
   (label before it, or a preceding condjump) replace it with the "long"
   return so a NOP precedes the ret.  NOTE(review): elided listing -- the
   replace=true assignments, delete of the old ret, and some braces are
   missing between the numbered lines below.  */
27348 ix86_pad_returns (void)
27353 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27355 basic_block bb = e->src;
27356 rtx ret = BB_END (bb);
27358 bool replace = false;
27360 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27361 || optimize_bb_for_size_p (bb))
27363 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27364 if (active_insn_p (prev) || LABEL_P (prev))
27366 if (prev && LABEL_P (prev))
/* The ret is a jump target: pad unless every incoming edge falls thru.  */
27371 FOR_EACH_EDGE (e, ei, bb->preds)
27372 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27373 && !(e->flags & EDGE_FALLTHRU))
27378 prev = prev_active_insn (ret);
27380 && ((JUMP_P (prev) && any_condjump_p (prev))
27383 /* Empty functions get branch mispredict even when the jump destination
27384 is not visible to us. */
27385 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27390 emit_insn_before (gen_return_internal_long (), ret);
27396 /* Implement machine specific optimizations. We implement padding of returns
27397 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function header line is elided from this listing --
   presumably this is the machine_reorg (ix86_reorg) body; confirm against
   the full file.  Runs only when optimizing for speed.  */
27401 if (optimize && optimize_function_for_speed_p (cfun))
27403 if (TARGET_PAD_RETURNS)
27404 ix86_pad_returns ();
27405 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27406 if (TARGET_FOUR_JUMP_LIMIT)
27407 ix86_avoid_jump_mispredicts ();
27412 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scan the cached operands of INSN for a GP register above %ebx whose
   QImode access needs a REX prefix.  NOTE(review): the "return true"
   / "return false" lines are elided from this listing.  */
27415 x86_extended_QIreg_mentioned_p (rtx insn)
27418 extract_insn_cached (insn);
27419 for (i = 0; i < recog_data.n_operands; i++)
27420 if (REG_P (recog_data.operand[i])
27421 && REGNO (recog_data.operand[i]) > BX_REG)
27426 /* Return nonzero when P points to register encoded via REX prefix.
27427 Called via for_each_rtx. */
/* NOTE(review): the REG_P(*p) guard preceding the REGNO read is elided
   from this listing.  */
27429 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27431 unsigned int regno;
27434 regno = REGNO (*p);
27435 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27438 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the pattern (or the rtx itself if not an insn) with for_each_rtx,
   using extended_reg_mentioned_1 as the predicate.  */
27441 x86_extended_reg_mentioned_p (rtx insn)
27443 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27444 extended_reg_mentioned_1, NULL);
27447 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27448 optabs would emit if we didn't have TFmode patterns. */
/* If the input is non-negative, convert it as signed.  Otherwise halve it
   (shift right, OR in the low bit to preserve rounding), convert, and
   double the result -- the classic unsigned->float expansion.
   NOTE(review): elided listing -- the "out = operands[0]" style setup line
   and the shift's extra arguments are not fully visible here.  */
27451 x86_emit_floatuns (rtx operands[2])
27453 rtx neglab, donelab, i0, i1, f0, in, out;
27454 enum machine_mode mode, inmode;
27456 inmode = GET_MODE (operands[1]);
27457 gcc_assert (inmode == SImode || inmode == DImode);
27460 in = force_reg (inmode, operands[1]);
27461 mode = GET_MODE (out);
27462 neglab = gen_label_rtx ();
27463 donelab = gen_label_rtx ();
27464 f0 = gen_reg_rtx (mode);
27466 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27468 expand_float (out, in, 0);
27470 emit_jump_insn (gen_jump (donelab));
27473 emit_label (neglab);
27475 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27477 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27479 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27481 expand_float (f0, i0, 0);
27483 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27485 emit_label (donelab);
27488 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27489 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): elided listing -- the mode switch, its case labels, local
   declarations (x, tmp1, tmp2) and several return statements are missing
   between the numbered lines.  Visible strategies: direct VEC_DUPLICATE,
   pshufw-style duplicate for small modes, punpckl+pshufd sequences for
   V8HI/V16QI, widen-and-recurse, and halve-and-concat for 256-bit modes.  */
27492 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27493 rtx target, rtx val)
27495 enum machine_mode hmode, smode, wsmode, wvmode;
/* Simple case: the mode has a native duplicate pattern.  */
27510 val = force_reg (GET_MODE_INNER (mode), val);
27511 x = gen_rtx_VEC_DUPLICATE (mode, val);
27512 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27518 if (TARGET_SSE || TARGET_3DNOW_A)
27520 val = gen_lowpart (SImode, val);
27521 x = gen_rtx_TRUNCATE (HImode, val);
27522 x = gen_rtx_VEC_DUPLICATE (mode, x);
27523 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27545 /* Extend HImode to SImode using a paradoxical SUBREG. */
27546 tmp1 = gen_reg_rtx (SImode);
27547 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27548 /* Insert the SImode value as low element of V4SImode vector. */
27549 tmp2 = gen_reg_rtx (V4SImode);
27550 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27551 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27552 CONST0_RTX (V4SImode),
27554 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27555 /* Cast the V4SImode vector back to a V8HImode vector. */
27556 tmp1 = gen_reg_rtx (V8HImode);
27557 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27558 /* Duplicate the low short through the whole low SImode word. */
27559 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27560 /* Cast the V8HImode vector back to a V4SImode vector. */
27561 tmp2 = gen_reg_rtx (V4SImode);
27562 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27563 /* Replicate the low element of the V4SImode vector. */
27564 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27565 /* Cast the V2SImode back to V8HImode, and store in target. */
27566 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27577 /* Extend QImode to SImode using a paradoxical SUBREG. */
27578 tmp1 = gen_reg_rtx (SImode);
27579 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27580 /* Insert the SImode value as low element of V4SImode vector. */
27581 tmp2 = gen_reg_rtx (V4SImode);
27582 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27583 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27584 CONST0_RTX (V4SImode),
27586 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27587 /* Cast the V4SImode vector back to a V16QImode vector. */
27588 tmp1 = gen_reg_rtx (V16QImode);
27589 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27590 /* Duplicate the low byte through the whole low SImode word. */
27591 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27592 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27593 /* Cast the V16QImode vector back to a V4SImode vector. */
27594 tmp2 = gen_reg_rtx (V4SImode);
27595 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27596 /* Replicate the low element of the V4SImode vector. */
27597 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27598 /* Cast the V2SImode back to V16QImode, and store in target. */
27599 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27607 /* Replicate the value once into the next wider mode and recurse. */
27608 val = convert_modes (wsmode, smode, val, true);
27609 x = expand_simple_binop (wsmode, ASHIFT, val,
27610 GEN_INT (GET_MODE_BITSIZE (smode)),
27611 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27612 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27614 x = gen_reg_rtx (wvmode);
27615 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27616 gcc_unreachable ();
27617 emit_move_insn (target, gen_lowpart (mode, x));
/* 256-bit modes: duplicate into a half-width vector, then concat.  */
27640 rtx tmp = gen_reg_rtx (hmode);
27641 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27642 emit_insn (gen_rtx_SET (VOIDmode, target,
27643 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27652 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27653 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* Store into TARGET a vector whose ONE_VAR element is VAR and whose
   other elements are zero.  NOTE(review): this chunk of the file is
   elided -- intervening original lines (braces, switch/case labels) are
   missing -- so the comments below describe only the visible code.  */
27657 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27658 rtx target, rtx var, int one_var)
27660 enum machine_mode vsimode;
27663 bool use_vector_set = false;
/* Decide, per vector mode / ISA level, whether zeroing TARGET and then
   calling ix86_expand_vector_set is the cheapest strategy.  */
27668 /* For SSE4.1, we normally use vector set. But if the second
27669 element is zero and inter-unit moves are OK, we use movq
27671 use_vector_set = (TARGET_64BIT
27673 && !(TARGET_INTER_UNIT_MOVES
27679 use_vector_set = TARGET_SSE4_1;
27682 use_vector_set = TARGET_SSE2;
27685 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27692 use_vector_set = TARGET_AVX;
27695 /* Use ix86_expand_vector_set in 64bit mode only. */
27696 use_vector_set = TARGET_AVX && TARGET_64BIT;
27702 if (use_vector_set)
/* Zero the whole target, then insert VAR at index ONE_VAR.  */
27704 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27705 var = force_reg (GET_MODE_INNER (mode), var);
27706 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case: concatenate VAR with a zero scalar.  */
27722 var = force_reg (GET_MODE_INNER (mode), var);
27723 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27724 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Build in a fresh pseudo when TARGET is a hard register so the
   shuffles below can operate on it freely.  */
27729 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27730 new_target = gen_reg_rtx (mode);
27732 new_target = target;
27733 var = force_reg (GET_MODE_INNER (mode), var);
/* new_target = (vec_merge (vec_duplicate VAR) 0 1): VAR in element 0,
   zeros everywhere else.  */
27734 x = gen_rtx_VEC_DUPLICATE (mode, var);
27735 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27736 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27739 /* We need to shuffle the value to the correct position, so
27740 create a new pseudo to store the intermediate result. */
27742 /* With SSE2, we can use the integer shuffle insns. */
27743 if (mode != V4SFmode && TARGET_SSE2)
/* pshufd: the ternary index arguments move element 0 into position
   ONE_VAR and pick a zero element for the other lanes.  */
27745 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27747 GEN_INT (one_var == 1 ? 0 : 1),
27748 GEN_INT (one_var == 2 ? 0 : 1),
27749 GEN_INT (one_var == 3 ? 0 : 1)));
27750 if (target != new_target)
27751 emit_move_insn (target, new_target);
27755 /* Otherwise convert the intermediate result to V4SFmode and
27756 use the SSE1 shuffle instructions. */
27757 if (mode != V4SFmode)
27759 tmp = gen_reg_rtx (V4SFmode);
27760 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
/* shufps selector: indices with +4 select from the second operand.  */
27765 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27767 GEN_INT (one_var == 1 ? 0 : 1),
27768 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27769 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27771 if (mode != V4SFmode)
27772 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27773 else if (tmp != target)
27774 emit_move_insn (target, tmp);
27776 else if (target != new_target)
27777 emit_move_insn (target, new_target);
/* Narrow element modes: widen VAR to SImode and recurse with the
   corresponding SImode vector mode, then view-convert back.  */
27782 vsimode = V4SImode;
27788 vsimode = V2SImode;
27794 /* Zero extend the variable element to SImode and recurse. */
27795 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27797 x = gen_reg_rtx (vsimode);
27798 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27800 gcc_unreachable ();
27802 emit_move_insn (target, gen_lowpart (mode, x));
27810 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27811 consisting of the values in VALS. It is known that all elements
27812 except ONE_VAR are constants. Return true if successful. */
/* Store into TARGET a vector from VALS whose elements are all constant
   except the one at index ONE_VAR: load the constant image, then insert
   the variable element.  (Source is elided here; some braces/case
   labels between the visible lines are missing.)  */
27815 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27816 rtx target, rtx vals, int one_var)
27818 rtx var = XVECEXP (vals, 0, one_var);
27819 enum machine_mode wmode;
/* CONST_VEC = VALS with the variable slot replaced by zero, as a
   CONST_VECTOR suitable for the constant pool.  */
27822 const_vec = copy_rtx (vals);
27823 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27824 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27832 /* For the two element vectors, it's just as easy to use
27833 the general case. */
27837 /* Use ix86_expand_vector_set in 64bit mode only. */
27860 /* There's no way to set one QImode entry easily. Combine
27861 the variable value with its adjacent constant value, and
27862 promote to an HImode set. */
/* X is the constant byte sharing an HImode slot with VAR.  */
27863 x = XVECEXP (vals, 0, one_var ^ 1);
/* VAR is the high byte of the pair: shift it up, X fills the low 8
   bits.  */
27866 var = convert_modes (HImode, QImode, var, true);
27867 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27868 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27869 x = GEN_INT (INTVAL (x) & 0xff);
/* VAR is the low byte of the pair: X supplies the high 8 bits.  */
27873 var = convert_modes (HImode, QImode, var, true);
27874 x = gen_int_mode (INTVAL (x) << 8, HImode);
27876 if (x != const0_rtx)
27877 var = expand_simple_binop (HImode, IOR, var, x, var,
27878 1, OPTAB_LIB_WIDEN);
/* Do the insertion on the WMODE (HImode-vector) view, then copy the
   result back in the original mode.  */
27880 x = gen_reg_rtx (wmode);
27881 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27882 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27884 emit_move_insn (target, gen_lowpart (mode, x));
/* General path: load the constant image, then overwrite ONE_VAR.  */
27891 emit_move_insn (target, const_vec);
27892 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27896 /* A subroutine of ix86_expand_vector_init_general. Use vector
27897 concatenate to handle the most general case: all values variable,
27898 and none identical. */
/* Build TARGET from the N rtxes in OPS by pairwise VEC_CONCAT: combine
   adjacent elements into half-width vectors and recurse until n == 2.
   (Source is elided; switch labels and braces are missing.)  */
27901 ix86_expand_vector_init_concat (enum machine_mode mode,
27902 rtx target, rtx *ops, int n)
27904 enum machine_mode cmode, hmode = VOIDmode;
27905 rtx first[8], second[4];
27945 gcc_unreachable ();
/* n == 2: force both halves into registers and emit one VEC_CONCAT.  */
27948 if (!register_operand (ops[1], cmode))
27949 ops[1] = force_reg (cmode, ops[1]);
27950 if (!register_operand (ops[0], cmode))
27951 ops[0] = force_reg (cmode, ops[0]);
27952 emit_insn (gen_rtx_SET (VOIDmode, target,
27953 gen_rtx_VEC_CONCAT (mode, ops[0],
27973 gcc_unreachable ();
27989 gcc_unreachable ();
27994 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Combine each adjacent input pair into a CMODE vector in FIRST[].  */
27997 for (; i > 0; i -= 2, j--)
27999 first[j] = gen_reg_rtx (cmode);
28000 v = gen_rtvec (2, ops[i - 1], ops[i]);
28001 ix86_expand_vector_init (false, first[j],
28002 gen_rtx_PARALLEL (cmode, v));
/* Then combine FIRST[] pairwise into HMODE vectors and recurse.  */
28008 gcc_assert (hmode != VOIDmode);
28009 for (i = j = 0; i < n; i += 2, j++)
28011 second[j] = gen_reg_rtx (hmode);
28012 ix86_expand_vector_init_concat (hmode, second [j],
28016 ix86_expand_vector_init_concat (mode, target, second, n);
28019 ix86_expand_vector_init_concat (mode, target, first, n);
28023 gcc_unreachable ();
28027 /* A subroutine of ix86_expand_vector_init_general. Use vector
28028 interleave to handle the most general case: all values variable,
28029 and none identical. */
/* Build TARGET from sub-word elements in OPS using punpckl-style
   interleaves.  Handles V8HImode and V16QImode element streams.
   (Source is elided; some braces and case labels are missing.)  */
28032 ix86_expand_vector_init_interleave (enum machine_mode mode,
28033 rtx target, rtx *ops, int n)
28035 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
/* Mode-specific insn generators selected by the switch below.  */
28038 rtx (*gen_load_even) (rtx, rtx, rtx);
28039 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28040 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28045 gen_load_even = gen_vec_setv8hi;
28046 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28047 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28048 inner_mode = HImode;
28049 first_imode = V4SImode;
28050 second_imode = V2DImode;
28051 third_imode = VOIDmode;
28054 gen_load_even = gen_vec_setv16qi;
28055 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28056 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28057 inner_mode = QImode;
28058 first_imode = V8HImode;
28059 second_imode = V4SImode;
28060 third_imode = V2DImode;
28063 gcc_unreachable ();
/* First pass: pack each pair of scalar inputs into one vector.  */
28066 for (i = 0; i < n; i++)
28068 /* Extend the odd element to SImode using a paradoxical SUBREG. */
28069 op0 = gen_reg_rtx (SImode);
28070 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]))
28072 /* Insert the SImode value as low element of V4SImode vector. */
28073 op1 = gen_reg_rtx (V4SImode);
28074 op0 = gen_rtx_VEC_MERGE (V4SImode,
28075 gen_rtx_VEC_DUPLICATE (V4SImode,
28077 CONST0_RTX (V4SImode),
28079 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28081 /* Cast the V4SImode vector back to a vector in original mode. */
28082 op0 = gen_reg_rtx (mode);
28083 emit_move_insn (op0, gen_lowpart (mode, op1));
28085 /* Load even elements into the second position. */
28086 emit_insn ((*gen_load_even) (op0,
28087 force_reg (inner_mode,
28091 /* Cast vector to FIRST_IMODE vector. */
28092 ops[i] = gen_reg_rtx (first_imode);
28093 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28096 /* Interleave low FIRST_IMODE vectors. */
28097 for (i = j = 0; i < n; i += 2, j++)
28099 op0 = gen_reg_rtx (first_imode);
28100 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28102 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28103 ops[j] = gen_reg_rtx (second_imode);
28104 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28107 /* Interleave low SECOND_IMODE vectors. */
28108 switch (second_imode)
/* V4SImode needs one more interleave level before the final merge.  */
28111 for (i = j = 0; i < n / 2; i += 2, j++)
28113 op0 = gen_reg_rtx (second_imode);
28114 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28117 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28119 ops[j] = gen_reg_rtx (third_imode);
28120 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* Then fall through to the final V2DImode interleave.  */
28122 second_imode = V2DImode;
28123 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28127 op0 = gen_reg_rtx (second_imode);
28128 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28131 /* Cast the SECOND_IMODE vector back to a vector on original
28133 emit_insn (gen_rtx_SET (VOIDmode, target,
28134 gen_lowpart (mode, op0)));
28138 gcc_unreachable ();
28142 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28143 all values variable, and none identical. */
/* Most general vector init: all values variable and none identical.
   Dispatches on MODE; the tail handles word-sized builds by packing
   elements into word registers.  (Source is elided; some case labels
   and braces between the visible lines are missing.)  */
28146 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28147 rtx target, rtx vals)
28149 rtx ops[32], op0, op1;
28150 enum machine_mode half_mode = VOIDmode;
28157 if (!mmx_ok && !TARGET_SSE)
/* Wide-element modes: gather operands and build by concatenation.  */
28169 n = GET_MODE_NUNITS (mode);
28170 for (i = 0; i < n; i++)
28171 ops[i] = XVECEXP (vals, 0, i);
28172 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI element vectors: build two 128-bit halves via
   interleave, then concatenate them.  */
28176 half_mode = V16QImode;
28180 half_mode = V8HImode;
28184 n = GET_MODE_NUNITS (mode);
28185 for (i = 0; i < n; i++)
28186 ops[i] = XVECEXP (vals, 0, i);
28187 op0 = gen_reg_rtx (half_mode);
28188 op1 = gen_reg_rtx (half_mode);
28189 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28191 ix86_expand_vector_init_interleave (half_mode, op1,
28192 &ops [n >> 1], n >> 2);
28193 emit_insn (gen_rtx_SET (VOIDmode, target,
28194 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28198 if (!TARGET_SSE4_1)
28206 /* Don't use ix86_expand_vector_init_interleave if we can't
28207 move from GPR to SSE register directly. */
28208 if (!TARGET_INTER_UNIT_MOVES)
28211 n = GET_MODE_NUNITS (mode);
28212 for (i = 0; i < n; i++)
28213 ops[i] = XVECEXP (vals, 0, i);
28214 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28222 gcc_unreachable ();
/* Fallback: accumulate elements into word_mode registers with
   shift/IOR, then assemble the vector from the words.  */
28226 int i, j, n_elts, n_words, n_elt_per_word;
28227 enum machine_mode inner_mode;
28228 rtx words[4], shift;
28230 inner_mode = GET_MODE_INNER (mode);
28231 n_elts = GET_MODE_NUNITS (mode);
28232 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28233 n_elt_per_word = n_elts / n_words;
28234 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28236 for (i = 0; i < n_words; ++i)
28238 rtx word = NULL_RTX;
/* Process elements highest-index first so the lowest-indexed element
   ends up in the low bits of the word.  */
28240 for (j = 0; j < n_elt_per_word; ++j)
28242 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28243 elt = convert_modes (word_mode, inner_mode, elt, true);
28249 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28250 word, 1, OPTAB_LIB_WIDEN);
28251 word = expand_simple_binop (word_mode, IOR, word, elt,
28252 word, 1, OPTAB_LIB_WIDEN);
28260 emit_move_insn (target, gen_lowpart (mode, words[0]));
28261 else if (n_words == 2)
/* Clobber TMP first so the partial word stores are not treated as
   reads of an uninitialized register.  */
28263 rtx tmp = gen_reg_rtx (mode);
28264 emit_clobber (tmp);
28265 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28266 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28267 emit_move_insn (target, tmp);
28269 else if (n_words == 4)
/* Four SImode words: recurse as a V4SImode build and view-convert.  */
28271 rtx tmp = gen_reg_rtx (V4SImode);
28272 gcc_assert (word_mode == SImode);
28273 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28274 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28275 emit_move_insn (target, gen_lowpart (mode, tmp));
28278 gcc_unreachable ();
28282 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28283 instructions unless MMX_OK is true. */
/* Initialize vector TARGET from VALS, picking the cheapest strategy:
   constant-pool load, broadcast, one-nonzero insert, one-variable
   insert, or the fully general expander.  */
28286 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28288 enum machine_mode mode = GET_MODE (target);
28289 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28290 int n_elts = GET_MODE_NUNITS (mode);
28291 int n_var = 0, one_var = -1;
28292 bool all_same = true, all_const_zero = true;
/* Classify elements: count non-constants (remembering the last such
   index in ONE_VAR) and track the all-same / all-zero properties.  */
28296 for (i = 0; i < n_elts; ++i)
28298 x = XVECEXP (vals, 0, i);
28299 if (!(CONST_INT_P (x)
28300 || GET_CODE (x) == CONST_DOUBLE
28301 || GET_CODE (x) == CONST_FIXED))
28302 n_var++, one_var = i;
28303 else if (x != CONST0_RTX (inner_mode))
28304 all_const_zero = false;
28305 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28309 /* Constants are best loaded from the constant pool. */
28312 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28316 /* If all values are identical, broadcast the value. */
28318 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28319 XVECEXP (vals, 0, 0)))
28322 /* Values where only one field is non-constant are best loaded from
28323 the pool and overwritten via move later. */
28327 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28328 XVECEXP (vals, 0, one_var),
28332 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Last resort: fully general expansion.  */
28336 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  Strategy varies
   by mode: VEC_MERGE (pinsr-style), VEC_CONCAT for two-element modes,
   shufps/pshufd shuffles, AVX 128-bit half extract/insert, or a stack
   round trip as last resort.  (Source is elided; some case labels and
   braces between the visible lines are missing.)  */
28340 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28342 enum machine_mode mode = GET_MODE (target);
28343 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28344 enum machine_mode half_mode;
28345 bool use_vec_merge = false;
/* Generators for 128-bit half extract/insert on 256-bit AVX modes,
   indexed by [mode][lo/hi half].  */
28347 static rtx (*gen_extract[6][2]) (rtx, rtx)
28349 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28350 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28351 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28352 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28353 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28354 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28356 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28358 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28359 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28360 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28361 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28362 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28363 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element case: extract the untouched element and re-concatenate
   the pair in the correct order.  */
28373 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28374 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28376 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28378 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28379 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28385 use_vec_merge = TARGET_SSE4_1;
28393 /* For the two element vectors, we implement a VEC_CONCAT with
28394 the extraction of the other element. */
28396 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28397 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28400 op0 = val, op1 = tmp;
28402 op0 = tmp, op1 = val;
28404 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28405 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28410 use_vec_merge = TARGET_SSE4_1;
28417 use_vec_merge = true;
/* V4SFmode without SSE4.1: shufps-based insertion, one sequence per
   value of ELT (letters track element contents).  */
28421 /* tmp = target = A B C D */
28422 tmp = copy_to_reg (target);
28423 /* target = A A B B */
28424 emit_insn (gen_sse_unpcklps (target, target, target));
28425 /* target = X A B B */
28426 ix86_expand_vector_set (false, target, val, 0);
28427 /* target = A X C D */
28428 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28429 GEN_INT (1), GEN_INT (0),
28430 GEN_INT (2+4), GEN_INT (3+4)));
28434 /* tmp = target = A B C D */
28435 tmp = copy_to_reg (target);
28436 /* tmp = X B C D */
28437 ix86_expand_vector_set (false, tmp, val, 0);
28438 /* target = A B X D */
28439 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28440 GEN_INT (0), GEN_INT (1),
28441 GEN_INT (0+4), GEN_INT (3+4)));
28445 /* tmp = target = A B C D */
28446 tmp = copy_to_reg (target);
28447 /* tmp = X B C D */
28448 ix86_expand_vector_set (false, tmp, val, 0);
28449 /* target = A B X D */
28450 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28451 GEN_INT (0), GEN_INT (1),
28452 GEN_INT (2+4), GEN_INT (0+4)));
28456 gcc_unreachable ();
28461 use_vec_merge = TARGET_SSE4_1;
28465 /* Element 0 handled by vec_merge below. */
28468 use_vec_merge = true;
28474 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28475 store into element 0, then shuffle them back. */
28479 order[0] = GEN_INT (elt);
28480 order[1] = const1_rtx;
28481 order[2] = const2_rtx;
28482 order[3] = GEN_INT (3);
28483 order[elt] = const0_rtx;
28485 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28486 order[1], order[2], order[3]));
28488 ix86_expand_vector_set (false, target, val, 0);
/* The swap permutation is its own inverse, so apply it again.  */
28490 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28491 order[1], order[2], order[3]));
28495 /* For SSE1, we have to reuse the V4SF code. */
28496 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28497 gen_lowpart (SFmode, val), elt);
28502 use_vec_merge = TARGET_SSE2;
28505 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28509 use_vec_merge = TARGET_SSE4_1;
/* 256-bit AVX modes: pick the half mode, then operate only on the
   128-bit half containing ELT.  */
28516 half_mode = V16QImode;
28522 half_mode = V8HImode;
28528 half_mode = V4SImode;
28534 half_mode = V2DImode;
28540 half_mode = V4SFmode;
28546 half_mode = V2DFmode;
28552 /* Compute offset. */
28556 gcc_assert (i <= 1);
28558 /* Extract the half. */
28559 tmp = gen_reg_rtx (half_mode);
28560 emit_insn ((*gen_extract[j][i]) (tmp, target));
28562 /* Put val in tmp at elt. */
28563 ix86_expand_vector_set (false, tmp, val, elt);
/* Write the modified half back into TARGET.  */
28566 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* Common VEC_MERGE path: duplicate VAL across the vector and merge it
   into TARGET at lane ELT only.  */
28575 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28576 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28577 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill to a stack slot, store the element, reload.  */
28581 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28583 emit_move_insn (mem, target);
28585 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28586 emit_move_insn (tmp, val);
28588 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  Uses
   vec_select (pextr-style) where available, shuffles the element into
   lane 0 otherwise, or spills through the stack as a last resort.
   (Source is elided; some case labels/braces are missing.)  */
28593 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28595 enum machine_mode mode = GET_MODE (vec);
28596 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28597 bool use_vec_extr = false;
28610 use_vec_extr = true;
28614 use_vec_extr = TARGET_SSE4_1;
/* V4SFmode: shufps broadcasts the wanted element into lane 0.  */
28626 tmp = gen_reg_rtx (mode);
28627 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28628 GEN_INT (elt), GEN_INT (elt),
28629 GEN_INT (elt+4), GEN_INT (elt+4)));
28633 tmp = gen_reg_rtx (mode);
28634 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28638 gcc_unreachable ();
28641 use_vec_extr = true;
28646 use_vec_extr = TARGET_SSE4_1;
/* V4SImode: pshufd broadcasts the wanted element into lane 0.  */
28660 tmp = gen_reg_rtx (mode);
28661 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28662 GEN_INT (elt), GEN_INT (elt),
28663 GEN_INT (elt), GEN_INT (elt)));
28667 tmp = gen_reg_rtx (mode);
28668 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28672 gcc_unreachable ();
28675 use_vec_extr = true;
28680 /* For SSE1, we have to reuse the V4SF code. */
28681 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28682 gen_lowpart (V4SFmode, vec), elt);
28688 use_vec_extr = TARGET_SSE2;
28691 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28695 use_vec_extr = TARGET_SSE4_1;
28699 /* ??? Could extract the appropriate HImode element and shift. */
/* vec_select path: select lane ELT directly.  */
28706 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28707 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28709 /* Let the rtl optimizers know about the zero extension performed. */
28710 if (inner_mode == QImode || inner_mode == HImode)
28712 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28713 target = gen_lowpart (SImode, target);
28716 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill the vector to the stack and load the element.  */
28720 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28722 emit_move_insn (mem, vec);
28724 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28725 emit_move_insn (target, tmp);
28729 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28730 pattern to reduce; DEST is the destination; IN is the input vector. */
28733 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28735 rtx tmp1, tmp2, tmp3;
28737 tmp1 = gen_reg_rtx (V4SFmode);
28738 tmp2 = gen_reg_rtx (V4SFmode);
28739 tmp3 = gen_reg_rtx (V4SFmode);
/* tmp1 = high half of IN moved into the low half (movhlps).  */
28741 emit_insn (gen_sse_movhlps (tmp1, in, in));
/* tmp2 = FN combining lanes {0,2} and {1,3} pairwise.  */
28742 emit_insn (fn (tmp2, tmp1, in));
/* Move lane 1 of tmp2 into lane 0 of tmp3 and combine once more so
   lane 0 of DEST holds the full reduction.  */
28744 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28745 GEN_INT (1), GEN_INT (1),
28746 GEN_INT (1+4), GEN_INT (1+4)));
28747 emit_insn (fn (dest, tmp2, tmp3));
28750 /* Target hook for scalar_mode_supported_p. */
28752 ix86_scalar_mode_supported_p (enum machine_mode mode)
/* NOTE(review): the return values of the two special-cased branches are
   elided here; only the final default delegation is visible.  */
28754 if (DECIMAL_FLOAT_MODE_P (mode))
28756 else if (mode == TFmode)
28759 return default_scalar_mode_supported_p (mode);
28762 /* Implements target hook vector_mode_supported_p. */
28764 ix86_vector_mode_supported_p (enum machine_mode mode)
/* Accept any vector mode that is valid for an enabled vector ISA.  */
28766 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28768 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28770 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28772 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28774 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28779 /* Target hook for c_mode_for_suffix. */
28780 static enum machine_mode
28781 ix86_c_mode_for_suffix (char suffix)
/* NOTE(review): the body is not visible in this chunk -- presumably it
   maps a constant-literal suffix to a machine mode; confirm against the
   full source before relying on details.  */
28791 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28793 We do this in the new i386 backend to maintain source compatibility
28794 with the old cc0-based compiler. */
28797 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28798 tree inputs ATTRIBUTE_UNUSED,
/* Every asm implicitly clobbers the condition flags and the x87 status
   word, matching the old cc0-based compiler's assumptions.  */
28801 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28803 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28808 /* Implements target vector targetm.asm.encode_section_info. This
28809 is not used by netware. */
28811 static void ATTRIBUTE_UNUSED
28812 ix86_encode_section_info (tree decl, rtx rtl, int first)
28814 default_encode_section_info (decl, rtl, first);
/* Flag static/external variables placed in the large data section so
   that addressing code knows they need far (full-width) addresses.  */
28816 if (TREE_CODE (decl) == VAR_DECL
28817 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28818 && ix86_in_large_data_p (decl))
28819 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28822 /* Worker function for REVERSE_CONDITION. */
28825 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
/* FP compare modes must account for unordered results when the
   condition is reversed.  */
28827 return (mode != CCFPmode && mode != CCFPUmode
28828 ? reverse_condition (code)
28829 : reverse_condition_maybe_unordered (code));
28832 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Return the assembler template for an x87 register move from
   OPERANDS[1] to OPERANDS[0] in INSN.  Chooses between popping (fstp)
   and non-popping (fst/fld) forms depending on whether the source
   register dies.  */
28836 output_387_reg_move (rtx insn, rtx *operands)
28838 if (REG_P (operands[0]))
/* reg <- dying reg: pop the stack instead of copying.  */
28840 if (REG_P (operands[1])
28841 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28843 if (REGNO (operands[0]) == FIRST_STACK_REG)
28844 return output_387_ffreep (operands, 0);
28845 return "fstp\t%y0";
28847 if (STACK_TOP_P (operands[0]))
28848 return "fld%Z1\t%y1";
28851 else if (MEM_P (operands[0]))
28853 gcc_assert (REG_P (operands[1]));
/* Store to memory; pop when the source register dies.  */
28854 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28855 return "fstp%Z0\t%y0";
28858 /* There is no non-popping store to memory for XFmode.
28859 So if we need one, follow the store with a load. */
28860 if (GET_MODE (operands[0]) == XFmode)
28861 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
28863 return "fst%Z0\t%y0";
28870 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28871 FP status register is set. */
28874 ix86_emit_fp_unordered_jump (rtx label)
28876 rtx reg = gen_reg_rtx (HImode);
/* Fetch the x87 FP status word into REG.  */
28879 emit_insn (gen_x86_fnstsw_1 (reg));
28881 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
/* sahf copies AH into EFLAGS; test the unordered condition there.  */
28883 emit_insn (gen_x86_sahf_1 (reg));
28885 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28886 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Without sahf: test the C2 bit (0x04) of the status word directly.  */
28890 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28892 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28893 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28896 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28897 gen_rtx_LABEL_REF (VOIDmode, label),
28899 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28901 emit_jump_insn (temp);
/* Predict the unordered case as unlikely (10%).  */
28902 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28905 /* Output code to perform a log1p XFmode calculation. */
/* op0 = log1p (op1) in XFmode using the x87 fyl2xp1/fyl2x insns.  */
28907 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28909 rtx label1 = gen_label_rtx ();
28910 rtx label2 = gen_label_rtx ();
28912 rtx tmp = gen_reg_rtx (XFmode);
28913 rtx tmp2 = gen_reg_rtx (XFmode);
/* The threshold 0.2928... is 1 - sqrt(2)/2, the edge of fyl2xp1's
   accurate input range; larger |op1| takes the add-then-fyl2x path.  */
28916 emit_insn (gen_absxf2 (tmp, op1));
28917 test = gen_rtx_GE (VOIDmode, tmp,
28918 CONST_DOUBLE_FROM_REAL_VALUE (
28919 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28921 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
/* Small |op1|: op0 = ln(2) * log2(1 + op1) via fyl2xp1.  */
28923 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28924 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28925 emit_jump (label2);
/* Large |op1|: compute 1 + op1 explicitly, then use fyl2x.  */
28927 emit_label (label1);
28928 emit_move_insn (tmp, CONST1_RTX (XFmode));
28929 emit_insn (gen_addxf3 (tmp, op1, tmp));
28930 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28931 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28933 emit_label (label2);
28936 /* Output code to perform a Newton-Raphson approximation of a single precision
28937 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* res = a / b in MODE (SFmode scalar or SF vector) via a reciprocal
   estimate plus one Newton-Raphson refinement step.  */
28939 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28941 rtx x0, x1, e0, e1, two;
28943 x0 = gen_reg_rtx (mode);
28944 e0 = gen_reg_rtx (mode);
28945 e1 = gen_reg_rtx (mode);
28946 x1 = gen_reg_rtx (mode);
/* Materialize the constant 2.0, splatted for vector modes.  */
28948 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28950 if (VECTOR_MODE_P (mode))
28951 two = ix86_build_const_vector (SFmode, true, two);
28953 two = force_reg (mode, two);
28955 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28957 /* x0 = rcp(b) estimate */
28958 emit_insn (gen_rtx_SET (VOIDmode, x0,
28959 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 */
28962 emit_insn (gen_rtx_SET (VOIDmode, e0,
28963 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
28965 emit_insn (gen_rtx_SET (VOIDmode, e1,
28966 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1: the refined reciprocal of b */
28968 emit_insn (gen_rtx_SET (VOIDmode, x1,
28969 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
28971 emit_insn (gen_rtx_SET (VOIDmode, res,
28972 gen_rtx_MULT (mode, a, x1)));
28975 /* Output code to perform a Newton-Raphson approximation of a
28976 single precision floating point [reciprocal] square root. */
/* res = sqrt(a) or rsqrt(a) in MODE using the rsqrt estimate insn plus
   one Newton-Raphson step.  (Source is elided; the flag selecting
   sqrt vs. rsqrt and some braces are not visible here.)  */
28978 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28981 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28984 x0 = gen_reg_rtx (mode);
28985 e0 = gen_reg_rtx (mode);
28986 e1 = gen_reg_rtx (mode);
28987 e2 = gen_reg_rtx (mode);
28988 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5, splatted for vector modes.  */
28990 real_from_integer (&r, VOIDmode, -3, -1, 0);
28991 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28993 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28994 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28996 if (VECTOR_MODE_P (mode))
28998 mthree = ix86_build_const_vector (SFmode, true, mthree);
28999 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29002 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29003 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29005 /* x0 = rsqrt(a) estimate */
29006 emit_insn (gen_rtx_SET (VOIDmode, x0,
29007 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29010 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
29015 zero = gen_reg_rtx (mode);
29016 mask = gen_reg_rtx (mode);
/* mask = (a != 0); AND-ing zeroes the estimate exactly where a == 0.  */
29018 zero = force_reg (mode, CONST0_RTX(mode));
29019 emit_insn (gen_rtx_SET (VOIDmode, mask,
29020 gen_rtx_NE (mode, zero, a)));
29022 emit_insn (gen_rtx_SET (VOIDmode, x0,
29023 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a */
29027 emit_insn (gen_rtx_SET (VOIDmode, e0,
29028 gen_rtx_MULT (mode, x0, a)));
/* e1 = e0 * x0 */
29030 emit_insn (gen_rtx_SET (VOIDmode, e1,
29031 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0 (added as e1 + (-3.0)) */
29034 mthree = force_reg (mode, mthree);
29035 emit_insn (gen_rtx_SET (VOIDmode, e2,
29036 gen_rtx_PLUS (mode, e1, mthree)));
29038 mhalf = force_reg (mode, mhalf);
29040 /* e3 = -.5 * x0 */
29041 emit_insn (gen_rtx_SET (VOIDmode, e3,
29042 gen_rtx_MULT (mode, x0, mhalf)));
29044 /* e3 = -.5 * e0 */
29045 emit_insn (gen_rtx_SET (VOIDmode, e3,
29046 gen_rtx_MULT (mode, e0, mhalf)));
29047 /* ret = e2 * e3 */
29048 emit_insn (gen_rtx_SET (VOIDmode, res,
29049 gen_rtx_MULT (mode, e2, e3)));
29052 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29054 static void ATTRIBUTE_UNUSED
29055 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29058 /* With Binutils 2.15, the "@unwind" marker must be specified on
29059 every occurrence of the ".eh_frame" section, not just the first
29062 && strcmp (name, ".eh_frame") == 0)
29064 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29065 flags & SECTION_WRITE ? "aw" : "a");
/* All other sections use the generic ELF section directive.  */
29068 default_elf_asm_named_section (name, flags, decl);
29071 /* Return the mangling of TYPE if it is an extended fundamental type. */
29073 static const char *
29074 ix86_mangle_type (const_tree type)
29076 type = TYPE_MAIN_VARIANT (type);
/* Only scalar arithmetic-ish types can carry an extended mangling.  */
29078 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29079 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29082 switch (TYPE_MODE (type))
29085 /* __float128 is "g". */
29088 /* "long double" or __float80 is "e". */
29095 /* For 32-bit code we can save PIC register setup by using
29096 __stack_chk_fail_local hidden function instead of calling
29097 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29098 register, so it is better to call __stack_chk_fail directly. */
29101 ix86_stack_protect_fail (void)
/* 64-bit: call __stack_chk_fail directly; 32-bit: use the hidden
   __stack_chk_fail_local to avoid PIC register setup.  */
29103 return TARGET_64BIT
29104 ? default_external_stack_protect_fail ()
29105 : default_hidden_stack_protect_fail ();
29108 /* Select a format to encode pointers in exception handling data. CODE
29109 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29110 true if the symbol may be affected by dynamic relocations.
29112 ??? All x86 object file formats are capable of representing this.
29113 After all, the relocation needed is the same as for the call insn.
29114 Whether or not a particular assembler allows us to enter such, I
29115 guess we'll have to see. */
29117 asm_preferred_eh_data_format (int code, int global)
/* PIC path: pc-relative encodings, indirect when the symbol may be
   preempted; sdata4 when the code model guarantees 32-bit reach.  */
29121 int type = DW_EH_PE_sdata8;
29123 || ix86_cmodel == CM_SMALL_PIC
29124 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29125 type = DW_EH_PE_sdata4;
29126 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: udata4 when the code model bounds addresses, else absptr.  */
29128 if (ix86_cmodel == CM_SMALL
29129 || (ix86_cmodel == CM_MEDIUM && code))
29130 return DW_EH_PE_udata4;
29131 return DW_EH_PE_absptr;
29134 /* Expand copysign from SIGN to the positive value ABS_VALUE
29135 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
29138 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29140 enum machine_mode mode = GET_MODE (sign);
29141 rtx sgn = gen_reg_rtx (mode);
29142 if (mask == NULL_RTX)
/* No caller-supplied mask: build the sign-bit mask ourselves.  */
29144 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29145 if (!VECTOR_MODE_P (mode))
29147 /* We need to generate a scalar mode mask in this case. */
29148 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29149 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29150 mask = gen_reg_rtx (mode);
29151 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* NOTE(review): per the header comment a caller-supplied MASK masks
   *out* the sign bit, so it is inverted here; lines are elided, confirm
   this NOT belongs to the else arm in the full source.  */
29155 mask = gen_rtx_NOT (mode, mask);
/* sgn = sign bit of SIGN; result = abs_value | sgn.  */
29156 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29157 gen_rtx_AND (mode, mask, sign)));
29158 emit_insn (gen_rtx_SET (VOIDmode, result,
29159 gen_rtx_IOR (mode, abs_value, sgn)));
29162 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29163 mask for masking out the sign-bit is stored in *SMASK, if that is
29166 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29168 enum machine_mode mode = GET_MODE (op0);
29171 xa = gen_reg_rtx (mode);
29172 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29173 if (!VECTOR_MODE_P (mode))
29175 /* We need to generate a scalar mode mask in this case. */
29176 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29177 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29178 mask = gen_reg_rtx (mode);
29179 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29181 emit_insn (gen_rtx_SET (VOIDmode, xa,
29182 gen_rtx_AND (mode, op0, mask)));
29190 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29191 swapping the operands if SWAP_OPERANDS is true. The expanded
29192 code is a forward jump to a newly created label in case the
29193 comparison is true. The generated label rtx is returned. */
29195 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29196 bool swap_operands)
29207 label = gen_label_rtx ();
29208 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29209 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29210 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29211 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29212 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29213 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29214 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29215 JUMP_LABEL (tmp) = label;
29220 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29221 using comparison code CODE. Operands are swapped for the comparison if
29222 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29224 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29225 bool swap_operands)
29227 enum machine_mode mode = GET_MODE (op0);
29228 rtx mask = gen_reg_rtx (mode);
29237 if (mode == DFmode)
29238 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29239 gen_rtx_fmt_ee (code, mode, op0, op1)));
29241 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29242 gen_rtx_fmt_ee (code, mode, op0, op1)));
29247 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29248 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29250 ix86_gen_TWO52 (enum machine_mode mode)
29252 REAL_VALUE_TYPE TWO52r;
29255 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29256 TWO52 = const_double_from_real_value (TWO52r, mode);
29257 TWO52 = force_reg (mode, TWO52);
29262 /* Expand SSE sequence for computing lround from OP1 storing
29265 ix86_expand_lround (rtx op0, rtx op1)
29267 /* C code for the stuff we're doing below:
29268 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29271 enum machine_mode mode = GET_MODE (op1);
29272 const struct real_format *fmt;
29273 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29276 /* load nextafter (0.5, 0.0) */
29277 fmt = REAL_MODE_FORMAT (mode);
29278 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29279 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29281 /* adj = copysign (0.5, op1) */
29282 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29283 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29285 /* adj = op1 + adj */
29286 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29288 /* op0 = (imode)adj */
29289 expand_fix (op0, adj, 0);
29292 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
29295 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29297 /* C code for the stuff we're doing below (for do_floor):
29299 xi -= (double)xi > op1 ? 1 : 0;
29302 enum machine_mode fmode = GET_MODE (op1);
29303 enum machine_mode imode = GET_MODE (op0);
29304 rtx ireg, freg, label, tmp;
29306 /* reg = (long)op1 */
29307 ireg = gen_reg_rtx (imode);
29308 expand_fix (ireg, op1, 0);
29310 /* freg = (double)reg */
29311 freg = gen_reg_rtx (fmode);
29312 expand_float (freg, ireg, 0);
29314 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29315 label = ix86_expand_sse_compare_and_jump (UNLE,
29316 freg, op1, !do_floor);
29317 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29318 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29319 emit_move_insn (ireg, tmp);
29321 emit_label (label);
29322 LABEL_NUSES (label) = 1;
29324 emit_move_insn (op0, ireg);
29327 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29328 result in OPERAND0. */
29330 ix86_expand_rint (rtx operand0, rtx operand1)
29332 /* C code for the stuff we're doing below:
29333 xa = fabs (operand1);
29334 if (!isless (xa, 2**52))
29336 xa = xa + 2**52 - 2**52;
29337 return copysign (xa, operand1);
29339 enum machine_mode mode = GET_MODE (operand0);
29340 rtx res, xa, label, TWO52, mask;
29342 res = gen_reg_rtx (mode);
29343 emit_move_insn (res, operand1);
29345 /* xa = abs (operand1) */
29346 xa = ix86_expand_sse_fabs (res, &mask);
29348 /* if (!isless (xa, TWO52)) goto label; */
29349 TWO52 = ix86_gen_TWO52 (mode);
29350 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29352 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29353 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29355 ix86_sse_copysign_to_positive (res, xa, res, mask);
29357 emit_label (label);
29358 LABEL_NUSES (label) = 1;
29360 emit_move_insn (operand0, res);
29363 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29366 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29368 /* C code for the stuff we expand below.
29369 double xa = fabs (x), x2;
29370 if (!isless (xa, TWO52))
29372 xa = xa + TWO52 - TWO52;
29373 x2 = copysign (xa, x);
29382 enum machine_mode mode = GET_MODE (operand0);
29383 rtx xa, TWO52, tmp, label, one, res, mask;
29385 TWO52 = ix86_gen_TWO52 (mode);
29387 /* Temporary for holding the result, initialized to the input
29388 operand to ease control flow. */
29389 res = gen_reg_rtx (mode);
29390 emit_move_insn (res, operand1);
29392 /* xa = abs (operand1) */
29393 xa = ix86_expand_sse_fabs (res, &mask);
29395 /* if (!isless (xa, TWO52)) goto label; */
29396 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29398 /* xa = xa + TWO52 - TWO52; */
29399 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29400 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29402 /* xa = copysign (xa, operand1) */
29403 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29405 /* generate 1.0 or -1.0 */
29406 one = force_reg (mode,
29407 const_double_from_real_value (do_floor
29408 ? dconst1 : dconstm1, mode));
29410 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29411 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29412 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29413 gen_rtx_AND (mode, one, tmp)));
29414 /* We always need to subtract here to preserve signed zero. */
29415 tmp = expand_simple_binop (mode, MINUS,
29416 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29417 emit_move_insn (res, tmp);
29419 emit_label (label);
29420 LABEL_NUSES (label) = 1;
29422 emit_move_insn (operand0, res);
29425 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29428 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29430 /* C code for the stuff we expand below.
29431 double xa = fabs (x), x2;
29432 if (!isless (xa, TWO52))
29434 x2 = (double)(long)x;
29441 if (HONOR_SIGNED_ZEROS (mode))
29442 return copysign (x2, x);
29445 enum machine_mode mode = GET_MODE (operand0);
29446 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29448 TWO52 = ix86_gen_TWO52 (mode);
29450 /* Temporary for holding the result, initialized to the input
29451 operand to ease control flow. */
29452 res = gen_reg_rtx (mode);
29453 emit_move_insn (res, operand1);
29455 /* xa = abs (operand1) */
29456 xa = ix86_expand_sse_fabs (res, &mask);
29458 /* if (!isless (xa, TWO52)) goto label; */
29459 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29461 /* xa = (double)(long)x */
29462 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29463 expand_fix (xi, res, 0);
29464 expand_float (xa, xi, 0);
29467 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29469 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29470 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29471 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29472 gen_rtx_AND (mode, one, tmp)));
29473 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29474 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29475 emit_move_insn (res, tmp);
29477 if (HONOR_SIGNED_ZEROS (mode))
29478 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29480 emit_label (label);
29481 LABEL_NUSES (label) = 1;
29483 emit_move_insn (operand0, res);
29486 /* Expand SSE sequence for computing round from OPERAND1 storing
29487 into OPERAND0. Sequence that works without relying on DImode truncation
29488 via cvttsd2siq that is only available on 64bit targets. */
29490 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29492 /* C code for the stuff we expand below.
29493 double xa = fabs (x), xa2, x2;
29494 if (!isless (xa, TWO52))
29496 Using the absolute value and copying back sign makes
29497 -0.0 -> -0.0 correct.
29498 xa2 = xa + TWO52 - TWO52;
29503 else if (dxa > 0.5)
29505 x2 = copysign (xa2, x);
29508 enum machine_mode mode = GET_MODE (operand0);
29509 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29511 TWO52 = ix86_gen_TWO52 (mode);
29513 /* Temporary for holding the result, initialized to the input
29514 operand to ease control flow. */
29515 res = gen_reg_rtx (mode);
29516 emit_move_insn (res, operand1);
29518 /* xa = abs (operand1) */
29519 xa = ix86_expand_sse_fabs (res, &mask);
29521 /* if (!isless (xa, TWO52)) goto label; */
29522 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29524 /* xa2 = xa + TWO52 - TWO52; */
29525 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29526 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29528 /* dxa = xa2 - xa; */
29529 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29531 /* generate 0.5, 1.0 and -0.5 */
29532 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29533 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29534 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29538 tmp = gen_reg_rtx (mode);
29539 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29540 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29541 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29542 gen_rtx_AND (mode, one, tmp)));
29543 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29544 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29545 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29546 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29547 gen_rtx_AND (mode, one, tmp)));
29548 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29550 /* res = copysign (xa2, operand1) */
29551 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29553 emit_label (label);
29554 LABEL_NUSES (label) = 1;
29556 emit_move_insn (operand0, res);
29559 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29562 ix86_expand_trunc (rtx operand0, rtx operand1)
29564 /* C code for SSE variant we expand below.
29565 double xa = fabs (x), x2;
29566 if (!isless (xa, TWO52))
29568 x2 = (double)(long)x;
29569 if (HONOR_SIGNED_ZEROS (mode))
29570 return copysign (x2, x);
29573 enum machine_mode mode = GET_MODE (operand0);
29574 rtx xa, xi, TWO52, label, res, mask;
29576 TWO52 = ix86_gen_TWO52 (mode);
29578 /* Temporary for holding the result, initialized to the input
29579 operand to ease control flow. */
29580 res = gen_reg_rtx (mode);
29581 emit_move_insn (res, operand1);
29583 /* xa = abs (operand1) */
29584 xa = ix86_expand_sse_fabs (res, &mask);
29586 /* if (!isless (xa, TWO52)) goto label; */
29587 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29589 /* x = (double)(long)x */
29590 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29591 expand_fix (xi, res, 0);
29592 expand_float (res, xi, 0);
29594 if (HONOR_SIGNED_ZEROS (mode))
29595 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29597 emit_label (label);
29598 LABEL_NUSES (label) = 1;
29600 emit_move_insn (operand0, res);
29603 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29606 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29608 enum machine_mode mode = GET_MODE (operand0);
29609 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29611 /* C code for SSE variant we expand below.
29612 double xa = fabs (x), x2;
29613 if (!isless (xa, TWO52))
29615 xa2 = xa + TWO52 - TWO52;
29619 x2 = copysign (xa2, x);
29623 TWO52 = ix86_gen_TWO52 (mode);
29625 /* Temporary for holding the result, initialized to the input
29626 operand to ease control flow. */
29627 res = gen_reg_rtx (mode);
29628 emit_move_insn (res, operand1);
29630 /* xa = abs (operand1) */
29631 xa = ix86_expand_sse_fabs (res, &smask);
29633 /* if (!isless (xa, TWO52)) goto label; */
29634 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29636 /* res = xa + TWO52 - TWO52; */
29637 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29638 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29639 emit_move_insn (res, tmp);
29642 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29644 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29645 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29646 emit_insn (gen_rtx_SET (VOIDmode, mask,
29647 gen_rtx_AND (mode, mask, one)));
29648 tmp = expand_simple_binop (mode, MINUS,
29649 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29650 emit_move_insn (res, tmp);
29652 /* res = copysign (res, operand1) */
29653 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29655 emit_label (label);
29656 LABEL_NUSES (label) = 1;
29658 emit_move_insn (operand0, res);
29661 /* Expand SSE sequence for computing round from OPERAND1 storing
29664 ix86_expand_round (rtx operand0, rtx operand1)
29666 /* C code for the stuff we're doing below:
29667 double xa = fabs (x);
29668 if (!isless (xa, TWO52))
29670 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29671 return copysign (xa, x);
29673 enum machine_mode mode = GET_MODE (operand0);
29674 rtx res, TWO52, xa, label, xi, half, mask;
29675 const struct real_format *fmt;
29676 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29678 /* Temporary for holding the result, initialized to the input
29679 operand to ease control flow. */
29680 res = gen_reg_rtx (mode);
29681 emit_move_insn (res, operand1);
29683 TWO52 = ix86_gen_TWO52 (mode);
29684 xa = ix86_expand_sse_fabs (res, &mask);
29685 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29687 /* load nextafter (0.5, 0.0) */
29688 fmt = REAL_MODE_FORMAT (mode);
29689 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29690 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29692 /* xa = xa + 0.5 */
29693 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29694 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29696 /* xa = (double)(int64_t)xa */
29697 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29698 expand_fix (xi, xa, 0);
29699 expand_float (xa, xi, 0);
29701 /* res = copysign (xa, operand1) */
29702 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29704 emit_label (label);
29705 LABEL_NUSES (label) = 1;
29707 emit_move_insn (operand0, res);
29711 /* Validate whether a SSE5 instruction is valid or not.
29712 OPERANDS is the array of operands.
29713 NUM is the number of operands.
29714 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29715 NUM_MEMORY is the maximum number of memory operands to accept.
29716 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
29719 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29720 bool uses_oc0, int num_memory, bool commutative)
29726 /* Count the number of memory arguments */
29729 for (i = 0; i < num; i++)
29731 enum machine_mode mode = GET_MODE (operands[i]);
29732 if (register_operand (operands[i], mode))
29735 else if (memory_operand (operands[i], mode))
29737 mem_mask |= (1 << i);
29743 rtx pattern = PATTERN (insn);
29745 /* allow 0 for pcmov */
29746 if (GET_CODE (pattern) != SET
29747 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29749 || operands[i] != CONST0_RTX (mode))
29754 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29755 a memory operation. */
29756 if (num_memory < 0)
29758 num_memory = -num_memory;
29759 if ((mem_mask & (1 << (num-1))) != 0)
29761 mem_mask &= ~(1 << (num-1));
29766 /* If there were no memory operations, allow the insn */
29770 /* Do not allow the destination register to be a memory operand. */
29771 else if (mem_mask & (1 << 0))
29774 /* If there are too many memory operations, disallow the instruction. While
29775 the hardware only allows 1 memory reference, before register allocation
29776 for some insns, we allow two memory operations sometimes in order to allow
29777 code like the following to be optimized:
29779 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29781 or similar cases that are vectorized into using the fmaddss
29783 else if (mem_count > num_memory)
29786 /* Don't allow more than one memory operation if not optimizing. */
29787 else if (mem_count > 1 && !optimize)
29790 else if (num == 4 && mem_count == 1)
29792 /* formats (destination is the first argument), example fmaddss:
29793 xmm1, xmm1, xmm2, xmm3/mem
29794 xmm1, xmm1, xmm2/mem, xmm3
29795 xmm1, xmm2, xmm3/mem, xmm1
29796 xmm1, xmm2/mem, xmm3, xmm1 */
29798 return ((mem_mask == (1 << 1))
29799 || (mem_mask == (1 << 2))
29800 || (mem_mask == (1 << 3)));
29802 /* format, example pmacsdd:
29803 xmm1, xmm2, xmm3/mem, xmm1 */
29805 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29807 return (mem_mask == (1 << 2));
29810 else if (num == 4 && num_memory == 2)
29812 /* If there are two memory operations, we can load one of the memory ops
29813 into the destination register. This is for optimizing the
29814 multiply/add ops, which the combiner has optimized both the multiply
29815 and the add insns to have a memory operation. We have to be careful
29816 that the destination doesn't overlap with the inputs. */
29817 rtx op0 = operands[0];
29819 if (reg_mentioned_p (op0, operands[1])
29820 || reg_mentioned_p (op0, operands[2])
29821 || reg_mentioned_p (op0, operands[3]))
29824 /* formats (destination is the first argument), example fmaddss:
29825 xmm1, xmm1, xmm2, xmm3/mem
29826 xmm1, xmm1, xmm2/mem, xmm3
29827 xmm1, xmm2, xmm3/mem, xmm1
29828 xmm1, xmm2/mem, xmm3, xmm1
29830 For the oc0 case, we will load either operands[1] or operands[3] into
29831 operands[0], so any combination of 2 memory operands is ok. */
29835 /* format, example pmacsdd:
29836 xmm1, xmm2, xmm3/mem, xmm1
29838 For the integer multiply/add instructions be more restrictive and
29839 require operands[2] and operands[3] to be the memory operands. */
29841 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
29843 return (mem_mask == ((1 << 2) | (1 << 3)));
29846 else if (num == 3 && num_memory == 1)
29848 /* formats, example protb:
29849 xmm1, xmm2, xmm3/mem
29850 xmm1, xmm2/mem, xmm3 */
29852 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29854 /* format, example comeq:
29855 xmm1, xmm2, xmm3/mem */
29857 return (mem_mask == (1 << 2));
29861 gcc_unreachable ();
29867 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
29868 hardware will allow by using the destination register to load one of the
29869 memory operations. Presently this is used by the multiply/add routines to
29870 allow 2 memory references. */
29873 ix86_expand_sse5_multiple_memory (rtx operands[],
29875 enum machine_mode mode)
29877 rtx op0 = operands[0];
29879 || memory_operand (op0, mode)
29880 || reg_mentioned_p (op0, operands[1])
29881 || reg_mentioned_p (op0, operands[2])
29882 || reg_mentioned_p (op0, operands[3]))
29883 gcc_unreachable ();
29885 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29886 the destination register. */
29887 if (memory_operand (operands[1], mode))
29889 emit_move_insn (op0, operands[1]);
29892 else if (memory_operand (operands[3], mode))
29894 emit_move_insn (op0, operands[3]);
29898 gcc_unreachable ();
29904 /* Table of valid machine attributes. */
29905 static const struct attribute_spec ix86_attribute_table[] =
29907 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29908 /* Stdcall attribute says callee is responsible for popping arguments
29909 if they are not variable. */
29910 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29911 /* Fastcall attribute says callee is responsible for popping arguments
29912 if they are not variable. */
29913 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29914 /* Cdecl attribute says the callee is a normal C declaration */
29915 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29916 /* Regparm attribute specifies how many integer arguments are to be
29917 passed in registers. */
29918 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29919 /* Sseregparm attribute says we are using x86_64 calling conventions
29920 for FP arguments. */
29921 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29922 /* force_align_arg_pointer says this function realigns the stack at entry. */
29923 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29924 false, true, true, ix86_handle_cconv_attribute },
29925 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29926 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29927 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29928 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29930 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29931 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29932 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29933 SUBTARGET_ATTRIBUTE_TABLE,
29935 /* ms_abi and sysv_abi calling convention function attributes. */
29936 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29937 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29939 { NULL, 0, 0, false, false, false, NULL }
29942 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29944 x86_builtin_vectorization_cost (bool runtime_test)
29946 /* If the branch of the runtime test is taken - i.e. - the vectorized
29947 version is skipped - this incurs a misprediction cost (because the
29948 vectorized version is expected to be the fall-through). So we subtract
29949 the latency of a mispredicted branch from the costs that are incured
29950 when the vectorized version is executed.
29952 TODO: The values in individual target tables have to be tuned or new
29953 fields may be needed. For eg. on K8, the default branch path is the
29954 not-taken path. If the taken path is predicted correctly, the minimum
29955 penalty of going down the taken-path is 1 cycle. If the taken-path is
29956 not predicted correctly, then the minimum penalty is 10 cycles. */
29960 return (-(ix86_cost->cond_taken_branch_cost));
29966 /* This function returns the calling abi specific va_list type node.
29967 It returns the FNDECL specific va_list type. */
29970 ix86_fn_abi_va_list (tree fndecl)
29973 return va_list_type_node;
29974 gcc_assert (fndecl != NULL_TREE);
29976 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
29977 return ms_va_list_type_node;
29979 return sysv_va_list_type_node;
29982 /* Returns the canonical va_list type specified by TYPE. If there
29983 is no valid TYPE provided, it return NULL_TREE. */
29986 ix86_canonical_va_list_type (tree type)
29990 /* Resolve references and pointers to va_list type. */
29991 if (INDIRECT_REF_P (type))
29992 type = TREE_TYPE (type);
29993 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
29994 type = TREE_TYPE (type);
29998 wtype = va_list_type_node;
29999 gcc_assert (wtype != NULL_TREE);
30001 if (TREE_CODE (wtype) == ARRAY_TYPE)
30003 /* If va_list is an array type, the argument may have decayed
30004 to a pointer type, e.g. by being passed to another function.
30005 In that case, unwrap both types so that we can compare the
30006 underlying records. */
30007 if (TREE_CODE (htype) == ARRAY_TYPE
30008 || POINTER_TYPE_P (htype))
30010 wtype = TREE_TYPE (wtype);
30011 htype = TREE_TYPE (htype);
30014 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30015 return va_list_type_node;
30016 wtype = sysv_va_list_type_node;
30017 gcc_assert (wtype != NULL_TREE);
30019 if (TREE_CODE (wtype) == ARRAY_TYPE)
30021 /* If va_list is an array type, the argument may have decayed
30022 to a pointer type, e.g. by being passed to another function.
30023 In that case, unwrap both types so that we can compare the
30024 underlying records. */
30025 if (TREE_CODE (htype) == ARRAY_TYPE
30026 || POINTER_TYPE_P (htype))
30028 wtype = TREE_TYPE (wtype);
30029 htype = TREE_TYPE (htype);
30032 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30033 return sysv_va_list_type_node;
30034 wtype = ms_va_list_type_node;
30035 gcc_assert (wtype != NULL_TREE);
30037 if (TREE_CODE (wtype) == ARRAY_TYPE)
30039 /* If va_list is an array type, the argument may have decayed
30040 to a pointer type, e.g. by being passed to another function.
30041 In that case, unwrap both types so that we can compare the
30042 underlying records. */
30043 if (TREE_CODE (htype) == ARRAY_TYPE
30044 || POINTER_TYPE_P (htype))
30046 wtype = TREE_TYPE (wtype);
30047 htype = TREE_TYPE (htype);
30050 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30051 return ms_va_list_type_node;
30054 return std_canonical_va_list_type (type);
30057 /* Iterate through the target-specific builtin types for va_list.
30058 IDX denotes the iterator, *PTREE is set to the result type of
30059 the va_list builtin, and *PNAME to its internal type.
30060 Returns zero if there is no element for this index, otherwise
30061 IDX should be increased upon the next call.
30062 Note, do not iterate a base builtin's name like __builtin_va_list.
30063 Used from c_common_nodes_and_builtins. */
30066 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
30072 *ptree = ms_va_list_type_node;
30073 *pname = "__builtin_ms_va_list";
30076 *ptree = sysv_va_list_type_node;
30077 *pname = "__builtin_sysv_va_list";
30085 /* Initialize the GCC target structure. */
30086 #undef TARGET_RETURN_IN_MEMORY
30087 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30089 #undef TARGET_LEGITIMIZE_ADDRESS
30090 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30092 #undef TARGET_ATTRIBUTE_TABLE
30093 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30094 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30095 # undef TARGET_MERGE_DECL_ATTRIBUTES
30096 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30099 #undef TARGET_COMP_TYPE_ATTRIBUTES
30100 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30102 #undef TARGET_INIT_BUILTINS
30103 #define TARGET_INIT_BUILTINS ix86_init_builtins
30104 #undef TARGET_EXPAND_BUILTIN
30105 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30107 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30108 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30109 ix86_builtin_vectorized_function
30111 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30112 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30114 #undef TARGET_BUILTIN_RECIPROCAL
30115 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30117 #undef TARGET_ASM_FUNCTION_EPILOGUE
30118 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30120 #undef TARGET_ENCODE_SECTION_INFO
30121 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30122 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30124 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30127 #undef TARGET_ASM_OPEN_PAREN
30128 #define TARGET_ASM_OPEN_PAREN ""
30129 #undef TARGET_ASM_CLOSE_PAREN
30130 #define TARGET_ASM_CLOSE_PAREN ""
30132 #undef TARGET_ASM_ALIGNED_HI_OP
30133 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30134 #undef TARGET_ASM_ALIGNED_SI_OP
30135 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30137 #undef TARGET_ASM_ALIGNED_DI_OP
30138 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30141 #undef TARGET_ASM_UNALIGNED_HI_OP
30142 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30143 #undef TARGET_ASM_UNALIGNED_SI_OP
30144 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30145 #undef TARGET_ASM_UNALIGNED_DI_OP
30146 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30148 #undef TARGET_SCHED_ADJUST_COST
30149 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30150 #undef TARGET_SCHED_ISSUE_RATE
30151 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30152 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30153 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30154 ia32_multipass_dfa_lookahead
30156 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30157 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30160 #undef TARGET_HAVE_TLS
30161 #define TARGET_HAVE_TLS true
30163 #undef TARGET_CANNOT_FORCE_CONST_MEM
30164 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30165 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30166 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30168 #undef TARGET_DELEGITIMIZE_ADDRESS
30169 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30171 #undef TARGET_MS_BITFIELD_LAYOUT_P
30172 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
30175 #undef TARGET_BINDS_LOCAL_P
30176 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30178 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30179 #undef TARGET_BINDS_LOCAL_P
30180 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30183 #undef TARGET_ASM_OUTPUT_MI_THUNK
30184 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30185 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30186 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30188 #undef TARGET_ASM_FILE_START
30189 #define TARGET_ASM_FILE_START x86_file_start
30191 #undef TARGET_DEFAULT_TARGET_FLAGS
30192 #define TARGET_DEFAULT_TARGET_FLAGS \
30194 | TARGET_SUBTARGET_DEFAULT \
30195 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

/* Cost model hooks used by the RTL optimizers.  */
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

/* Condition-code register handling.  */
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

/* va_list construction and handling (the 64-bit ABIs use a structure
   va_list, hence the extra per-function-ABI hooks).  */
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

/* Argument-passing conventions.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
/* Stack realignment / dynamic realign argument pointer (DRAP) support.  */
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

/* Mode support queries.  */
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
/* DTPREL relocations are only emitted when the assembler supports TLS;
   the HAVE_AS_TLS guard had been lost, restored here.  */
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

/* Let the subtarget (OS port) hook attribute insertion if it defines one;
   the closing #endif had been lost, restored here.  */
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost

/* Per-function target option ("target" attribute / pragma) support.  */
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_CAN_INLINE_P
#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
30313 struct gcc_target targetm = TARGET_INITIALIZER;
30315 #include "gt-i386.h"