/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
58 static int x86_builtin_vectorization_cost (bool);
59 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default stack-probe limit: -1 disables the stack-limit check unless the
   target configuration overrides it.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   Index 4 is the catch-all for any other mode.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop descriptor for the unused 64-bit half of a table.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
149 /* Processor costs (relative to an add) */
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
416 the alignment). For small blocks inline loop is still a noticeable win, for bigger
417 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
418 more expensive startup time in CPU, but after 4K the difference is down in the noise.
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with REP prefix (relative to loops)
637 compared to K8. Alignment becomes important after 8 bytes for memcpy and
638 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702 immediately, they are queued. We set number of simultaneous prefetches
703 to a large constant to reflect this (it probably is not a good idea not
704 to limit number of prefetches at all, as their execution also takes some
706 100, /* number of parallel prefetches */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 /* K8 has optimized REP instruction for medium sized blocks, but for very small
715 blocks it is better to use loop. For large blocks, libcall can do
716 nontemporary accesses and beat inline considerably. */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
780 MOVD reg64, xmmreg Double FADD 3
782 MOVD reg32, xmmreg Double FADD 3
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued. We set number of simultaneous prefetches
789 to a large constant to reflect this (it probably is not a good idea not
790 to limit number of prefetches at all, as their execution also takes some
792 100, /* number of parallel prefetches */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
801 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
802 very small blocks it is better to use loop. For large blocks, libcall can
803 do nontemporary accesses and beat inline considerably. */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel Pentium 4 (NetBurst).  All entries are relative
   to a one-cycle add; integer loads/stores are relative to reg-reg move (2).
   NOTE(review): some initializer lines appear elided in this view — verify
   field count against struct processor_costs before editing.  */
823 struct processor_costs pentium4_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 COSTS_N_INSNS (4), /* variable shift costs */
827 COSTS_N_INSNS (4), /* constant shift costs */
828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (15), /* HI */
830 COSTS_N_INSNS (15), /* SI */
831 COSTS_N_INSNS (15), /* DI */
832 COSTS_N_INSNS (15)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (56), /* HI */
836 COSTS_N_INSNS (56), /* SI */
837 COSTS_N_INSNS (56), /* DI */
838 COSTS_N_INSNS (56)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 16, /* "large" insn */
843 2, /* cost for loading QImode using movzbl */
844 {4, 5, 4}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {2, 3, 2}, /* cost of storing integer registers */
848 2, /* cost of reg,reg fld/fst */
849 {2, 2, 6}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {4, 4, 6}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {2, 2}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {2, 2}, /* cost of storing MMX registers
857 in SImode and DImode */
858 12, /* cost of moving SSE register */
859 {12, 12, 12}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {2, 2, 8}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 10, /* MMX or SSE register to integer */
864 8, /* size of l1 cache. */
865 256, /* size of l2 cache. */
866 64, /* size of prefetch block */
867 6, /* number of parallel prefetches */
869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* memcpy strategy: tiny blocks via a byte loop, everything else via
   rep prefix; 64-bit variant unused (DUMMY_STRINGOP_ALGS).  */
875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 DUMMY_STRINGOP_ALGS},
/* memset strategy.  */
877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
879 DUMMY_STRINGOP_ALGS},
/* Vectorizer cost model entries.  */
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel Nocona (64-bit Prescott/NetBurst) core.
   Same field layout and conventions as pentium4_cost above.  */
894 struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* memcpy strategy: {max block size, algorithm} pairs, -1 = unbounded;
   first row is 32-bit mode, second row 64-bit mode.  */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
/* memset strategy.  */
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel Core 2 microarchitecture.  The "+ 1" on lea
   deliberately makes lea slightly dearer than add to steer synth_mult.  */
967 struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy strategy: 32-bit row then 64-bit row.  */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* memset strategy.  */
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel Atom (in-order Bonnell) core.  Note the much
   higher FP and wide-divide latencies compared to Core 2.  */
1040 struct processor_costs atom_cost = {
1041 COSTS_N_INSNS (1), /* cost of an add instruction */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 2, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 256, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 3, /* Branch cost */
1086 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1087 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1088 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1089 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1090 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1091 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy strategy: 32-bit row then 64-bit row.  */
1092 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1093 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1094 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* memset strategy.  */
1095 {{libcall, {{8, loop}, {15, unrolled_loop},
1096 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1097 {libcall, {{24, loop}, {32, unrolled_loop},
1098 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
1099 1, /* scalar_stmt_cost. */
1100 1, /* scalar load_cost. */
1101 1, /* scalar_store_cost. */
1102 1, /* vec_stmt_cost. */
1103 1, /* vec_to_scalar_cost. */
1104 1, /* scalar_to_vec_cost. */
1105 1, /* vec_align_load_cost. */
1106 2, /* vec_unalign_load_cost. */
1107 1, /* vec_store_cost. */
1108 3, /* cond_taken_branch_cost. */
1109 1, /* cond_not_taken_branch_cost. */
1112 /* Generic64 should produce code tuned for Nocona and K8. */
/* Blended 64-bit cost model: a compromise between the Intel and AMD
   tables above, used when compiling with -mtune=generic in 64-bit mode.  */
1114 struct processor_costs generic64_cost = {
1115 COSTS_N_INSNS (1), /* cost of an add instruction */
1116 /* On all chips taken into consideration lea is 2 cycles and more. With
1117 this cost however our current implementation of synth_mult results in
1118 use of unnecessary temporary registers causing regression on several
1119 SPECfp benchmarks. */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 512, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1164 is increased to perhaps more appropriate value of 5. */
1165 3, /* Branch cost */
1166 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1167 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1168 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1169 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1170 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1171 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy strategy: 32-bit row unused (generic64 is 64-bit only).  */
1172 {DUMMY_STRINGOP_ALGS,
1173 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* memset strategy.  */
1174 {DUMMY_STRINGOP_ALGS,
1175 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
1176 1, /* scalar_stmt_cost. */
1177 1, /* scalar load_cost. */
1178 1, /* scalar_store_cost. */
1179 1, /* vec_stmt_cost. */
1180 1, /* vec_to_scalar_cost. */
1181 1, /* scalar_to_vec_cost. */
1182 1, /* vec_align_load_cost. */
1183 2, /* vec_unalign_load_cost. */
1184 1, /* vec_store_cost. */
1185 3, /* cond_taken_branch_cost. */
1186 1, /* cond_not_taken_branch_cost. */
1189 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* Blended 32-bit cost model for -mtune=generic in 32-bit mode; identical to
   generic64_cost except for the l2 size and the 32-bit stringop rows.  */
1191 struct processor_costs generic32_cost = {
1192 COSTS_N_INSNS (1), /* cost of an add instruction */
1193 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1194 COSTS_N_INSNS (1), /* variable shift costs */
1195 COSTS_N_INSNS (1), /* constant shift costs */
1196 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1197 COSTS_N_INSNS (4), /* HI */
1198 COSTS_N_INSNS (3), /* SI */
1199 COSTS_N_INSNS (4), /* DI */
1200 COSTS_N_INSNS (2)}, /* other */
1201 0, /* cost of multiply per each bit set */
1202 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1203 COSTS_N_INSNS (26), /* HI */
1204 COSTS_N_INSNS (42), /* SI */
1205 COSTS_N_INSNS (74), /* DI */
1206 COSTS_N_INSNS (74)}, /* other */
1207 COSTS_N_INSNS (1), /* cost of movsx */
1208 COSTS_N_INSNS (1), /* cost of movzx */
1209 8, /* "large" insn */
1210 17, /* MOVE_RATIO */
1211 4, /* cost for loading QImode using movzbl */
1212 {4, 4, 4}, /* cost of loading integer registers
1213 in QImode, HImode and SImode.
1214 Relative to reg-reg move (2). */
1215 {4, 4, 4}, /* cost of storing integer registers */
1216 4, /* cost of reg,reg fld/fst */
1217 {12, 12, 12}, /* cost of loading fp registers
1218 in SFmode, DFmode and XFmode */
1219 {6, 6, 8}, /* cost of storing fp registers
1220 in SFmode, DFmode and XFmode */
1221 2, /* cost of moving MMX register */
1222 {8, 8}, /* cost of loading MMX registers
1223 in SImode and DImode */
1224 {8, 8}, /* cost of storing MMX registers
1225 in SImode and DImode */
1226 2, /* cost of moving SSE register */
1227 {8, 8, 8}, /* cost of loading SSE registers
1228 in SImode, DImode and TImode */
1229 {8, 8, 8}, /* cost of storing SSE registers
1230 in SImode, DImode and TImode */
1231 5, /* MMX or SSE register to integer */
1232 32, /* size of l1 cache. */
1233 256, /* size of l2 cache. */
1234 64, /* size of prefetch block */
1235 6, /* number of parallel prefetches */
1236 3, /* Branch cost */
1237 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1238 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1239 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1240 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1241 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1242 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy strategy: 64-bit row unused (generic32 is 32-bit only).  */
1243 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1244 DUMMY_STRINGOP_ALGS},
/* memset strategy.  */
1245 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1246 DUMMY_STRINGOP_ALGS},
/* Vectorizer cost model entries.  */
1247 1, /* scalar_stmt_cost. */
1248 1, /* scalar load_cost. */
1249 1, /* scalar_store_cost. */
1250 1, /* vec_stmt_cost. */
1251 1, /* vec_to_scalar_cost. */
1252 1, /* scalar_to_vec_cost. */
1253 1, /* vec_align_load_cost. */
1254 2, /* vec_unalign_load_cost. */
1255 1, /* vec_store_cost. */
1256 3, /* cond_taken_branch_cost. */
1257 1, /* cond_not_taken_branch_cost. */
/* Active cost table; defaults to Pentium and is repointed at the table
   matching the selected -mtune target during option processing.  */
1260 const struct processor_costs *ix86_cost = &pentium_cost;
1262 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; OR these together to say which
   tuning targets a given feature applies to.  */
1263 #define m_386 (1<<PROCESSOR_I386)
1264 #define m_486 (1<<PROCESSOR_I486)
1265 #define m_PENT (1<<PROCESSOR_PENTIUM)
1266 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1267 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1268 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1269 #define m_CORE2 (1<<PROCESSOR_CORE2)
1270 #define m_ATOM (1<<PROCESSOR_ATOM)
1272 #define m_GEODE (1<<PROCESSOR_GEODE)
1273 #define m_K6 (1<<PROCESSOR_K6)
1274 #define m_K6_GEODE (m_K6 | m_GEODE)
1275 #define m_K8 (1<<PROCESSOR_K8)
1276 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1277 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1278 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
/* Convenience unions of related AMD cores.  */
1279 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1281 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1282 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1284 /* Generic instruction choice should be common subset of supported CPUs
1285 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1286 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1288 /* Feature tests against the various tunings. */
/* Filled in from initial_ix86_tune_features for the selected tuning;
   indexed by the X86_TUNE_* enumerators.  */
1289 unsigned char ix86_tune_features[X86_TUNE_LAST];
1291 /* Feature tests against the various tunings used to create ix86_tune_features
1292 based on the processor mask. */
/* One m_* mask per X86_TUNE_* enumerator, in enumeration order.
   NOTE(review): several entries' mask lines appear elided in this view;
   confirm entry/enumerator alignment against X86_TUNE_LAST before editing.  */
1293 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1294 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1295 negatively, so enabling for Generic64 seems like good code size
1296 tradeoff. We can't enable it for 32bit generic because it does not
1297 work well with PPro base chips. */
1298 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1300 /* X86_TUNE_PUSH_MEMORY */
1301 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1302 | m_NOCONA | m_CORE2 | m_GENERIC,
1304 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1307 /* X86_TUNE_UNROLL_STRLEN */
1308 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1309 | m_CORE2 | m_GENERIC,
1311 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1312 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1314 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1315 on simulation result. But after P4 was made, no performance benefit
1316 was observed with branch hints. It also increases the code size.
1317 As a result, icc never generates branch hints. */
1320 /* X86_TUNE_DOUBLE_WITH_ADD */
1323 /* X86_TUNE_USE_SAHF */
1324 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1325 | m_NOCONA | m_CORE2 | m_GENERIC,
1327 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1328 partial dependencies. */
1329 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1330 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1332 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1333 register stalls on Generic32 compilation setting as well. However
1334 in current implementation the partial register stalls are not eliminated
1335 very well - they can be introduced via subregs synthesized by combine
1336 and can happen in caller/callee saving sequences. Because this option
1337 pays back little on PPro based chips and is in conflict with partial reg
1338 dependencies used by Athlon/P4 based chips, it is better to leave it off
1339 for generic32 for now. */
1342 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1343 m_CORE2 | m_GENERIC,
1345 /* X86_TUNE_USE_HIMODE_FIOP */
1346 m_386 | m_486 | m_K6_GEODE,
1348 /* X86_TUNE_USE_SIMODE_FIOP */
1349 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1351 /* X86_TUNE_USE_MOV0 */
1354 /* X86_TUNE_USE_CLTD */
1355 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1357 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1360 /* X86_TUNE_SPLIT_LONG_MOVES */
1363 /* X86_TUNE_READ_MODIFY_WRITE */
1366 /* X86_TUNE_READ_MODIFY */
1369 /* X86_TUNE_PROMOTE_QIMODE */
1370 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1371 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1373 /* X86_TUNE_FAST_PREFIX */
1374 ~(m_PENT | m_486 | m_386),
1376 /* X86_TUNE_SINGLE_STRINGOP */
1377 m_386 | m_PENT4 | m_NOCONA,
1379 /* X86_TUNE_QIMODE_MATH */
1382 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1383 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1384 might be considered for Generic32 if our scheme for avoiding partial
1385 stalls was more effective. */
1388 /* X86_TUNE_PROMOTE_QI_REGS */
1391 /* X86_TUNE_PROMOTE_HI_REGS */
1394 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1395 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1396 | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_ADD_ESP_8 */
1399 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1400 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1402 /* X86_TUNE_SUB_ESP_4 */
1403 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1406 /* X86_TUNE_SUB_ESP_8 */
1407 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1408 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1410 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1411 for DFmode copies */
1412 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1413 | m_GENERIC | m_GEODE),
1415 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1416 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1418 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1419 conflict here in between PPro/Pentium4 based chips that treat 128bit
1420 SSE registers as single units versus K8 based chips that divide SSE
1421 registers to two 64bit halves. This knob promotes all store destinations
1422 to be 128bit to allow register renaming on 128bit SSE units, but usually
1423 results in one extra microop on 64bit SSE units. Experimental results
1424 show that disabling this option on P4 brings over 20% SPECfp regression,
1425 while enabling it on K8 brings roughly 2.4% regression that can be partly
1426 masked by careful scheduling of moves. */
1427 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1430 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1433 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1434 are resolved on SSE register parts instead of whole registers, so we may
1435 maintain just lower part of scalar values in proper format leaving the
1436 upper part undefined. */
1439 /* X86_TUNE_SSE_TYPELESS_STORES */
1442 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1443 m_PPRO | m_PENT4 | m_NOCONA,
1445 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1446 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1448 /* X86_TUNE_PROLOGUE_USING_MOVE */
1449 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1451 /* X86_TUNE_EPILOGUE_USING_MOVE */
1452 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1454 /* X86_TUNE_SHIFT1 */
1457 /* X86_TUNE_USE_FFREEP */
1460 /* X86_TUNE_INTER_UNIT_MOVES */
1461 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1463 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1466 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1467 than 4 branch instructions in the 16 byte window. */
1468 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1471 /* X86_TUNE_SCHEDULE */
1472 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1475 /* X86_TUNE_USE_BT */
1476 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1478 /* X86_TUNE_USE_INCDEC */
1479 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1481 /* X86_TUNE_PAD_RETURNS */
1482 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1484 /* X86_TUNE_EXT_80387_CONSTANTS */
1485 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1486 | m_CORE2 | m_GENERIC,
1488 /* X86_TUNE_SHORTEN_X87_SSE */
1491 /* X86_TUNE_AVOID_VECTOR_DECODE */
1494 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1495 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1498 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1499 vector path on AMD machines. */
1500 m_K8 | m_GENERIC64 | m_AMDFAM10,
1502 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1504 m_K8 | m_GENERIC64 | m_AMDFAM10,
1506 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1510 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1511 but one byte longer. */
1514 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1515 operand that cannot be represented using a modRM byte. The XOR
1516 replacement is long decoded, so this split helps here as well. */
1519 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1521 m_AMDFAM10 | m_GENERIC,
1523 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1524 from integer to FP. */
1527 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1528 with a subsequent conditional jump instruction into a single
1529 compare-and-branch uop. */
1532 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1533 will impact LEA instruction selection. */
1537 /* Feature tests against the various architecture variations. */
/* Filled in per -march; indexed by the X86_ARCH_* enumerators.  */
1538 unsigned char ix86_arch_features[X86_ARCH_LAST];
1540 /* Feature tests against the various architecture variations, used to create
1541 ix86_arch_features based on the processor mask. */
/* Masks are mostly complements: the feature exists on every CPU EXCEPT
   the listed pre-feature chips.  */
1542 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1543 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1544 ~(m_386 | m_486 | m_PENT | m_K6),
1546 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1549 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1552 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1555 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPUs for which accumulating outgoing args (reserving the whole outgoing
   area once) beats pushing each argument.  */
1559 static const unsigned int x86_accumulate_outgoing_args
1560 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
/* CPUs whose x87 handles the fancy math builtins (sin, cos, sqrt...)
   natively without library fallback concerns.  */
1563 static const unsigned int x86_arch_always_fancy_math_387
1564 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1565 | m_NOCONA | m_CORE2 | m_GENERIC;
/* User-forced string-operation strategy (-mstringop-strategy); default is
   to pick from the active cost table.  */
1567 static enum stringop_alg stringop_alg = no_stringop;
1569 /* In case the average insn count for single function invocation is
1570 lower than this constant, emit fast (but longer) prologue and
1572 #define FAST_PROLOGUE_INSN_COUNT 20
1574 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* Assembly spellings indexed by hard register number (from i386.h).  */
1575 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1576 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1577 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1579 /* Array of the smallest class containing reg number REGNO, indexed by
1580 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1582 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1584 /* ax, dx, cx, bx */
1585 AREG, DREG, CREG, BREG,
1586 /* si, di, bp, sp */
1587 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* x87 FP stack: st(0), st(1), then the rest.  */
1589 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1590 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1593 /* flags, fpsr, fpcr, frame */
1594 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers: xmm0 is distinguished as SSE_FIRST_REG.  */
1596 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers.  */
1599 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* REX integer registers r8-r15 (not byte-addressable in all forms).  */
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1604 /* SSE REX registers */
1605 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1609 /* The "default" register map used in 32bit mode. */
/* Maps gcc hard register numbers to DBX/stabs register numbers;
   -1 means the register has no debug-info number.  */
1611 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1613 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1614 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1615 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1616 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1617 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1619 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1622 /* The "default" register map used in 64bit mode. */
/* 64-bit DBX numbering; unlike 32-bit mode, the REX registers have
   debug numbers here (8-15 integer, 25-32 SSE).  */
1624 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1626 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1627 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1628 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1629 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1630 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1631 8,9,10,11,12,13,14,15, /* extended integer registers */
1632 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1635 /* Define the register numbers to be used in Dwarf debugging information.
1636 The SVR4 reference port C compiler uses the following register numbers
1637 in its Dwarf output code:
1638 0 for %eax (gcc regno = 0)
1639 1 for %ecx (gcc regno = 2)
1640 2 for %edx (gcc regno = 1)
1641 3 for %ebx (gcc regno = 3)
1642 4 for %esp (gcc regno = 7)
1643 5 for %ebp (gcc regno = 6)
1644 6 for %esi (gcc regno = 4)
1645 7 for %edi (gcc regno = 5)
1646 The following three DWARF register numbers are never generated by
1647 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1648 believes these numbers have these meanings.
1649 8 for %eip (no gcc equivalent)
1650 9 for %eflags (gcc regno = 17)
1651 10 for %trapno (no gcc equivalent)
1652 It is not at all clear how we should number the FP stack registers
1653 for the x86 architecture. If the version of SDB on x86/svr4 were
1654 a bit less brain dead with respect to floating-point then we would
1655 have a precedent to follow with respect to DWARF register numbers
1656 for x86 FP registers, but the SDB on x86/svr4 is so completely
1657 broken with respect to FP registers that it is hardly worth thinking
1658 of it as something to strive for compatibility with.
1659 The version of x86/svr4 SDB I have at the moment does (partially)
1660 seem to believe that DWARF register number 11 is associated with
1661 the x86 register %st(0), but that's about all. Higher DWARF
1662 register numbers don't seem to be associated with anything in
1663 particular, and even for DWARF regno 11, SDB only seems to under-
1664 stand that it should say that a variable lives in %st(0) (when
1665 asked via an `=' command) if we said it was in DWARF regno 11,
1666 but SDB still prints garbage when asked for the value of the
1667 variable in question (via a `/' command).
1668 (Also note that the labels SDB prints for various FP stack regs
1669 when doing an `x' command are all wrong.)
1670 Note that these problems generally don't affect the native SVR4
1671 C compiler because it doesn't allow the use of -O with -g and
1672 because when it is *not* optimizing, it allocates a memory
1673 location for each floating-point variable, and the memory
1674 location is what gets described in the DWARF AT_location
1675 attribute for the variable in question.
1676 Regardless of the severe mental illness of the x86/svr4 SDB, we
1677 do something sensible here and we use the following DWARF
1678 register numbers. Note that these are all stack-top-relative
1680 11 for %st(0) (gcc regno = 8)
1681 12 for %st(1) (gcc regno = 9)
1682 13 for %st(2) (gcc regno = 10)
1683 14 for %st(3) (gcc regno = 11)
1684 15 for %st(4) (gcc regno = 12)
1685 16 for %st(5) (gcc regno = 13)
1686 17 for %st(6) (gcc regno = 14)
1687 18 for %st(7) (gcc regno = 15)
1689 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
/* Indexed by GCC hard-register number; an entry of -1 means the register
   has no SVR4 DBX/DWARF number.  The stack-top-relative numbering used
   for the FP stack registers is explained in the long comment above.  */
1691 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1692 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1693 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1694 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1695 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1697 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1700 /* Test and compare insns in i386.md store the information needed to
1701 generate branch and scc insns here. */
1703 rtx ix86_compare_op0 = NULL_RTX;
1704 rtx ix86_compare_op1 = NULL_RTX;
1706 /* Define parameter passing and return registers. */
1708 static int const x86_64_int_parameter_registers[6] =
/* SysV x86-64 ABI: integer arguments are passed in DI, SI, DX, CX, R8, R9,
   in that order.  */
1710 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1713 static int const x86_64_ms_abi_int_parameter_registers[4] =
/* Microsoft x64 ABI: integer arguments are passed in CX, DX, R8, R9.  */
1715 CX_REG, DX_REG, R8_REG, R9_REG
1718 static int const x86_64_int_return_registers[4] =
/* Integer return registers.  NOTE(review): DI/SI entries beyond AX/DX —
   confirm intended use against the consumers of this table.  */
1720 AX_REG, DX_REG, DI_REG, SI_REG
1723 /* Define the structure for the machine field in struct function. */
1725 struct stack_local_entry GTY(())
1727 unsigned short mode;
1730 struct stack_local_entry *next;
1733 /* Structure describing stack frame layout.
1734 Stack grows downward:
1740 saved frame pointer if frame_pointer_needed
1741 <- HARD_FRAME_POINTER
1750 [va_arg registers] (
1751 > to_allocate <- FRAME_POINTER
1763 HOST_WIDE_INT frame;
1765 int outgoing_arguments_size;
1768 HOST_WIDE_INT to_allocate;
1769 /* The offsets relative to ARG_POINTER. */
1770 HOST_WIDE_INT frame_pointer_offset;
1771 HOST_WIDE_INT hard_frame_pointer_offset;
1772 HOST_WIDE_INT stack_pointer_offset;
1774 /* When save_regs_using_mov is set, emit prologue using
1775 move instead of push instructions. */
1776 bool save_regs_using_mov;
1779 /* Code model option. */
1780 enum cmodel ix86_cmodel;
1782 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1784 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1786 /* Which unit we are generating floating point math for. */
1787 enum fpmath_unit ix86_fpmath;
1789 /* Which cpu are we scheduling for. */
1790 enum attr_cpu ix86_schedule;
1792 /* Which cpu are we optimizing for. */
1793 enum processor_type ix86_tune;
1795 /* Which instruction set architecture to use. */
1796 enum processor_type ix86_arch;
1798 /* true if sse prefetch instruction is not NOOP. */
1799 int x86_prefetch_sse;
1801 /* ix86_regparm_string as a number */
1802 static int ix86_regparm;
1804 /* -mstackrealign option */
1805 extern int ix86_force_align_arg_pointer;
1806 static const char ix86_force_align_arg_pointer_string[]
1807 = "force_align_arg_pointer";
1809 static rtx (*ix86_gen_leave) (void);
1810 static rtx (*ix86_gen_pop1) (rtx);
1811 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1812 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1813 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1814 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1815 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1816 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1818 /* Preferred alignment for stack boundary in bits. */
1819 unsigned int ix86_preferred_stack_boundary;
1821 /* Alignment for incoming stack boundary in bits specified at
1823 static unsigned int ix86_user_incoming_stack_boundary;
1825 /* Default alignment for incoming stack boundary in bits. */
1826 static unsigned int ix86_default_incoming_stack_boundary;
1828 /* Alignment for incoming stack boundary in bits. */
1829 unsigned int ix86_incoming_stack_boundary;
1831 /* The abi used by target. */
1832 enum calling_abi ix86_abi;
1834 /* Values 1-5: see jump.c */
1835 int ix86_branch_cost;
1837 /* Calling abi specific va_list type nodes. */
1838 static GTY(()) tree sysv_va_list_type_node;
1839 static GTY(()) tree ms_va_list_type_node;
1841 /* Variables which are this size or smaller are put in the data/bss
1842 or ldata/lbss sections. */
1844 int ix86_section_threshold = 65536;
1846 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1847 char internal_label_prefix[16];
1848 int internal_label_prefix_len;
1850 /* Fence to use after loop using movnt. */
1853 /* Register class used for passing given 64bit part of the argument.
1854 These represent classes as documented by the PS ABI, with the exception
1855 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1856 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1858 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1859 whenever possible (upper half does contain padding). */
1860 enum x86_64_reg_class
1863 X86_64_INTEGER_CLASS,
1864 X86_64_INTEGERSI_CLASS,
1871 X86_64_COMPLEX_X87_CLASS,
1875 #define MAX_CLASSES 4
1877 /* Table of constants used by fldpi, fldln2, etc.... */
1878 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1879 static bool ext_80387_constants_init = 0;
1882 static struct machine_function * ix86_init_machine_status (void);
1883 static rtx ix86_function_value (const_tree, const_tree, bool);
1884 static int ix86_function_regparm (const_tree, const_tree);
1885 static void ix86_compute_frame_layout (struct ix86_frame *);
1886 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1888 static void ix86_add_new_builtins (int);
1890 enum ix86_function_specific_strings
1892 IX86_FUNCTION_SPECIFIC_ARCH,
1893 IX86_FUNCTION_SPECIFIC_TUNE,
1894 IX86_FUNCTION_SPECIFIC_FPMATH,
1895 IX86_FUNCTION_SPECIFIC_MAX
1898 static char *ix86_target_string (int, int, const char *, const char *,
1899 const char *, bool);
1900 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1901 static void ix86_function_specific_save (struct cl_target_option *);
1902 static void ix86_function_specific_restore (struct cl_target_option *);
1903 static void ix86_function_specific_print (FILE *, int,
1904 struct cl_target_option *);
1905 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1906 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1907 static bool ix86_can_inline_p (tree, tree);
1908 static void ix86_set_current_function (tree);
1910 static enum calling_abi ix86_function_abi (const_tree);
1913 /* The svr4 ABI for the i386 says that records and unions are returned
1915 #ifndef DEFAULT_PCC_STRUCT_RETURN
1916 #define DEFAULT_PCC_STRUCT_RETURN 1
1919 /* Whether -mtune= or -march= were specified */
1920 static int ix86_tune_defaulted;
1921 static int ix86_arch_specified;
1923 /* Bit flags that specify the ISA we are compiling for. */
1924 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1926 /* A mask of ix86_isa_flags that includes bit X if X
1927 was set or cleared on the command line. */
1928 static int ix86_isa_flags_explicit;
1930 /* Define a set of ISAs which are available when a given ISA is
1931 enabled. MMX and SSE ISAs are handled separately. */
1933 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1934 #define OPTION_MASK_ISA_3DNOW_SET \
1935 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1937 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1938 #define OPTION_MASK_ISA_SSE2_SET \
1939 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1940 #define OPTION_MASK_ISA_SSE3_SET \
1941 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1942 #define OPTION_MASK_ISA_SSSE3_SET \
1943 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_1_SET \
1945 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1946 #define OPTION_MASK_ISA_SSE4_2_SET \
1947 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1948 #define OPTION_MASK_ISA_AVX_SET \
1949 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1950 #define OPTION_MASK_ISA_FMA_SET \
1951 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1953 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1955 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1957 #define OPTION_MASK_ISA_SSE4A_SET \
1958 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1959 #define OPTION_MASK_ISA_SSE5_SET \
1960 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1962 /* AES and PCLMUL need SSE2 because they use xmm registers */
1963 #define OPTION_MASK_ISA_AES_SET \
1964 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1965 #define OPTION_MASK_ISA_PCLMUL_SET \
1966 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1968 #define OPTION_MASK_ISA_ABM_SET \
1969 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1970 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1971 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1972 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1974 /* Define a set of ISAs which aren't available when a given ISA is
1975 disabled. MMX and SSE ISAs are handled separately. */
1977 #define OPTION_MASK_ISA_MMX_UNSET \
1978 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1979 #define OPTION_MASK_ISA_3DNOW_UNSET \
1980 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1981 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1983 #define OPTION_MASK_ISA_SSE_UNSET \
1984 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1985 #define OPTION_MASK_ISA_SSE2_UNSET \
1986 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1987 #define OPTION_MASK_ISA_SSE3_UNSET \
1988 (OPTION_MASK_ISA_SSE3 \
1989 | OPTION_MASK_ISA_SSSE3_UNSET \
1990 | OPTION_MASK_ISA_SSE4A_UNSET )
1991 #define OPTION_MASK_ISA_SSSE3_UNSET \
1992 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1993 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1994 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1995 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1996 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1997 #define OPTION_MASK_ISA_AVX_UNSET \
1998 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1999 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2001 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
2003 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2005 #define OPTION_MASK_ISA_SSE4A_UNSET \
2006 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2007 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2008 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2009 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2010 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2011 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2012 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2013 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2015 /* Vectorization library interface and handlers. */
2016 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2017 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2018 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2020 /* Processor target table, indexed by processor number */
2023 const struct processor_costs *cost; /* Processor costs */
2024 const int align_loop; /* Default alignments. */
2025 const int align_loop_max_skip;
2026 const int align_jump;
2027 const int align_jump_max_skip;
2028 const int align_func;
2031 static const struct ptt processor_target_table[PROCESSOR_max] =
/* One row per processor, indexed by enum processor_type (see the comment
   above: "indexed by processor number"), so row order must track that
   enum.  Fields per row: cost table pointer, then the alignment values
   declared in struct ptt (align_loop, align_loop_max_skip, align_jump,
   align_jump_max_skip, align_func).  Zero alignments mean "use default".  */
2033 {&i386_cost, 4, 3, 4, 3, 4},
2034 {&i486_cost, 16, 15, 16, 15, 16},
2035 {&pentium_cost, 16, 7, 16, 7, 16},
2036 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2037 {&geode_cost, 0, 0, 0, 0, 0},
2038 {&k6_cost, 32, 7, 32, 7, 32},
2039 {&athlon_cost, 16, 7, 16, 7, 16},
2040 {&pentium4_cost, 0, 0, 0, 0, 0},
2041 {&k8_cost, 16, 7, 16, 7, 16},
2042 {&nocona_cost, 0, 0, 0, 0, 0},
2043 {&core2_cost, 16, 10, 16, 10, 16},
2044 {&generic32_cost, 16, 7, 16, 7, 16},
2045 {&generic64_cost, 16, 10, 16, 10, 16},
2046 {&amdfam10_cost, 32, 24, 32, 7, 32},
2047 {&atom_cost, 16, 7, 16, 7, 16}
2050 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2076 /* Implement TARGET_HANDLE_OPTION. */
2079 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
/* CODE identifies which -m option is being processed; VALUE is nonzero for
   -mfoo and zero for -mno-foo.  Every ISA option follows the same pattern:
   the _SET mask enables the ISA together with everything it implies, the
   _UNSET mask disables the ISA together with everything that implies it
   (macros defined above), and ix86_isa_flags_explicit records which bits
   were chosen on the command line so that later -march= defaulting does
   not override an explicit user choice.  */
2086 	  ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2087 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2091 	  ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2092 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2099 	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2100 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2104 	  ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2105 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2115 	  ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2116 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2120 	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2121 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2128 	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2129 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2133 	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2134 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2141 	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2142 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2146 	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2147 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2154 	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2155 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2159 	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2160 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2167 	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2168 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2172 	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2173 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2180 	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2181 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2185 	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2186 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2193 	  ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2194 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2198 	  ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2199 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2206 	  ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2207 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2211 	  ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2212 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
/* -msse4 / -mno-sse4 are umbrella options covering SSE4.1 and SSE4.2;
   note they intentionally do not touch ix86_isa_flags_explicit's own bit,
   only the combined SET/UNSET masks.  */
2217 	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2218 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2222 	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2223 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2229 	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2230 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2234 	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2235 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2242 	  ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2243 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2247 	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2248 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2255 	  ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2256 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2260 	  ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2261 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2268 	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2269 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2273 	  ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2274 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2281 	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2282 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2286 	  ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2287 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2294 	  ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2295 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2299 	  ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2300 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2307 	  ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2308 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2312 	  ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2313 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2320 	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2321 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2325 	  ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2326 	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2335 /* Return a string that documents the current -m options.  The caller is
2336    responsible for freeing the string.  ISA and FLAGS are the current
     ix86_isa_flags / target_flags bit sets; ARCH, TUNE and FPMATH are the
     corresponding option strings (may be NULL/empty); ADD_NL_P requests
     line wrapping and reporting of unrecognized leftover bits.  */
2339 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2340 const char *fpmath, bool add_nl_p)
2342 struct ix86_target_opts
2344 const char *option; /* option string */
2345 int mask; /* isa mask options */
2348 /* This table is ordered so that options like -msse5 or -msse4.2 that
2349    imply preceding options match those first.  */
2350 static struct ix86_target_opts isa_opts[] =
2352 { "-m64", OPTION_MASK_ISA_64BIT },
2353 { "-msse5", OPTION_MASK_ISA_SSE5 },
2354 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2355 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2356 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2357 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2358 { "-msse3", OPTION_MASK_ISA_SSE3 },
2359 { "-msse2", OPTION_MASK_ISA_SSE2 },
2360 { "-msse", OPTION_MASK_ISA_SSE },
2361 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2362 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2363 { "-mmmx", OPTION_MASK_ISA_MMX },
2364 { "-mabm", OPTION_MASK_ISA_ABM },
2365 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2366 { "-maes", OPTION_MASK_ISA_AES },
2367 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2371 static struct ix86_target_opts flag_opts[] =
2373 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2374 { "-m80387", MASK_80387 },
2375 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2376 { "-malign-double", MASK_ALIGN_DOUBLE },
2377 { "-mcld", MASK_CLD },
2378 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2379 { "-mieee-fp", MASK_IEEE_FP },
2380 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2381 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2382 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2383 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2384 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2385 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2386 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2387 { "-mno-red-zone", MASK_NO_RED_ZONE },
2388 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2389 { "-mrecip", MASK_RECIP },
2390 { "-mrtd", MASK_RTD },
2391 { "-msseregparm", MASK_SSEREGPARM },
2392 { "-mstack-arg-probe", MASK_STACK_PROBE },
2393 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2396 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2399 char target_other[40];
2408 memset (opts, '\0', sizeof (opts));
2410 /* Add -march= option.  */
2413 opts[num][0] = "-march=";
2414 opts[num++][1] = arch;
2417 /* Add -mtune= option.  */
2420 opts[num][0] = "-mtune=";
2421 opts[num++][1] = tune;
2424 /* Pick out the options in isa options.  */
2425 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2427 if ((isa & isa_opts[i].mask) != 0)
2429 opts[num++][0] = isa_opts[i].option;
2430 isa &= ~ isa_opts[i].mask;
/* Report any ISA bits with no table entry numerically.  */
2434 if (isa && add_nl_p)
2436 opts[num++][0] = isa_other;
2437 sprintf (isa_other, "(other isa: 0x%x)", isa);
2440 /* Add flag options.  */
2441 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2443 if ((flags & flag_opts[i].mask) != 0)
2445 opts[num++][0] = flag_opts[i].option;
2446 flags &= ~ flag_opts[i].mask;
/* Report any leftover target_flags bits numerically.  Fixed: this
   previously printed ISA instead of FLAGS (copy-paste from the block
   above), so unrecognized flag bits were reported with the wrong value.  */
2450 if (flags && add_nl_p)
2452 opts[num++][0] = target_other;
2453 sprintf (target_other, "(other flags: 0x%x)", flags);
2456 /* Add -fpmath= option.  */
2459 opts[num][0] = "-mfpmath=";
2460 opts[num++][1] = fpmath;
2467 gcc_assert (num < ARRAY_SIZE (opts));
2469 /* Size the string.  */
2471 sep_len = (add_nl_p) ? 3 : 1;
2472 for (i = 0; i < num; i++)
2475 for (j = 0; j < 2; j++)
2477 len += strlen (opts[i][j]);
2480 /* Build the string.  */
2481 ret = ptr = (char *) xmalloc (len);
2484 for (i = 0; i < num; i++)
2488 for (j = 0; j < 2; j++)
2489 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* Wrap lines at roughly 70 columns when newlines were requested.  */
2496 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2504 for (j = 0; j < 2; j++)
2507 memcpy (ptr, opts[i][j], len2[j]);
2509 line_len += len2[j];
2514 gcc_assert (ret + len >= ptr);
2519 /* Function that is callable from the debugger to print the current
2522 ix86_debug_options (void)
/* Print the current target options to stderr, one wrapped string built by
   ix86_target_string (add_nl_p == true), or "<no options>" when none.
   NOTE(review): ix86_target_string documents that the returned string is
   heap-allocated and must be freed by the caller — confirm the (not shown
   here) tail of this function frees OPTS.  */
2524 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2525 ix86_arch_string, ix86_tune_string,
2526 ix86_fpmath_string, true);
2530 fprintf (stderr, "%s\n\n", opts);
2534 fprintf (stderr, "<no options>\n\n");
2539 /* Sometimes certain combinations of command options do not make
2540 sense on a particular target machine. You can define a macro
2541 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2542 defined, is executed once just after all the command options have
2545 Don't use this macro to turn on various extra optimizations for
2546 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2549 override_options (bool main_args_p)
2552 unsigned int ix86_arch_mask, ix86_tune_mask;
2557 /* Comes from final.c -- no real reason to change it. */
2558 #define MAX_CODE_ALIGN 16
2566 PTA_PREFETCH_SSE = 1 << 4,
2568 PTA_3DNOW_A = 1 << 6,
2572 PTA_POPCNT = 1 << 10,
2574 PTA_SSE4A = 1 << 12,
2575 PTA_NO_SAHF = 1 << 13,
2576 PTA_SSE4_1 = 1 << 14,
2577 PTA_SSE4_2 = 1 << 15,
2580 PTA_PCLMUL = 1 << 18,
2587 const char *const name; /* processor name or nickname. */
2588 const enum processor_type processor;
2589 const enum attr_cpu schedule;
2590 const unsigned /*enum pta_flags*/ flags;
2592 const processor_alias_table[] =
2594 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2595 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2596 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2597 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2598 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2599 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2600 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2601 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2602 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2603 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2604 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2605 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2606 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2608 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2610 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2611 PTA_MMX | PTA_SSE | PTA_SSE2},
2612 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2613 PTA_MMX |PTA_SSE | PTA_SSE2},
2614 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2615 PTA_MMX | PTA_SSE | PTA_SSE2},
2616 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2617 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2618 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2619 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2620 | PTA_CX16 | PTA_NO_SAHF},
2621 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2622 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2623 | PTA_SSSE3 | PTA_CX16},
2624 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2625 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2626 | PTA_SSSE3 | PTA_CX16},
2627 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2628 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2629 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2630 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2631 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2632 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2633 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2634 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2635 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2636 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2637 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2638 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2639 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2640 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2641 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2642 {"x86-64", PROCESSOR_K8, CPU_K8,
2643 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2644 {"k8", PROCESSOR_K8, CPU_K8,
2645 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2646 | PTA_SSE2 | PTA_NO_SAHF},
2647 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2648 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2649 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2650 {"opteron", PROCESSOR_K8, CPU_K8,
2651 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2652 | PTA_SSE2 | PTA_NO_SAHF},
2653 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2654 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2655 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2656 {"athlon64", PROCESSOR_K8, CPU_K8,
2657 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2658 | PTA_SSE2 | PTA_NO_SAHF},
2659 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2660 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2661 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2662 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2663 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2664 | PTA_SSE2 | PTA_NO_SAHF},
2665 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2666 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2667 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2668 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2669 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2670 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2671 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2672 0 /* flags are only used for -march switch. */ },
2673 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2674 PTA_64BIT /* flags are only used for -march switch. */ },
2677 int const pta_size = ARRAY_SIZE (processor_alias_table);
2679 /* Set up prefix/suffix so the error messages refer to either the command
2680 line argument, or the attribute(target). */
2689 prefix = "option(\"";
2694 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2695 SUBTARGET_OVERRIDE_OPTIONS;
2698 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2699 SUBSUBTARGET_OVERRIDE_OPTIONS;
2702 /* -fPIC is the default for x86_64. */
2703 if (TARGET_MACHO && TARGET_64BIT)
2706 /* Set the default values for switches whose default depends on TARGET_64BIT
2707 in case they weren't overwritten by command line options. */
2710 /* Mach-O doesn't support omitting the frame pointer for now. */
2711 if (flag_omit_frame_pointer == 2)
2712 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2713 if (flag_asynchronous_unwind_tables == 2)
2714 flag_asynchronous_unwind_tables = 1;
2715 if (flag_pcc_struct_return == 2)
2716 flag_pcc_struct_return = 0;
2720 if (flag_omit_frame_pointer == 2)
2721 flag_omit_frame_pointer = 0;
2722 if (flag_asynchronous_unwind_tables == 2)
2723 flag_asynchronous_unwind_tables = 0;
2724 if (flag_pcc_struct_return == 2)
2725 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2728 /* Need to check -mtune=generic first. */
2729 if (ix86_tune_string)
2731 if (!strcmp (ix86_tune_string, "generic")
2732 || !strcmp (ix86_tune_string, "i686")
2733 /* As special support for cross compilers we read -mtune=native
2734 as -mtune=generic. With native compilers we won't see the
2735 -mtune=native, as it was changed by the driver. */
2736 || !strcmp (ix86_tune_string, "native"))
2739 ix86_tune_string = "generic64";
2741 ix86_tune_string = "generic32";
2743 /* If this call is for setting the option attribute, allow the
2744 generic32/generic64 that was previously set. */
2745 else if (!main_args_p
2746 && (!strcmp (ix86_tune_string, "generic32")
2747 || !strcmp (ix86_tune_string, "generic64")))
2749 else if (!strncmp (ix86_tune_string, "generic", 7))
2750 error ("bad value (%s) for %stune=%s %s",
2751 ix86_tune_string, prefix, suffix, sw);
2755 if (ix86_arch_string)
2756 ix86_tune_string = ix86_arch_string;
2757 if (!ix86_tune_string)
2759 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2760 ix86_tune_defaulted = 1;
2763 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2764 need to use a sensible tune option. */
2765 if (!strcmp (ix86_tune_string, "generic")
2766 || !strcmp (ix86_tune_string, "x86-64")
2767 || !strcmp (ix86_tune_string, "i686"))
2770 ix86_tune_string = "generic64";
2772 ix86_tune_string = "generic32";
2775 if (ix86_stringop_string)
2777 if (!strcmp (ix86_stringop_string, "rep_byte"))
2778 stringop_alg = rep_prefix_1_byte;
2779 else if (!strcmp (ix86_stringop_string, "libcall"))
2780 stringop_alg = libcall;
2781 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2782 stringop_alg = rep_prefix_4_byte;
2783 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2785 /* rep; movq isn't available in 32-bit code. */
2786 stringop_alg = rep_prefix_8_byte;
2787 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2788 stringop_alg = loop_1_byte;
2789 else if (!strcmp (ix86_stringop_string, "loop"))
2790 stringop_alg = loop;
2791 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2792 stringop_alg = unrolled_loop;
2794 error ("bad value (%s) for %sstringop-strategy=%s %s",
2795 ix86_stringop_string, prefix, suffix, sw);
2797 if (!strcmp (ix86_tune_string, "x86-64"))
2798 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2799 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2800 prefix, suffix, prefix, suffix, prefix, suffix);
2802 if (!ix86_arch_string)
2803 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2805 ix86_arch_specified = 1;
2807 if (!strcmp (ix86_arch_string, "generic"))
2808 error ("generic CPU can be used only for %stune=%s %s",
2809 prefix, suffix, sw);
2810 if (!strncmp (ix86_arch_string, "generic", 7))
2811 error ("bad value (%s) for %sarch=%s %s",
2812 ix86_arch_string, prefix, suffix, sw);
2814 /* Validate -mabi= value. */
2815 if (ix86_abi_string)
2817 if (strcmp (ix86_abi_string, "sysv") == 0)
2818 ix86_abi = SYSV_ABI;
2819 else if (strcmp (ix86_abi_string, "ms") == 0)
2822 error ("unknown ABI (%s) for %sabi=%s %s",
2823 ix86_abi_string, prefix, suffix, sw);
2826 ix86_abi = DEFAULT_ABI;
2828 if (ix86_cmodel_string != 0)
2830 if (!strcmp (ix86_cmodel_string, "small"))
2831 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2832 else if (!strcmp (ix86_cmodel_string, "medium"))
2833 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2834 else if (!strcmp (ix86_cmodel_string, "large"))
2835 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2837 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2838 else if (!strcmp (ix86_cmodel_string, "32"))
2839 ix86_cmodel = CM_32;
2840 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2841 ix86_cmodel = CM_KERNEL;
2843 error ("bad value (%s) for %scmodel=%s %s",
2844 ix86_cmodel_string, prefix, suffix, sw);
2848 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2849 use of rip-relative addressing. This eliminates fixups that
2850 would otherwise be needed if this object is to be placed in a
2851 DLL, and is essentially just as efficient as direct addressing. */
2852 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2853 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2854 else if (TARGET_64BIT)
2855 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2857 ix86_cmodel = CM_32;
2859 if (ix86_asm_string != 0)
2862 && !strcmp (ix86_asm_string, "intel"))
2863 ix86_asm_dialect = ASM_INTEL;
2864 else if (!strcmp (ix86_asm_string, "att"))
2865 ix86_asm_dialect = ASM_ATT;
2867 error ("bad value (%s) for %sasm=%s %s",
2868 ix86_asm_string, prefix, suffix, sw);
2870 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2871 error ("code model %qs not supported in the %s bit mode",
2872 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2873 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2874 sorry ("%i-bit mode not compiled in",
2875 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2877 for (i = 0; i < pta_size; i++)
2878 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2880 ix86_schedule = processor_alias_table[i].schedule;
2881 ix86_arch = processor_alias_table[i].processor;
2882 /* Default cpu tuning to the architecture. */
2883 ix86_tune = ix86_arch;
2885 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2886 error ("CPU you selected does not support x86-64 "
2889 if (processor_alias_table[i].flags & PTA_MMX
2890 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2891 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2892 if (processor_alias_table[i].flags & PTA_3DNOW
2893 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2894 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2895 if (processor_alias_table[i].flags & PTA_3DNOW_A
2896 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2897 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2898 if (processor_alias_table[i].flags & PTA_SSE
2899 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2900 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2901 if (processor_alias_table[i].flags & PTA_SSE2
2902 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2903 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2904 if (processor_alias_table[i].flags & PTA_SSE3
2905 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2906 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2907 if (processor_alias_table[i].flags & PTA_SSSE3
2908 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2909 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2910 if (processor_alias_table[i].flags & PTA_SSE4_1
2911 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2912 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2913 if (processor_alias_table[i].flags & PTA_SSE4_2
2914 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2915 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2916 if (processor_alias_table[i].flags & PTA_AVX
2917 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2918 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2919 if (processor_alias_table[i].flags & PTA_FMA
2920 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2921 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2922 if (processor_alias_table[i].flags & PTA_SSE4A
2923 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2924 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2925 if (processor_alias_table[i].flags & PTA_SSE5
2926 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2927 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2928 if (processor_alias_table[i].flags & PTA_ABM
2929 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2930 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2931 if (processor_alias_table[i].flags & PTA_CX16
2932 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2933 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2934 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2935 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2936 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2937 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2938 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2939 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2940 if (processor_alias_table[i].flags & PTA_AES
2941 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2942 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2943 if (processor_alias_table[i].flags & PTA_PCLMUL
2944 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2945 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2946 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2947 x86_prefetch_sse = true;
2953 error ("bad value (%s) for %sarch=%s %s",
2954 ix86_arch_string, prefix, suffix, sw);
2956 ix86_arch_mask = 1u << ix86_arch;
2957 for (i = 0; i < X86_ARCH_LAST; ++i)
2958 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2960 for (i = 0; i < pta_size; i++)
2961 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2963 ix86_schedule = processor_alias_table[i].schedule;
2964 ix86_tune = processor_alias_table[i].processor;
2965 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2967 if (ix86_tune_defaulted)
2969 ix86_tune_string = "x86-64";
2970 for (i = 0; i < pta_size; i++)
2971 if (! strcmp (ix86_tune_string,
2972 processor_alias_table[i].name))
2974 ix86_schedule = processor_alias_table[i].schedule;
2975 ix86_tune = processor_alias_table[i].processor;
2978 error ("CPU you selected does not support x86-64 "
2981 /* Intel CPUs have always interpreted SSE prefetch instructions as
2982 NOPs; so, we can enable SSE prefetch instructions even when
2983 -mtune (rather than -march) points us to a processor that has them.
2984 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2985 higher processors. */
2987 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2988 x86_prefetch_sse = true;
2992 error ("bad value (%s) for %stune=%s %s",
2993 ix86_tune_string, prefix, suffix, sw);
2995 ix86_tune_mask = 1u << ix86_tune;
2996 for (i = 0; i < X86_TUNE_LAST; ++i)
2997 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3000 ix86_cost = &ix86_size_cost;
3002 ix86_cost = processor_target_table[ix86_tune].cost;
3004 /* Arrange to set up i386_stack_locals for all functions. */
3005 init_machine_status = ix86_init_machine_status;
3007 /* Validate -mregparm= value. */
3008 if (ix86_regparm_string)
3011 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3012 i = atoi (ix86_regparm_string);
3013 if (i < 0 || i > REGPARM_MAX)
3014 error ("%sregparm=%d%s is not between 0 and %d",
3015 prefix, i, suffix, REGPARM_MAX);
3020 ix86_regparm = REGPARM_MAX;
3022 /* If the user has provided any of the -malign-* options,
3023 warn and use that value only if -falign-* is not set.
3024 Remove this code in GCC 3.2 or later. */
3025 if (ix86_align_loops_string)
3027 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3028 prefix, suffix, suffix);
3029 if (align_loops == 0)
3031 i = atoi (ix86_align_loops_string);
3032 if (i < 0 || i > MAX_CODE_ALIGN)
3033 error ("%salign-loops=%d%s is not between 0 and %d",
3034 prefix, i, suffix, MAX_CODE_ALIGN);
3036 align_loops = 1 << i;
3040 if (ix86_align_jumps_string)
3042 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3043 prefix, suffix, suffix);
3044 if (align_jumps == 0)
3046 i = atoi (ix86_align_jumps_string);
3047 if (i < 0 || i > MAX_CODE_ALIGN)
3048 error ("%salign-loops=%d%s is not between 0 and %d",
3049 prefix, i, suffix, MAX_CODE_ALIGN);
3051 align_jumps = 1 << i;
3055 if (ix86_align_funcs_string)
3057 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3058 prefix, suffix, suffix);
3059 if (align_functions == 0)
3061 i = atoi (ix86_align_funcs_string);
3062 if (i < 0 || i > MAX_CODE_ALIGN)
3063 error ("%salign-loops=%d%s is not between 0 and %d",
3064 prefix, i, suffix, MAX_CODE_ALIGN);
3066 align_functions = 1 << i;
3070 /* Default align_* from the processor table. */
3071 if (align_loops == 0)
3073 align_loops = processor_target_table[ix86_tune].align_loop;
3074 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3076 if (align_jumps == 0)
3078 align_jumps = processor_target_table[ix86_tune].align_jump;
3079 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3081 if (align_functions == 0)
3083 align_functions = processor_target_table[ix86_tune].align_func;
3086 /* Validate -mbranch-cost= value, or provide default. */
3087 ix86_branch_cost = ix86_cost->branch_cost;
3088 if (ix86_branch_cost_string)
3090 i = atoi (ix86_branch_cost_string);
3092 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3094 ix86_branch_cost = i;
3096 if (ix86_section_threshold_string)
3098 i = atoi (ix86_section_threshold_string);
3100 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3102 ix86_section_threshold = i;
3105 if (ix86_tls_dialect_string)
3107 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3108 ix86_tls_dialect = TLS_DIALECT_GNU;
3109 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3110 ix86_tls_dialect = TLS_DIALECT_GNU2;
3111 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3112 ix86_tls_dialect = TLS_DIALECT_SUN;
3114 error ("bad value (%s) for %stls-dialect=%s %s",
3115 ix86_tls_dialect_string, prefix, suffix, sw);
3118 if (ix87_precision_string)
3120 i = atoi (ix87_precision_string);
3121 if (i != 32 && i != 64 && i != 80)
3122 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3127 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3129 /* Enable by default the SSE and MMX builtins. Do allow the user to
3130 explicitly disable any of these. In particular, disabling SSE and
3131 MMX for kernel code is extremely useful. */
3132 if (!ix86_arch_specified)
3134 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3135 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3138 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3142 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3144 if (!ix86_arch_specified)
3146 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3148 /* i386 ABI does not specify red zone. It still makes sense to use it
3149 when programmer takes care to stack from being destroyed. */
3150 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3151 target_flags |= MASK_NO_RED_ZONE;
3154 /* Keep nonleaf frame pointers. */
3155 if (flag_omit_frame_pointer)
3156 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3157 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3158 flag_omit_frame_pointer = 1;
3160 /* If we're doing fast math, we don't care about comparison order
3161 wrt NaNs. This lets us use a shorter comparison sequence. */
3162 if (flag_finite_math_only)
3163 target_flags &= ~MASK_IEEE_FP;
3165 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3166 since the insns won't need emulation. */
3167 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3168 target_flags &= ~MASK_NO_FANCY_MATH_387;
3170 /* Likewise, if the target doesn't have a 387, or we've specified
3171 software floating point, don't use 387 inline intrinsics. */
3173 target_flags |= MASK_NO_FANCY_MATH_387;
3175 /* Turn on MMX builtins for -msse. */
3178 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3179 x86_prefetch_sse = true;
3182 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3183 if (TARGET_SSE4_2 || TARGET_ABM)
3184 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3186 /* Validate -mpreferred-stack-boundary= value or default it to
3187 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3188 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3189 if (ix86_preferred_stack_boundary_string)
3191 i = atoi (ix86_preferred_stack_boundary_string);
3192 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3193 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3194 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3196 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3199 /* Set the default value for -mstackrealign. */
3200 if (ix86_force_align_arg_pointer == -1)
3201 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3203 /* Validate -mincoming-stack-boundary= value or default it to
3204 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3205 if (ix86_force_align_arg_pointer)
3206 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3208 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3209 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3210 if (ix86_incoming_stack_boundary_string)
3212 i = atoi (ix86_incoming_stack_boundary_string);
3213 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3214 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3215 i, TARGET_64BIT ? 4 : 2);
3218 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3219 ix86_incoming_stack_boundary
3220 = ix86_user_incoming_stack_boundary;
3224 /* Accept -msseregparm only if at least SSE support is enabled. */
3225 if (TARGET_SSEREGPARM
3227 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3229 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3230 if (ix86_fpmath_string != 0)
3232 if (! strcmp (ix86_fpmath_string, "387"))
3233 ix86_fpmath = FPMATH_387;
3234 else if (! strcmp (ix86_fpmath_string, "sse"))
3238 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3239 ix86_fpmath = FPMATH_387;
3242 ix86_fpmath = FPMATH_SSE;
3244 else if (! strcmp (ix86_fpmath_string, "387,sse")
3245 || ! strcmp (ix86_fpmath_string, "387+sse")
3246 || ! strcmp (ix86_fpmath_string, "sse,387")
3247 || ! strcmp (ix86_fpmath_string, "sse+387")
3248 || ! strcmp (ix86_fpmath_string, "both"))
3252 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3253 ix86_fpmath = FPMATH_387;
3255 else if (!TARGET_80387)
3257 warning (0, "387 instruction set disabled, using SSE arithmetics");
3258 ix86_fpmath = FPMATH_SSE;
3261 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3264 error ("bad value (%s) for %sfpmath=%s %s",
3265 ix86_fpmath_string, prefix, suffix, sw);
3268 /* If the i387 is disabled, then do not return values in it. */
3270 target_flags &= ~MASK_FLOAT_RETURNS;
3272 /* Use external vectorized library in vectorizing intrinsics. */
3273 if (ix86_veclibabi_string)
3275 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3276 ix86_veclib_handler = ix86_veclibabi_svml;
3277 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3278 ix86_veclib_handler = ix86_veclibabi_acml;
3280 error ("unknown vectorization library ABI type (%s) for "
3281 "%sveclibabi=%s %s", ix86_veclibabi_string,
3282 prefix, suffix, sw);
3285 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3286 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3288 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3290 /* ??? Unwind info is not correct around the CFG unless either a frame
3291 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3292 unwind info generation to be aware of the CFG and propagating states
3294 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3295 || flag_exceptions || flag_non_call_exceptions)
3296 && flag_omit_frame_pointer
3297 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3299 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3300 warning (0, "unwind tables currently require either a frame pointer "
3301 "or %saccumulate-outgoing-args%s for correctness",
3303 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3306 /* If stack probes are required, the space used for large function
3307 arguments on the stack must also be probed, so enable
3308 -maccumulate-outgoing-args so this happens in the prologue. */
3309 if (TARGET_STACK_PROBE
3310 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3312 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3313 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3314 "for correctness", prefix, suffix);
3315 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3318 /* For sane SSE instruction set generation we need fcomi instruction.
3319 It is safe to enable all CMOVE instructions. */
3323 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3326 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3327 p = strchr (internal_label_prefix, 'X');
3328 internal_label_prefix_len = p - internal_label_prefix;
3332 /* When scheduling description is not available, disable scheduler pass
3333 so it won't slow down the compilation and make x87 code slower. */
3334 if (!TARGET_SCHEDULE)
3335 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3337 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3338 set_param_value ("simultaneous-prefetches",
3339 ix86_cost->simultaneous_prefetches);
3340 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3341 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3342 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3343 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3344 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3345 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3347 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3348 can be optimized to ap = __builtin_next_arg (0). */
3350 targetm.expand_builtin_va_start = NULL;
3354 ix86_gen_leave = gen_leave_rex64;
3355 ix86_gen_pop1 = gen_popdi1;
3356 ix86_gen_add3 = gen_adddi3;
3357 ix86_gen_sub3 = gen_subdi3;
3358 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3359 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3360 ix86_gen_monitor = gen_sse3_monitor64;
3361 ix86_gen_andsp = gen_anddi3;
3365 ix86_gen_leave = gen_leave;
3366 ix86_gen_pop1 = gen_popsi1;
3367 ix86_gen_add3 = gen_addsi3;
3368 ix86_gen_sub3 = gen_subsi3;
3369 ix86_gen_sub3_carry = gen_subsi3_carry;
3370 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3371 ix86_gen_monitor = gen_sse3_monitor;
3372 ix86_gen_andsp = gen_andsi3;
3376 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3378 target_flags |= MASK_CLD & ~target_flags_explicit;
3381 /* Save the initial options in case the user does function specific options */
3383 target_option_default_node = target_option_current_node
3384 = build_target_option_node ();
3409 /* Save the current options */
/* Implements TARGET_OPTION_SAVE: snapshot the global x86 option state
   into *PTR so a later attribute((target(...))) switch can restore it.
   Mirror image of ix86_function_specific_restore below.
   NOTE(review): the "static void" header and braces of this function are
   elided in this excerpt.  */
3390 ix86_function_specific_save (struct cl_target_option *ptr)
/* The asserts suggest these values are packed into 8-bit fields of
   struct cl_target_option -- TODO confirm field widths in that struct.  */
3392 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3393 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3394 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3395 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3396 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
/* Field-by-field copy of the globals describing the current
   arch/tune/ISA selection.  */
3398 ptr->arch = ix86_arch;
3399 ptr->schedule = ix86_schedule;
3400 ptr->tune = ix86_tune;
3401 ptr->fpmath = ix86_fpmath;
3402 ptr->branch_cost = ix86_branch_cost;
3403 ptr->tune_defaulted = ix86_tune_defaulted;
3404 ptr->arch_specified = ix86_arch_specified;
3405 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3406 ptr->target_flags_explicit = target_flags_explicit;
3409 /* Restore the current options */
/* Implements TARGET_OPTION_RESTORE: reload the x86 option globals from
   *PTR (inverse of ix86_function_specific_save above) and, when the
   arch or tune selection actually changed, rebuild the cached boolean
   feature tables.  */
3412 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember the previous selection so the table rebuilds below can be
   skipped when nothing changed.  */
3414 enum processor_type old_tune = ix86_tune;
3415 enum processor_type old_arch = ix86_arch;
3416 unsigned int ix86_arch_mask, ix86_tune_mask;
3419 ix86_arch = ptr->arch;
3420 ix86_schedule = ptr->schedule;
3421 ix86_tune = ptr->tune;
3422 ix86_fpmath = ptr->fpmath;
3423 ix86_branch_cost = ptr->branch_cost;
3424 ix86_tune_defaulted = ptr->tune_defaulted;
3425 ix86_arch_specified = ptr->arch_specified;
3426 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3427 target_flags_explicit = ptr->target_flags_explicit;
3429 /* Recreate the arch feature tests if the arch changed.  */
3430 if (old_arch != ix86_arch)
/* Each entry of initial_ix86_arch_features is a bitmask over processor
   types; select this arch's bit and reduce to 0/1.  */
3432 ix86_arch_mask = 1u << ix86_arch;
3433 for (i = 0; i < X86_ARCH_LAST; ++i)
3434 ix86_arch_features[i]
3435 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3438 /* Recreate the tune optimization tests.  */
3439 if (old_tune != ix86_tune)
3441 ix86_tune_mask = 1u << ix86_tune;
3442 for (i = 0; i < X86_TUNE_LAST; ++i)
3443 ix86_tune_features[i]
3444 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3448 /* Print the current options */
/* Implements TARGET_OPTION_PRINT: dump the saved option state *PTR to
   FILE for debugging, each line indented by INDENT columns.
   NOTE(review): the declaration of the local target_string and the
   fallback strings for out-of-range arch/tune indices are elided in
   this excerpt.  */
3451 ix86_function_specific_print (FILE *file, int indent,
3452 struct cl_target_option *ptr)
/* Render the ISA/target flag bits as a human-readable option string;
   ix86_target_string allocates, hence the free () at the end.  */
3455 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3456 NULL, NULL, NULL, false);
/* Print arch/tune both numerically and, when the index is inside the
   cpu_names table, symbolically.  */
3458 fprintf (file, "%*sarch = %d (%s)\n",
3461 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3462 ? cpu_names[ptr->arch]
3465 fprintf (file, "%*stune = %d (%s)\n",
3468 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3469 ? cpu_names[ptr->tune]
/* fpmath is a bitmask: both ", 387" and ", sse" may be appended.  */
3472 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3473 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3474 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3475 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3479 fprintf (file, "%*s%s\n", indent, "", target_string);
3480 free (target_string);
3485 /* Inner function to process the attribute((target(...))), take an argument and
3486 set the current options from the argument.  If we have a list, recursively go
/* Returns a truth value (nonzero on success); collects the string-valued
   options (arch=/tune=/fpmath=) into P_STRINGS for the caller to apply.
   NOTE(review): several lines of this function (locals, braces, the
   no-comma path of the splitter) are elided in this excerpt.  */
3490 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry builders: string, its length, handler kind, option enum,
   and (for yes/no kinds) the target_flags mask it toggles.  */
3495 #define IX86_ATTR_ISA(S,O)  { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3496 #define IX86_ATTR_STR(S,O)  { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3497 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3498 #define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }
3513 enum ix86_opt_type type;
/* ISA options toggle ix86_isa_flags via ix86_handle_option.  */
3518 IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
3519 IX86_ATTR_ISA ("abm",	OPT_mabm),
3520 IX86_ATTR_ISA ("aes",	OPT_maes),
3521 IX86_ATTR_ISA ("avx",	OPT_mavx),
3522 IX86_ATTR_ISA ("mmx",	OPT_mmmx),
3523 IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
3524 IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
3525 IX86_ATTR_ISA ("sse",	OPT_msse),
3526 IX86_ATTR_ISA ("sse2",	OPT_msse2),
3527 IX86_ATTR_ISA ("sse3",	OPT_msse3),
3528 IX86_ATTR_ISA ("sse4",	OPT_msse4),
3529 IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
3530 IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
3531 IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
3532 IX86_ATTR_ISA ("sse5",	OPT_msse5),
3533 IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
3535 /* string options */
3536 IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
3537 IX86_ATTR_STR ("fpmath=",	IX86_FUNCTION_SPECIFIC_FPMATH),
3538 IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),
/* Boolean flag options: "yes" entries set the mask, "no" entries whose
   mask is a MASK_NO_* bit invert the sense.  */
3541 IX86_ATTR_YES ("cld",
3545 IX86_ATTR_NO ("fancy-math-387",
3546 OPT_mfancy_math_387,
3547 MASK_NO_FANCY_MATH_387),
3549 IX86_ATTR_NO ("fused-madd",
3551 MASK_NO_FUSED_MADD),
3553 IX86_ATTR_YES ("ieee-fp",
3557 IX86_ATTR_YES ("inline-all-stringops",
3558 OPT_minline_all_stringops,
3559 MASK_INLINE_ALL_STRINGOPS),
3561 IX86_ATTR_YES ("inline-stringops-dynamically",
3562 OPT_minline_stringops_dynamically,
3563 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3565 IX86_ATTR_NO ("align-stringops",
3566 OPT_mno_align_stringops,
3567 MASK_NO_ALIGN_STRINGOPS),
3569 IX86_ATTR_YES ("recip",
3575 /* If this is a list, recurse to get the options.  */
3576 if (TREE_CODE (args) == TREE_LIST)
3580 for (; args; args = TREE_CHAIN (args))
3581 if (TREE_VALUE (args)
3582 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
/* A single attribute argument must be a string constant.  */
3588 else if (TREE_CODE (args) != STRING_CST)
3591 /* Handle multiple arguments separated by commas.  */
3592 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3594 while (next_optstr && *next_optstr != '\0')
3596 char *p = next_optstr;
3598 char *comma = strchr (next_optstr, ',');
3599 const char *opt_string;
3600 size_t len, opt_len;
3605 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the token before the comma and advance past it.  */
3611 len = comma - next_optstr;
3612 next_optstr = comma + 1;
3620 /* Recognize no-xxx.  */
3621 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3630 /* Find the option.  */
3633 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3635 type = attrs[i].type;
3636 opt_len = attrs[i].len;
/* First-character check is a cheap filter before the memcmp; string
   options ("arch=") need extra characters after the '=' so len must
   exceed opt_len, all others must match exactly.  */
3637 if (ch == attrs[i].string[0]
3638 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3639 && memcmp (p, attrs[i].string, opt_len) == 0)
3642 mask = attrs[i].mask;
3643 opt_string = attrs[i].string;
3648 /* Process the option.  */
3651 error ("attribute(target(\"%s\")) is unknown", orig_p);
/* ISA options go through the regular command-line option machinery.  */
3655 else if (type == ix86_opt_isa)
3656 ix86_handle_option (opt, p, opt_set_p);
3658 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* "no" entries carry inverted masks, so flip the requested sense.  */
3660 if (type == ix86_opt_no)
3661 opt_set_p = !opt_set_p;
3664 target_flags |= mask;
3666 target_flags &= ~mask;
3669 else if (type == ix86_opt_str)
/* Each string option may appear at most once per attribute.  */
3673 error ("option(\"%s\") was already specified", opt_string);
/* Caller owns (and frees) the strdup'd value.  */
3677 p_strings[opt] = xstrdup (p + opt_len);
3687 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
3690 ix86_valid_target_attribute_tree (tree args)
/* Save the global string options so they can be restored after the
   temporary override_options () rerun below.  */
3692 const char *orig_arch_string = ix86_arch_string;
3693 const char *orig_tune_string = ix86_tune_string;
3694 const char *orig_fpmath_string = ix86_fpmath_string;
3695 int orig_tune_defaulted = ix86_tune_defaulted;
3696 int orig_arch_specified = ix86_arch_specified;
/* One malloc'd slot per IX86_FUNCTION_SPECIFIC_* string option,
   filled in by ix86_valid_target_attribute_inner_p.  */
3697 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3700 struct cl_target_option *def
3701 = TREE_TARGET_OPTION (target_option_default_node);
3703 /* Process each of the options on the chain.  */
3704 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3707 /* If the changed options are different from the default, rerun override_options,
3708 and then save the options away.  The string options are attribute options,
3709 and will be undone when we copy the save structure.  */
3710 if (ix86_isa_flags != def->ix86_isa_flags
3711 || target_flags != def->target_flags
3712 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3713 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3714 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3716 /* If we are using the default tune= or arch=, undo the string assigned,
3717 and use the default.  */
3718 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3719 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3720 else if (!orig_arch_specified)
3721 ix86_arch_string = NULL;
3723 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3724 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3725 else if (orig_tune_defaulted)
3726 ix86_tune_string = NULL;
3728 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
3729 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3730 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3731 else if (!TARGET_64BIT && TARGET_SSE)
3732 ix86_fpmath_string = "sse,387";
3734 /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
3735 override_options (false);
3737 /* Add any builtin functions with the new isa if any.  */
3738 ix86_add_new_builtins (ix86_isa_flags);
3740 /* Save the current options unless we are validating options for
3742 t = build_target_option_node ();
/* Restore the string globals that were temporarily redirected at the
   attribute strings; the attribute copies live on in the saved node.  */
3744 ix86_arch_string = orig_arch_string;
3745 ix86_tune_string = orig_tune_string;
3746 ix86_fpmath_string = orig_fpmath_string;
3748 /* Free up memory allocated to hold the strings */
3749 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3750 if (option_strings[i])
3751 free (option_strings[i]);
3757 /* Hook to validate attribute((target("string"))).  */
/* Implements TARGET_OPTION_VALID_ATTRIBUTE_P for FNDECL: parses ARGS,
   attaches the resulting target-option (and possibly optimization)
   nodes to the decl, and restores the previous global state before
   returning.  NAME and FLAGS are unused.  */
3760 ix86_valid_target_attribute_p (tree fndecl,
3761 tree ARG_UNUSED (name),
3763 int ARG_UNUSED (flags))
3765 struct cl_target_option cur_target;
3767 tree old_optimize = build_optimization_node ();
3768 tree new_target, new_optimize;
3769 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3771 /* If the function changed the optimization levels as well as setting target
3772 options, start with the optimizations specified.  */
3773 if (func_optimize && func_optimize != old_optimize)
3774 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3776 /* The target attributes may also change some optimization flags, so update
3777 the optimization options if necessary.  */
3778 cl_target_option_save (&cur_target);
3779 new_target = ix86_valid_target_attribute_tree (args);
3780 new_optimize = build_optimization_node ();
3787 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3789 if (old_optimize != new_optimize)
3790 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary global changes made while building the nodes.  */
3793 cl_target_option_restore (&cur_target);
3795 if (old_optimize != new_optimize)
3796 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3802 /* Hook to determine if one function can safely inline another.  */
/* Implements TARGET_CAN_INLINE_P: CALLEE may be inlined into CALLER
   only when the callee's target options are compatible with (a subset
   of) the caller's.  NOTE(review): the return statements of the
   individual branches are elided in this excerpt.  */
3805 ix86_can_inline_p (tree caller, tree callee)
3808 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3809 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3811 /* If callee has no option attributes, then it is ok to inline.  */
3815 /* If caller has no option attributes, but callee does then it is not ok to
3817 else if (!caller_tree)
3822 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3823 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3825 /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3826 can inline a SSE2 function but a SSE2 function can't inline a SSE5
3828 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3829 != callee_opts->ix86_isa_flags)
3832 /* See if we have the same non-isa options.  */
3833 else if (caller_opts->target_flags != callee_opts->target_flags)
3836 /* See if arch, tune, etc. are the same.  */
3837 else if (caller_opts->arch != callee_opts->arch)
3840 else if (caller_opts->tune != callee_opts->tune)
3843 else if (caller_opts->fpmath != callee_opts->fpmath)
3846 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3857 /* Remember the last target of ix86_set_current_function.  */
/* GTY(()) registers this cache as a GC root so the decl is not
   collected between invocations of the hook below.  */
3858 static GTY(()) tree ix86_previous_fndecl;
3860 /* Establish appropriate back-end context for processing the function
3861 FNDECL.  The argument might be NULL to indicate processing at top
3862 level, outside of any function scope.  */
/* Implements TARGET_SET_CURRENT_FUNCTION.  Switches the global option
   state to FNDECL's attribute((target(...))) settings, or back to the
   current defaults when the function has none.  */
3864 ix86_set_current_function (tree fndecl)
3866 /* Only change the context if the function changes.  This hook is called
3867 several times in the course of compiling a function, and we don't want to
3868 slow things down too much or call target_reinit when it isn't safe.  */
3869 if (fndecl && fndecl != ix86_previous_fndecl)
/* Compare the previous function's target-option node with the new
   one; NULL decls fall through to the elided default arms.  */
3871 tree old_tree = (ix86_previous_fndecl
3872 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3875 tree new_tree = (fndecl
3876 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3879 ix86_previous_fndecl = fndecl;
/* Same options as before: nothing to switch.  */
3880 if (old_tree == new_tree)
3885 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* New function has no target attribute: fall back to the options
   recorded in target_option_current_node.  */
3891 struct cl_target_option *def
3892 = TREE_TARGET_OPTION (target_option_current_node);
3894 cl_target_option_restore (def);
3901 /* Return true if this goes in large data/bss.  */
/* Only the x86-64 medium code models split data into "large" sections;
   other models never do.  EXP is a decl (or string constant) being
   placed.  NOTE(review): the return statements of the branches are
   elided in this excerpt.  */
3904 ix86_in_large_data_p (tree exp)
3906 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3909 /* Functions are never large data.  */
3910 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute naming the large-data sections forces
   large placement regardless of size.  */
3913 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3915 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3916 if (strcmp (section, ".ldata") == 0
3917 || strcmp (section, ".lbss") == 0)
/* Otherwise decide by size against -mlarge-data-threshold.  */
3923 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3925 /* If this is an incomplete type with size 0, then we can't put it
3926 in data because it might be too big when completed.  */
3927 if (!size || size > ix86_section_threshold)
3934 /* Switch to the appropriate section for output of DECL.
3935 DECL is either a `VAR_DECL' node or a constant of some sort.
3936 RELOC indicates whether forming the initial value of DECL requires
3937 link-time relocations.  */
/* Forward declaration so the TARGET_ASM_SELECT_SECTION initializer can
   reference this before the definition.  */
3939 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* Medium-model variant of default_elf_select_section: large-data decls
   go into ".l"-prefixed sections; everything else falls through to the
   default.  NOTE(review): several case bodies (break statements, the
   BSS/RODATA cases' section names) are elided in this excerpt.  */
3943 x86_64_elf_select_section (tree decl, int reloc,
3944 unsigned HOST_WIDE_INT align)
3946 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3947 && ix86_in_large_data_p (decl))
3949 const char *sname = NULL;
3950 unsigned int flags = SECTION_WRITE;
/* Map the decl's categorization to the corresponding large section.  */
3951 switch (categorize_decl_for_section (decl, reloc))
3956 case SECCAT_DATA_REL:
3957 sname = ".ldata.rel";
3959 case SECCAT_DATA_REL_LOCAL:
3960 sname = ".ldata.rel.local";
3962 case SECCAT_DATA_REL_RO:
3963 sname = ".ldata.rel.ro";
3965 case SECCAT_DATA_REL_RO_LOCAL:
3966 sname = ".ldata.rel.ro.local";
3970 flags |= SECTION_BSS;
3973 case SECCAT_RODATA_MERGE_STR:
3974 case SECCAT_RODATA_MERGE_STR_INIT:
3975 case SECCAT_RODATA_MERGE_CONST:
3979 case SECCAT_SRODATA:
3986 /* We don't split these for medium model.  Place them into
3987 default sections and hope for best.  */
3989 case SECCAT_EMUTLS_VAR:
3990 case SECCAT_EMUTLS_TMPL:
3995 /* We might get called with string constants, but get_named_section
3996 doesn't like them as they are not DECLs.  Also, we need to set
3997 flags in that case.  */
3999 return get_section (sname, flags, NULL);
4000 return get_named_section (decl, sname, reloc);
/* Not large data (or not medium model): use the generic ELF logic.  */
4003 return default_elf_select_section (decl, reloc, align);
4006 /* Build up a unique section name, expressed as a
4007 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4008 RELOC indicates whether the initial value of EXP requires
4009 link-time relocations.  */
/* Large-data counterpart of default_unique_section: chooses a ".l"
   section prefix by category, then builds "<linkonce><prefix>.<name>".
   NOTE(review): break statements and some case labels are elided in
   this excerpt.  */
4011 static void ATTRIBUTE_UNUSED
4012 x86_64_elf_unique_section (tree decl, int reloc)
4014 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4015 && ix86_in_large_data_p (decl))
4017 const char *prefix = NULL;
4018 /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
4019 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4021 switch (categorize_decl_for_section (decl, reloc))
4024 case SECCAT_DATA_REL:
4025 case SECCAT_DATA_REL_LOCAL:
4026 case SECCAT_DATA_REL_RO:
4027 case SECCAT_DATA_REL_RO_LOCAL:
/* The short ".ld"/".lb"/".lr" forms match the .gnu.linkonce naming
   convention used below.  */
4028 prefix = one_only ? ".ld" : ".ldata";
4031 prefix = one_only ? ".lb" : ".lbss";
4034 case SECCAT_RODATA_MERGE_STR:
4035 case SECCAT_RODATA_MERGE_STR_INIT:
4036 case SECCAT_RODATA_MERGE_CONST:
4037 prefix = one_only ? ".lr" : ".lrodata";
4039 case SECCAT_SRODATA:
4046 /* We don't split these for medium model.  Place them into
4047 default sections and hope for best.  */
4049 case SECCAT_EMUTLS_VAR:
4050 prefix = targetm.emutls.var_section;
4052 case SECCAT_EMUTLS_TMPL:
4053 prefix = targetm.emutls.tmpl_section;
4058 const char *name, *linkonce;
/* Use the assembler-visible (mangled, encoding-stripped) name.  */
4061 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4062 name = targetm.strip_name_encoding (name);
4064 /* If we're using one_only, then there needs to be a .gnu.linkonce
4065 prefix to the section name.  */
4066 linkonce = one_only ? ".gnu.linkonce" : "";
4068 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4070 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
/* Fallback for everything that is not medium-model large data.  */
4074 default_unique_section (decl, reloc);
4077 #ifdef COMMON_ASM_OP
4078 /* This says how to output assembler code to declare an
4079 uninitialized external linkage data object.
4081 For medium model x86-64 we need to use .largecomm opcode for
/* Emits either ".largecomm" (medium-model objects above the
   -mlarge-data-threshold size) or the regular COMMON_ASM_OP, followed by
   "name,size,alignment-in-bytes".  */
4084 x86_elf_aligned_common (FILE *file,
4085 const char *name, unsigned HOST_WIDE_INT size,
4088 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4089 && size > (unsigned int)ix86_section_threshold)
4090 fprintf (file, ".largecomm\t");
4092 fprintf (file, "%s", COMMON_ASM_OP);
4093 assemble_name (file, name);
/* ALIGN arrives in bits; the directive wants bytes.  */
4094 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4095 size, align / BITS_PER_UNIT);
4099 /* Utility function for targets to use in implementing
4100 ASM_OUTPUT_ALIGNED_BSS. */
/* Large medium-model BSS objects go to ".lbss"; others to the regular
   bss section.  Then emit alignment, the object label, and SIZE bytes of
   space (at least 1 so the label refers to real storage).  */
4103 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4104 const char *name, unsigned HOST_WIDE_INT size,
4107 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4108 && size > (unsigned int)ix86_section_threshold)
4109 switch_to_section (get_named_section (decl, ".lbss", 0));
4111 switch_to_section (bss_section);
4112 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4113 #ifdef ASM_DECLARE_OBJECT_NAME
4114 last_assemble_variable_decl = decl;
4115 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4117 /* Standard thing is just output label for the object. */
4118 ASM_OUTPUT_LABEL (file, name);
4119 #endif /* ASM_DECLARE_OBJECT_NAME */
4120 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* OPTIMIZATION_OPTIONS target macro implementation: set i386-specific
   optimization-level defaults.  Several flags are set to 2, meaning
   "unset" until override_options picks the real default once TARGET_64BIT
   is known.  */
4124 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4126 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4127 make the problem with not enough registers even worse. */
4128 #ifdef INSN_SCHEDULING
4130 flag_schedule_insns = 0;
4134 /* The Darwin libraries never set errno, so we might as well
4135 avoid calling them when that's the only reason we would. */
4136 flag_errno_math = 0;
4138 /* The default values of these switches depend on the TARGET_64BIT
4139 that is not known at this moment. Mark these values with 2 and
4140 let the user override these. In case there is no command line option
4141 specifying them, we will set the defaults in override_options. */
4143 flag_omit_frame_pointer = 2;
4144 flag_pcc_struct_return = 2;
4145 flag_asynchronous_unwind_tables = 2;
4146 flag_vect_cost_model = 1;
4147 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4148 SUBTARGET_OPTIMIZATION_OPTIONS;
4152 /* Decide whether we can make a sibling call to a function. DECL is the
4153 declaration of the function being targeted by the call and EXP is the
4154 CALL_EXPR representing the call. */
/* TARGET_FUNCTION_OK_FOR_SIBCALL hook.  Rejects sibcalls when PIC/PLT
   needs %ebx, when return-value locations differ (notably the x87 stack),
   when an indirect 32-bit call would have no free call-clobbered register
   for the target address, and when stack realignment would be broken.
   NOTE(review): sampled excerpt -- early returns and braces are elided.  */
4157 ix86_function_ok_for_sibcall (tree decl, tree exp)
4162 /* If we are generating position-independent code, we cannot sibcall
4163 optimize any indirect call, or a direct call to a global function,
4164 as the PLT requires %ebx be live. */
4165 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Recover the called function's type from the CALL_EXPR.  */
4172 func = TREE_TYPE (CALL_EXPR_FN (exp));
4173 if (POINTER_TYPE_P (func))
4174 func = TREE_TYPE (func);
4177 /* Check that the return value locations are the same. Like
4178 if we are returning floats on the 80387 register stack, we cannot
4179 make a sibcall from a function that doesn't return a float to a
4180 function that does or, conversely, from a function that does return
4181 a float to a function that doesn't; the necessary stack adjustment
4182 would not be executed. This is also the place we notice
4183 differences in the return value ABI. Note that it is ok for one
4184 of the functions to have void return type as long as the return
4185 value of the other is passed in a register. */
4186 a = ix86_function_value (TREE_TYPE (exp), func, false);
4187 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4189 if (STACK_REG_P (a) || STACK_REG_P (b))
4191 if (!rtx_equal_p (a, b))
4194 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4196 else if (!rtx_equal_p (a, b))
4199 /* If this call is indirect, we'll need to be able to use a call-clobbered
4200 register for the address of the target function. Make sure that all
4201 such registers are not used for passing parameters. */
4202 if (!decl && !TARGET_64BIT)
4206 /* We're looking at the CALL_EXPR, we need the type of the function. */
4207 type = CALL_EXPR_FN (exp); /* pointer expression */
4208 type = TREE_TYPE (type); /* pointer type */
4209 type = TREE_TYPE (type); /* function type */
4211 if (ix86_function_regparm (type, NULL) >= 3)
4213 /* ??? Need to count the actual number of registers to be used,
4214 not the possible number of registers. Fix later. */
4219 /* Dllimport'd functions are also called indirectly. */
4220 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4222 && decl && DECL_DLLIMPORT_P (decl)
4223 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4226 /* If we need to align the outgoing stack, then sibcalling would
4227 unalign the stack, which may break the called function. */
4228 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4231 /* Otherwise okay. That also includes certain types of indirect calls. */
4235 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4236 calling convention attributes;
4237 arguments as in struct attribute_spec.handler. */
/* Validates the attribute target, the regparm argument (integer constant
   within REGPARM_MAX), and mutual-exclusion rules between calling
   convention attributes; on error sets *no_add_attrs or calls error().
   NOTE(review): sampled excerpt -- returns and braces are elided.  */
4240 ix86_handle_cconv_attribute (tree *node, tree name,
4242 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types/decls.  */
4245 if (TREE_CODE (*node) != FUNCTION_TYPE
4246 && TREE_CODE (*node) != METHOD_TYPE
4247 && TREE_CODE (*node) != FIELD_DECL
4248 && TREE_CODE (*node) != TYPE_DECL)
4250 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4251 IDENTIFIER_POINTER (name));
4252 *no_add_attrs = true;
4256 /* Can combine regparm with all attributes but fastcall. */
4257 if (is_attribute_p ("regparm", name))
4261 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4263 error ("fastcall and regparm attributes are not compatible");
/* regparm takes one argument: the register count.  */
4266 cst = TREE_VALUE (args);
4267 if (TREE_CODE (cst) != INTEGER_CST)
4269 warning (OPT_Wattributes,
4270 "%qs attribute requires an integer constant argument",
4271 IDENTIFIER_POINTER (name));
4272 *no_add_attrs = true;
4274 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4276 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4277 IDENTIFIER_POINTER (name), REGPARM_MAX);
4278 *no_add_attrs = true;
4286 /* Do not warn when emulating the MS ABI. */
4287 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4288 warning (OPT_Wattributes, "%qs attribute ignored",
4289 IDENTIFIER_POINTER (name));
4290 *no_add_attrs = true;
4294 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4295 if (is_attribute_p ("fastcall", name))
4297 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4299 error ("fastcall and cdecl attributes are not compatible");
4301 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4303 error ("fastcall and stdcall attributes are not compatible");
4305 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4307 error ("fastcall and regparm attributes are not compatible");
4311 /* Can combine stdcall with fastcall (redundant), regparm and
4313 else if (is_attribute_p ("stdcall", name))
4315 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4317 error ("stdcall and cdecl attributes are not compatible");
4319 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4321 error ("stdcall and fastcall attributes are not compatible");
4325 /* Can combine cdecl with regparm and sseregparm. */
4326 else if (is_attribute_p ("cdecl", name))
4328 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4330 error ("stdcall and cdecl attributes are not compatible");
4332 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4334 error ("fastcall and cdecl attributes are not compatible");
4338 /* Can combine sseregparm with all attributes. */
4343 /* Return 0 if the attributes for two types are incompatible, 1 if they
4344 are compatible, and 2 if they are nearly compatible (which causes a
4345 warning to be generated). */
/* TARGET_COMP_TYPE_ATTRIBUTES hook: two function types are incompatible
   if they disagree on fastcall/regparm, sseregparm, or (via -mrtd) the
   effective cdecl/stdcall return convention.  The '!' before each
   lookup_attribute normalizes presence to 0/1 before comparing.  */
4348 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4350 /* Check for mismatch of non-default calling convention. */
4351 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4353 if (TREE_CODE (type1) != FUNCTION_TYPE
4354 && TREE_CODE (type1) != METHOD_TYPE)
4357 /* Check for mismatched fastcall/regparm types. */
4358 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4359 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4360 || (ix86_function_regparm (type1, NULL)
4361 != ix86_function_regparm (type2, NULL)))
4364 /* Check for mismatched sseregparm types. */
4365 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4366 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4369 /* Check for mismatched return types (cdecl vs stdcall). */
4370 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4371 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4377 /* Return the regparm value for a function with the indicated TYPE and DECL.
4378 DECL may be NULL when calling function indirectly
4379 or considering a libcall. */
/* Computes how many integer argument registers the function uses:
   64-bit ABIs have fixed counts; 32-bit starts from -mregparm / the
   regparm attribute, with fastcall and local-function optimization
   adjustments.  NOTE(review): sampled excerpt -- interior lines elided.  */
4382 ix86_function_regparm (const_tree type, const_tree decl)
/* Issue the nested-function regparm(3) error only once.  */
4387 static bool error_issued;
4390 return (ix86_function_type_abi (type) == SYSV_ABI
4391 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
4393 regparm = ix86_regparm;
4394 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4398 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4400 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4402 /* We can't use regparm(3) for nested functions because
4403 these pass static chain pointer in %ecx register. */
4404 if (!error_issued && regparm == 3
4405 && decl_function_context (decl)
4406 && !DECL_NO_STATIC_CHAIN (decl))
4408 error ("nested functions are limited to 2 register parameters");
4409 error_issued = true;
/* fastcall implies a fixed two-register convention (ECX/EDX).  */
4417 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4420 /* Use register calling convention for local functions when possible. */
4422 && TREE_CODE (decl) == FUNCTION_DECL
4426 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4427 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4430 int local_regparm, globals = 0, regno;
4433 /* Make sure no regparm register is taken by a
4434 fixed register variable. */
4435 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4436 if (fixed_regs[local_regparm])
4439 /* We can't use regparm(3) for nested functions as these use
4440 static chain pointer in third argument. */
4441 if (local_regparm == 3
4442 && decl_function_context (decl)
4443 && !DECL_NO_STATIC_CHAIN (decl))
4446 /* If the function realigns its stack pointer, the prologue will
4447 clobber %ecx. If we've already generated code for the callee,
4448 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4449 scanning the attributes for the self-realigning property. */
4450 f = DECL_STRUCT_FUNCTION (decl);
4451 /* Since current internal arg pointer won't conflict with
4452 parameter passing regs, so no need to change stack
4453 realignment and adjust regparm number.
4455 Each fixed register usage increases register pressure,
4456 so less registers should be used for argument passing.
4457 This functionality can be overridden by an explicit
4459 for (regno = 0; regno <= DI_REG; regno++)
4460 if (fixed_regs[regno])
/* Reduce the local regparm count by the number of fixed
   (global register variable) registers.  */
4464 = globals < local_regparm ? local_regparm - globals : 0;
4466 if (local_regparm > regparm)
4467 regparm = local_regparm;
4474 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4475 DFmode (2) arguments in SSE registers for a function with the
4476 indicated TYPE and DECL. DECL may be NULL when calling function
4477 indirectly or considering a libcall. Otherwise return 0. */
/* 32-bit only (asserted).  The sseregparm attribute / -msseregparm
   request is an error without SSE; otherwise local functions compiled
   with SSE math also get the SSE float-passing convention.  WARN gates
   the diagnostics.  NOTE(review): sampled excerpt.  */
4480 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4482 gcc_assert (!TARGET_64BIT);
4484 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4485 by the sseregparm attribute. */
4486 if (TARGET_SSEREGPARM
4487 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4494 error ("Calling %qD with attribute sseregparm without "
4495 "SSE/SSE2 enabled", decl);
4497 error ("Calling %qT with attribute sseregparm without "
4498 "SSE/SSE2 enabled", type);
4506 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4507 (and DFmode for SSE2) arguments in SSE registers. */
4508 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4510 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4511 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
/* 2 => SFmode and DFmode in SSE regs; 1 => SFmode only.  */
4513 return TARGET_SSE2 ? 2 : 1;
4519 /* Return true if EAX is live at the start of the function. Used by
4520 ix86_expand_prologue to determine if we need special help before
4521 calling allocate_stack_worker. */
4524 ix86_eax_live_at_start_p (void)
4526 /* Cheat. Don't bother working forward from ix86_function_regparm
4527 to the function type to whether an actual argument is located in
4528 eax. Instead just look at cfg info, which is still close enough
4529 to correct at this point. This gives false positives for broken
4530 functions that might use uninitialized data that happens to be
4531 allocated in eax, but who cares? */
/* Register 0 is AX_REG on i386.  */
4532 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4535 /* Value is the number of bytes of arguments automatically
4536 popped when returning from a subroutine call.
4537 FUNDECL is the declaration node of the function (as a tree),
4538 FUNTYPE is the data type of the function (as a tree),
4539 or for a library call it is an identifier node for the subroutine name.
4540 SIZE is the number of bytes of arguments passed on the stack.
4542 On the 80386, the RTD insn may be used to pop them if the number
4543 of args is fixed, but if the number is variable then the caller
4544 must pop them all. RTD can't be used for library calls now
4545 because the library is compiled with the Unix compiler.
4546 Use of RTD is a selectable option, since it is incompatible with
4547 standard Unix calling sequences. If the option is not selected,
4548 the caller must always pop the args.
4550 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): sampled excerpt -- the return statements between these
   conditions are elided.  */
4553 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4557 /* None of the 64-bit ABIs pop arguments. */
/* RTD applies only to real functions, not libcall identifier nodes.  */
4561 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4563 /* Cdecl functions override -mrtd, and never pop the stack. */
4564 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4566 /* Stdcall and fastcall functions will pop the stack if not
4568 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4569 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4572 if (rtd && ! stdarg_p (funtype))
4576 /* Lose any fake structure return argument if it is passed on the stack. */
4577 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4578 && !KEEP_AGGREGATE_RETURN_POINTER)
4580 int nregs = ix86_function_regparm (funtype, fundecl);
/* The callee pops the hidden aggregate-return pointer.  */
4582 return GET_MODE_SIZE (Pmode);
4588 /* Argument support functions. */
4590 /* Return true when register may be used to pass function parameters. */
/* Covers the 32-bit integer/MMX/SSE regparm registers and, for 64-bit,
   the SysV / MS integer parameter registers plus SSE regs and the hidden
   RAX vararg count.  NOTE(review): sampled excerpt -- the TARGET_64BIT
   branches between these returns are elided.  */
4592 ix86_function_arg_regno_p (int regno)
4595 const int *parm_regs;
4600 return (regno < REGPARM_MAX
4601 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4603 return (regno < REGPARM_MAX
4604 || (TARGET_MMX && MMX_REGNO_P (regno)
4605 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4606 || (TARGET_SSE && SSE_REGNO_P (regno)
4607 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4612 if (SSE_REGNO_P (regno) && TARGET_SSE)
4617 if (TARGET_SSE && SSE_REGNO_P (regno)
4618 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4622 /* TODO: The function should depend on current function ABI but
4623 builtins.c would need updating then. Therefore we use the
4626 /* RAX is used as hidden argument to va_arg functions. */
4627 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* Scan the integer parameter register table for the active ABI.  */
4630 if (ix86_abi == MS_ABI)
4631 parm_regs = x86_64_ms_abi_int_parameter_registers;
4633 parm_regs = x86_64_int_parameter_registers;
4634 for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
4635 : X86_64_REGPARM_MAX); i++)
4636 if (regno == parm_regs[i])
4641 /* Return if we do not know how to pass TYPE solely in registers. */
/* TARGET_MUST_PASS_IN_STACK hook.  */
4644 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4646 if (must_pass_in_stack_var_size_or_pad (mode, type))
4649 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4650 The layout_type routine is crafty and tries to trick us into passing
4651 currently unsupported vector types on the stack by using TImode. */
4652 return (!TARGET_64BIT && mode == TImode
4653 && type && TREE_CODE (type) != VECTOR_TYPE);
4656 /* It returns the size, in bytes, of the area reserved for arguments passed
4657 in registers for the function represented by fndecl dependent to the used
/* FNDECL may be either a FUNCTION_DECL or a function type; only the MS
   ABI reserves register-parameter (shadow) stack space.  The returned
   size for MS_ABI is on the elided line -- presumably 32 bytes.  */
4660 ix86_reg_parm_stack_space (const_tree fndecl)
4662 enum calling_abi call_abi = SYSV_ABI;
4663 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4664 call_abi = ix86_function_abi (fndecl);
4666 call_abi = ix86_function_type_abi (fndecl);
4667 if (call_abi == MS_ABI)
4672 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* The default ABI (ix86_abi) can be overridden per-type by the ms_abi /
   sysv_abi attributes; only meaningful for 64-bit targets.  */
4675 ix86_function_type_abi (const_tree fntype)
4677 if (TARGET_64BIT && fntype != NULL)
4679 enum calling_abi abi = ix86_abi;
4680 if (abi == SYSV_ABI)
4682 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4685 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI of FNDECL by delegating to its type.  */
4692 static enum calling_abi
4693 ix86_function_abi (const_tree fndecl)
4697 return ix86_function_type_abi (TREE_TYPE (fndecl));
4700 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* Outside a function (or on 32-bit) the cached per-function ABI does not
   apply; the default is returned on the elided line.  */
4703 ix86_cfun_abi (void)
4705 if (! cfun || ! TARGET_64BIT)
4707 return cfun->machine->call_abi;
4711 extern void init_regs (void);
4713 /* Implementation of call abi switching target hook. Specific to FNDECL
4714 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4715 for more details. */
/* Record the effective calling ABI for the function being compiled.  */
4717 ix86_call_abi_override (const_tree fndecl)
4719 if (fndecl == NULL_TREE)
4720 cfun->machine->call_abi = ix86_abi;
4722 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4725 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4726 re-initialization of init_regs each time we switch function context since
4727 this is needed only during RTL expansion. */
4729 ix86_maybe_switch_abi (void)
/* SI_REG is call-used under MS_ABI but not SYSV; if the cached state
   disagrees with the current function's ABI, re-run init_regs (the call
   is on an elided line).  */
4732 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4736 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4737 for a call to a function whose data type is FNTYPE.
4738 For a library call, FNTYPE is 0. */
/* Sets up register budgets (integer/SSE/MMX), the callee ABI, warning
   flags, and the vararg heuristics before the first FUNCTION_ARG query.
   NOTE(review): sampled excerpt -- TARGET_64BIT branches and braces are
   elided.  */
4741 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4742 tree fntype, /* tree ptr for function decl */
4743 rtx libname, /* SYMBOL_REF of library name or 0 */
4746 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4747 memset (cum, 0, sizeof (*cum));
4750 cum->call_abi = ix86_function_abi (fndecl);
4752 cum->call_abi = ix86_function_type_abi (fntype);
4753 /* Set up the number of registers to use for passing arguments. */
4755 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4756 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4757 cum->nregs = ix86_regparm;
/* Cross-ABI calls use the callee ABI's register counts.  */
4760 if (cum->call_abi != ix86_abi)
4761 cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
4766 cum->sse_nregs = SSE_REGPARM_MAX;
4769 if (cum->call_abi != ix86_abi)
4770 cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4771 : X64_SSE_REGPARM_MAX;
4775 cum->mmx_nregs = MMX_REGPARM_MAX;
4776 cum->warn_avx = true;
4777 cum->warn_sse = true;
4778 cum->warn_mmx = true;
4780 /* Because type might mismatch in between caller and callee, we need to
4781 use actual type of function for local calls.
4782 FIXME: cgraph_analyze can be told to actually record if function uses
4783 va_start so for local functions maybe_vaarg can be made aggressive
4785 FIXME: once typesytem is fixed, we won't need this code anymore. */
4787 fntype = TREE_TYPE (fndecl);
4788 cum->maybe_vaarg = (fntype
4789 ? (!prototype_p (fntype) || stdarg_p (fntype))
4794 /* If there are variable arguments, then we won't pass anything
4795 in registers in 32-bit mode. */
4796 if (stdarg_p (fntype))
4807 /* Use ecx and edx registers if function has fastcall attribute,
4808 else look for regparm information. */
4811 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4817 cum->nregs = ix86_function_regparm (fntype, fndecl);
4820 /* Set up the number of SSE registers used for passing SFmode
4821 and DFmode arguments. Warn for mismatching ABI. */
4822 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4826 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4827 But in the case of vector types, it is some vector mode.
4829 When we have only some of our vector isa extensions enabled, then there
4830 are some modes for which vector_mode_supported_p is false. For these
4831 modes, the generic vector support in gcc will choose some non-vector mode
4832 in order to implement the type. By computing the natural mode, we'll
4833 select the proper ABI location for the operand and not depend on whatever
4834 the middle-end decides to do with these vector types.
4836 The middle-end can't deal with the vector types > 16 bytes. In this
4837 case, we return the original mode and warn ABI change if CUM isn't
/* NOTE(review): sampled excerpt -- the loop body and trailing return are
   partially elided.  */
4840 static enum machine_mode
4841 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4843 enum machine_mode mode = TYPE_MODE (type);
4845 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4847 HOST_WIDE_INT size = int_size_in_bytes (type);
4848 if ((size == 8 || size == 16 || size == 32)
4849 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4850 && TYPE_VECTOR_SUBPARTS (type) > 1)
4852 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4854 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4855 mode = MIN_MODE_VECTOR_FLOAT;
4857 mode = MIN_MODE_VECTOR_INT;
4859 /* Get the mode which has this inner mode and number of units. */
4860 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4861 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4862 && GET_MODE_INNER (mode) == innermode)
/* 32-byte (AVX) vectors without AVX enabled: keep TYPE_MODE and
   warn once about the ABI change.  */
4864 if (size == 32 && !TARGET_AVX)
4866 static bool warnedavx;
4873 warning (0, "AVX vector argument without AVX "
4874 "enabled changes the ABI");
4876 return TYPE_MODE (type);
4889 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4890 this may not agree with the mode that the type system has chosen for the
4891 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4892 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4895 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4900 if (orig_mode != BLKmode)
4901 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap a single REG at offset 0 in a one-entry PARALLEL.  */
4904 tmp = gen_rtx_REG (mode, regno);
4905 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4906 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4912 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4913 of this code is to classify each 8bytes of incoming argument by the register
4914 class and assign registers accordingly. */
4916 /* Return the union class of CLASS1 and CLASS2.
4917 See the x86-64 PS ABI for details. */
/* Implements the psABI MERGE operation on two eightbyte classes.  */
4919 static enum x86_64_reg_class
4920 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4922 /* Rule #1: If both classes are equal, this is the resulting class. */
4923 if (class1 == class2)
4926 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4928 if (class1 == X86_64_NO_CLASS)
4930 if (class2 == X86_64_NO_CLASS)
4933 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4934 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4935 return X86_64_MEMORY_CLASS;
4937 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI merged with SSESF stays INTEGERSI so a 4-byte slot is
   still recognized later.  */
4938 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4939 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4940 return X86_64_INTEGERSI_CLASS;
4941 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4942 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4943 return X86_64_INTEGER_CLASS;
4945 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4947 if (class1 == X86_64_X87_CLASS
4948 || class1 == X86_64_X87UP_CLASS
4949 || class1 == X86_64_COMPLEX_X87_CLASS
4950 || class2 == X86_64_X87_CLASS
4951 || class2 == X86_64_X87UP_CLASS
4952 || class2 == X86_64_COMPLEX_X87_CLASS)
4953 return X86_64_MEMORY_CLASS;
4955 /* Rule #6: Otherwise class SSE is used. */
4956 return X86_64_SSE_CLASS;
4959 /* Classify the argument of type TYPE and mode MODE.
4960 CLASSES will be filled by the register class used to pass each word
4961 of the operand. The number of words is returned. In case the parameter
4962 should be passed in memory, 0 is returned. As a special case for zero
4963 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4965 BIT_OFFSET is used internally for handling records and specifies offset
4966 of the offset in bits modulo 256 to avoid overflow cases.
4968 See the x86-64 PS ABI for details.
/* Core of the x86-64 psABI argument classification: recursively classify
   aggregates field by field, merge per-eightbyte classes, then apply the
   post-merger cleanup rules; scalar modes are classified directly at the
   bottom.  NOTE(review): sampled excerpt -- many returns, breaks, and
   case labels are elided.  */
4972 classify_argument (enum machine_mode mode, const_tree type,
4973 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4975 HOST_WIDE_INT bytes =
4976 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4977 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4979 /* Variable sized entities are always passed/returned in memory. */
4983 if (mode != VOIDmode
4984 && targetm.calls.must_pass_in_stack (mode, type))
4987 if (type && AGGREGATE_TYPE_P (type))
4991 enum x86_64_reg_class subclasses[MAX_CLASSES];
4993 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
4997 for (i = 0; i < words; i++)
4998 classes[i] = X86_64_NO_CLASS;
5000 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5001 signal the memory class, so handle it as special case. */
5004 classes[0] = X86_64_NO_CLASS;
5008 /* Classify each field of record and merge classes. */
5009 switch (TREE_CODE (type))
5012 /* And now merge the fields of structure. */
5013 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5015 if (TREE_CODE (field) == FIELD_DECL)
5019 if (TREE_TYPE (field) == error_mark_node)
5022 /* Bitfields are always classified as integer. Handle them
5023 early, since later code would consider them to be
5024 misaligned integers. */
5025 if (DECL_BIT_FIELD (field))
5027 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5028 i < ((int_bit_position (field) + (bit_offset % 64))
5029 + tree_low_cst (DECL_SIZE (field), 0)
5032 merge_classes (X86_64_INTEGER_CLASS,
5039 type = TREE_TYPE (field);
5041 /* Flexible array member is ignored. */
5042 if (TYPE_MODE (type) == BLKmode
5043 && TREE_CODE (type) == ARRAY_TYPE
5044 && TYPE_SIZE (type) == NULL_TREE
5045 && TYPE_DOMAIN (type) != NULL_TREE
5046 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5051 if (!warned && warn_psabi)
5054 inform (input_location,
5055 "The ABI of passing struct with"
5056 " a flexible array member has"
5057 " changed in GCC 4.4");
/* Recurse into the field and fold its classes into ours.  */
5061 num = classify_argument (TYPE_MODE (type), type,
5063 (int_bit_position (field)
5064 + bit_offset) % 256);
5067 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5068 for (i = 0; i < num && (i + pos) < words; i++)
5070 merge_classes (subclasses[i], classes[i + pos]);
5077 /* Arrays are handled as small records. */
5080 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5081 TREE_TYPE (type), subclasses, bit_offset);
5085 /* The partial classes are now full classes. */
5086 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5087 subclasses[0] = X86_64_SSE_CLASS;
5088 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5089 && !((bit_offset % 64) == 0 && bytes == 4))
5090 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all words.  */
5092 for (i = 0; i < words; i++)
5093 classes[i] = subclasses[i % num];
5098 case QUAL_UNION_TYPE:
5099 /* Unions are similar to RECORD_TYPE but offset is always 0.
5101 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5103 if (TREE_CODE (field) == FIELD_DECL)
5107 if (TREE_TYPE (field) == error_mark_node)
5110 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5111 TREE_TYPE (field), subclasses,
5115 for (i = 0; i < num; i++)
5116 classes[i] = merge_classes (subclasses[i], classes[i]);
5127 /* When size > 16 bytes, if the first one isn't
5128 X86_64_SSE_CLASS or any other ones aren't
5129 X86_64_SSEUP_CLASS, everything should be passed in
5131 if (classes[0] != X86_64_SSE_CLASS)
5134 for (i = 1; i < words; i++)
5135 if (classes[i] != X86_64_SSEUP_CLASS)
5139 /* Final merger cleanup. */
5140 for (i = 0; i < words; i++)
5142 /* If one class is MEMORY, everything should be passed in
5144 if (classes[i] == X86_64_MEMORY_CLASS)
5147 /* The X86_64_SSEUP_CLASS should be always preceded by
5148 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5149 if (classes[i] == X86_64_SSEUP_CLASS
5150 && classes[i - 1] != X86_64_SSE_CLASS
5151 && classes[i - 1] != X86_64_SSEUP_CLASS)
5153 /* The first one should never be X86_64_SSEUP_CLASS. */
5154 gcc_assert (i != 0);
5155 classes[i] = X86_64_SSE_CLASS;
5158 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5159 everything should be passed in memory. */
5160 if (classes[i] == X86_64_X87UP_CLASS
5161 && (classes[i - 1] != X86_64_X87_CLASS))
5165 /* The first one should never be X86_64_X87UP_CLASS. */
5166 gcc_assert (i != 0);
5167 if (!warned && warn_psabi)
5170 inform (input_location,
5171 "The ABI of passing union with long double"
5172 " has changed in GCC 4.4");
5180 /* Compute alignment needed. We align all types to natural boundaries with
5181 exception of XFmode that is aligned to 64bits. */
5182 if (mode != VOIDmode && mode != BLKmode)
5184 int mode_alignment = GET_MODE_BITSIZE (mode);
5187 mode_alignment = 128;
5188 else if (mode == XCmode)
5189 mode_alignment = 256;
5190 if (COMPLEX_MODE_P (mode))
5191 mode_alignment /= 2;
5192 /* Misaligned fields are always returned in memory. */
5193 if (bit_offset % mode_alignment)
5197 /* for V1xx modes, just use the base mode */
5198 if (VECTOR_MODE_P (mode) && mode != V1DImode
5199 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5200 mode = GET_MODE_INNER (mode);
5202 /* Classification of atomic types. */
5207 classes[0] = X86_64_SSE_CLASS;
5210 classes[0] = X86_64_SSE_CLASS;
5211 classes[1] = X86_64_SSEUP_CLASS;
/* Integer modes: pick INTEGERSI/INTEGER per 32/64-bit slot size.  */
5221 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5225 classes[0] = X86_64_INTEGERSI_CLASS;
5228 else if (size <= 64)
5230 classes[0] = X86_64_INTEGER_CLASS;
5233 else if (size <= 64+32)
5235 classes[0] = X86_64_INTEGER_CLASS;
5236 classes[1] = X86_64_INTEGERSI_CLASS;
5239 else if (size <= 64+64)
5241 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5249 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5253 /* OImode shouldn't be used directly. */
/* Float modes: SF in the low half of an eightbyte is SSESF.  */
5258 if (!(bit_offset % 64))
5259 classes[0] = X86_64_SSESF_CLASS;
5261 classes[0] = X86_64_SSE_CLASS;
5264 classes[0] = X86_64_SSEDF_CLASS;
5267 classes[0] = X86_64_X87_CLASS;
5268 classes[1] = X86_64_X87UP_CLASS;
5271 classes[0] = X86_64_SSE_CLASS;
5272 classes[1] = X86_64_SSEUP_CLASS;
5275 classes[0] = X86_64_SSE_CLASS;
5276 if (!(bit_offset % 64))
5282 if (!warned && warn_psabi)
5285 inform (input_location,
5286 "The ABI of passing structure with complex float"
5287 " member has changed in GCC 4.4");
5289 classes[1] = X86_64_SSESF_CLASS;
5293 classes[0] = X86_64_SSEDF_CLASS;
5294 classes[1] = X86_64_SSEDF_CLASS;
5297 classes[0] = X86_64_COMPLEX_X87_CLASS;
5300 /* This mode is larger than 16 bytes. */
/* 256-bit AVX vector modes span four eightbytes.  */
5308 classes[0] = X86_64_SSE_CLASS;
5309 classes[1] = X86_64_SSEUP_CLASS;
5310 classes[2] = X86_64_SSEUP_CLASS;
5311 classes[3] = X86_64_SSEUP_CLASS;
5319 classes[0] = X86_64_SSE_CLASS;
5320 classes[1] = X86_64_SSEUP_CLASS;
5327 classes[0] = X86_64_SSE_CLASS;
5333 gcc_assert (VECTOR_MODE_P (mode));
5338 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5340 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5341 classes[0] = X86_64_INTEGERSI_CLASS;
5343 classes[0] = X86_64_INTEGER_CLASS;
5344 classes[1] = X86_64_INTEGER_CLASS;
5345 return 1 + (bytes > 8);
5349 /* Examine the argument and return set number of register required in each
5350 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): this listing is elided -- the opening brace and the
   *int_nregs / *sse_nregs increments inside the switch are not visible.
   Only the visible structure is annotated below.  */
5352 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5353 int *int_nregs, int *sse_nregs)
5355 enum x86_64_reg_class regclass[MAX_CLASSES];
/* First break the argument into per-eightbyte ABI classes.  */
5356 int n = classify_argument (mode, type, regclass, 0);
/* Walk the classes from last to first, tallying register needs.  */
5362 for (n--; n >= 0; n--)
5363 switch (regclass[n])
5365 case X86_64_INTEGER_CLASS:
5366 case X86_64_INTEGERSI_CLASS:
5369 case X86_64_SSE_CLASS:
5370 case X86_64_SSESF_CLASS:
5371 case X86_64_SSEDF_CLASS:
5374 case X86_64_NO_CLASS:
5375 case X86_64_SSEUP_CLASS:
5377 case X86_64_X87_CLASS:
5378 case X86_64_X87UP_CLASS:
/* A complex x87 value can only be *returned* (result 2, presumably the
   two x87 stack slots); as an argument it forces memory (result 0).  */
5382 case X86_64_COMPLEX_X87_CLASS:
5383 return in_return ? 2 : 0;
5384 case X86_64_MEMORY_CLASS:
5390 /* Construct container for the argument used by GCC interface. See
5391 FUNCTION_ARG for the detailed description. */
/* NOTE(review): heavily elided excerpt -- declarations such as `bytes',
   `n', `i', `nexps', `ret' and several control-flow lines are missing
   from view.  Comments below annotate only the visible lines.  */
5394 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5395 const_tree type, int in_return, int nintregs, int nsseregs,
5396 const int *intreg, int sse_regno)
5398 /* The following variables hold the static issued_error state. */
5399 static bool issued_sse_arg_error;
5400 static bool issued_sse_ret_error;
5401 static bool issued_x87_ret_error;
5403 enum machine_mode tmpmode;
5405 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5406 enum x86_64_reg_class regclass[MAX_CLASSES];
5410 int needed_sseregs, needed_intregs;
5411 rtx exp[MAX_CLASSES];
/* Classify, then bail out (memory passing) if the classification fails
   or the needed registers exceed what the caller has available.  */
5414 n = classify_argument (mode, type, regclass, 0);
5417 if (!examine_argument (mode, type, in_return, &needed_intregs,
5420 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5423 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5424 some less clueful developer tries to use floating-point anyway. */
5425 if (needed_sseregs && !TARGET_SSE)
5429 if (!issued_sse_ret_error)
5431 error ("SSE register return with SSE disabled");
5432 issued_sse_ret_error = true;
5435 else if (!issued_sse_arg_error)
5437 error ("SSE register argument with SSE disabled");
5438 issued_sse_arg_error = true;
5443 /* Likewise, error if the ABI requires us to return values in the
5444 x87 registers and the user specified -mno-80387. */
5445 if (!TARGET_80387 && in_return)
5446 for (i = 0; i < n; i++)
5447 if (regclass[i] == X86_64_X87_CLASS
5448 || regclass[i] == X86_64_X87UP_CLASS
5449 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5451 if (!issued_x87_ret_error)
5453 error ("x87 register return with x87 disabled")
5454 issued_x87_ret_error = true;
5459 /* First construct simple cases. Avoid SCmode, since we want to use
5460 single register to pass this type. */
5461 if (n == 1 && mode != SCmode)
5462 switch (regclass[0])
5464 case X86_64_INTEGER_CLASS:
5465 case X86_64_INTEGERSI_CLASS:
5466 return gen_rtx_REG (mode, intreg[0]);
5467 case X86_64_SSE_CLASS:
5468 case X86_64_SSESF_CLASS:
5469 case X86_64_SSEDF_CLASS:
5470 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5471 case X86_64_X87_CLASS:
5472 case X86_64_COMPLEX_X87_CLASS:
5473 return gen_rtx_REG (mode, FIRST_STACK_REG);
5474 case X86_64_NO_CLASS:
5475 /* Zero sized array, struct or class. */
/* Two-eightbyte SSE value (e.g. a 16-byte vector) in one SSE reg.  */
5480 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5481 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5482 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
/* NOTE(review): the `if (n == 4 ...' head of this condition is elided;
   presumably a 32-byte (AVX) value in one SSE register -- confirm.  */
5484 && regclass[0] == X86_64_SSE_CLASS
5485 && regclass[1] == X86_64_SSEUP_CLASS
5486 && regclass[2] == X86_64_SSEUP_CLASS
5487 && regclass[3] == X86_64_SSEUP_CLASS
5489 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5492 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5493 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* 16-byte integer-class values go in a consecutive GPR pair.  */
5494 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5495 && regclass[1] == X86_64_INTEGER_CLASS
5496 && (mode == CDImode || mode == TImode || mode == TFmode)
5497 && intreg[0] + 1 == intreg[1])
5498 return gen_rtx_REG (mode, intreg[0]);
5500 /* Otherwise figure out the entries of the PARALLEL. */
5501 for (i = 0; i < n; i++)
5505 switch (regclass[i])
5507 case X86_64_NO_CLASS:
5509 case X86_64_INTEGER_CLASS:
5510 case X86_64_INTEGERSI_CLASS:
5511 /* Merge TImodes on aligned occasions here too. */
5512 if (i * 8 + 8 > bytes)
5513 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5514 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5518 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5519 if (tmpmode == BLKmode)
5521 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5522 gen_rtx_REG (tmpmode, *intreg),
5526 case X86_64_SSESF_CLASS:
5527 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5528 gen_rtx_REG (SFmode,
5529 SSE_REGNO (sse_regno)),
5533 case X86_64_SSEDF_CLASS:
5534 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5535 gen_rtx_REG (DFmode,
5536 SSE_REGNO (sse_regno)),
5540 case X86_64_SSE_CLASS:
5548 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5558 && regclass[1] == X86_64_SSEUP_CLASS
5559 && regclass[2] == X86_64_SSEUP_CLASS
5560 && regclass[3] == X86_64_SSEUP_CLASS);
5567 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5568 gen_rtx_REG (tmpmode,
5569 SSE_REGNO (sse_regno)),
5578 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LIST entries into the final PARALLEL.  */
5582 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5583 for (i = 0; i < nexps; i++)
5584 XVECEXP (ret, 0, i) = exp [i];
5588 /* Update the data in CUM to advance over an argument of mode MODE
5589 and data type TYPE. (TYPE is null for libcalls where that information
5590 may not be available.) */
/* NOTE(review): elided excerpt -- the switch on MODE that selects the
   integer / SSE / MMX branches below is not visible.  */
5593 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5594 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-register branch: consume WORDS general registers.  */
5610 cum->words += words;
5611 cum->nregs -= words;
5612 cum->regno += words;
5614 if (cum->nregs <= 0)
5622 /* OImode shouldn't be used directly. */
/* float_in_sse gates whether SF/DF args use SSE regs (2 = both,
   1 = SFmode only), per the sseregparm machinery elsewhere.  */
5626 if (cum->float_in_sse < 2)
5629 if (cum->float_in_sse < 1)
/* SSE branch: scalars/vectors (non-aggregates) take one SSE register.  */
5646 if (!type || !AGGREGATE_TYPE_P (type))
5648 cum->sse_words += words;
5649 cum->sse_nregs -= 1;
5650 cum->sse_regno += 1;
5651 if (cum->sse_nregs <= 0)
/* MMX branch: likewise one MMX register for non-aggregates.  */
5664 if (!type || !AGGREGATE_TYPE_P (type))
5666 cum->mmx_words += words;
5667 cum->mmx_nregs -= 1;
5668 cum->mmx_regno += 1;
5669 if (cum->mmx_nregs <= 0)
/* Advance CUM past one argument under the 64-bit SysV ABI.  */
5680 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5681 tree type, HOST_WIDE_INT words, int named)
5683 int int_nregs, sse_nregs;
5685 /* Unnamed 256bit vector mode parameters are passed on stack. */
5686 if (!named && VALID_AVX256_REG_MODE (mode))
/* If the argument cannot go in registers at all, it occupies stack
   words only; otherwise consume the computed GPR/SSE counts.  */
5689 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5690 cum->words += words;
5691 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5693 cum->nregs -= int_nregs;
5694 cum->sse_nregs -= sse_nregs;
5695 cum->regno += int_nregs;
5696 cum->sse_regno += sse_nregs;
/* Fallback (registers exhausted): argument goes on the stack.  */
5699 cum->words += words;
/* Advance CUM past one argument under the Microsoft x64 ABI, where
   by-value arguments are always 1, 2, 4 or 8 bytes.  */
5703 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5704 HOST_WIDE_INT words)
5706 /* Otherwise, this should be passed indirect. */
5707 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5709 cum->words += words;
/* Top-level dispatcher: compute the argument's size in bytes/words,
   then advance CUM via the ABI-specific helper (MS x64, SysV x64,
   or 32-bit).  */
5718 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5719 tree type, int named)
5721 HOST_WIDE_INT bytes, words;
5723 if (mode == BLKmode)
5724 bytes = int_size_in_bytes (type);
5726 bytes = GET_MODE_SIZE (mode);
5727 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* NOTE(review): the guard on this call (presumably `if (type)' or a
   vector-type check, as in function_arg) is elided from this view.  */
5730 mode = type_natural_mode (type, NULL);
5732 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5733 function_arg_advance_ms_64 (cum, bytes, words);
5734 else if (TARGET_64BIT)
5735 function_arg_advance_64 (cum, mode, type, words, named);
5737 function_arg_advance_32 (cum, mode, type, bytes, words);
5740 /* Define where to put the arguments to a function.
5741 Value is zero to push the argument on the stack,
5742 or a hard register in which to store the argument.
5744 MODE is the argument's machine mode.
5745 TYPE is the data type of the argument (as a tree).
5746 This is null for libcalls where that information may
5748 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5749 the preceding args and about the function being called.
5750 NAMED is nonzero if this argument is a named parameter
5751 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): elided excerpt -- the switch on MODE and several
   return/fallthrough lines are missing; annotations cover only what
   is visible.  */
5754 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5755 enum machine_mode orig_mode, tree type,
5756 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Static so each "without ... enabled" warning is emitted only once
   per compilation.  */
5758 static bool warnedsse, warnedmmx;
5760 /* Avoid the AL settings for the Unix64 ABI. */
5761 if (mode == VOIDmode)
/* Integer case: argument fits in the remaining GPRs.  */
5777 if (words <= cum->nregs)
5779 int regno = cum->regno;
5781 /* Fastcall allocates the first two DWORD (SImode) or
5782 smaller arguments to ECX and EDX if it isn't an
5788 || (type && AGGREGATE_TYPE_P (type)))
5791 /* ECX not EAX is the first allocated register. */
5792 if (regno == AX_REG)
5795 return gen_rtx_REG (mode, regno);
5800 if (cum->float_in_sse < 2)
5803 if (cum->float_in_sse < 1)
5807 /* In 32bit, we pass TImode in xmm registers. */
/* SSE vector case (non-aggregate): warn once if SSE is disabled,
   then hand back an SSE register (or parallel).  */
5814 if (!type || !AGGREGATE_TYPE_P (type))
5816 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5819 warning (0, "SSE vector argument without SSE enabled "
5823 return gen_reg_or_parallel (mode, orig_mode,
5824 cum->sse_regno + FIRST_SSE_REG);
5829 /* OImode shouldn't be used directly. */
5838 if (!type || !AGGREGATE_TYPE_P (type))
5841 return gen_reg_or_parallel (mode, orig_mode,
5842 cum->sse_regno + FIRST_SSE_REG);
/* MMX vector case (non-aggregate): analogous warn-once, MMX register.  */
5851 if (!type || !AGGREGATE_TYPE_P (type))
5853 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5856 warning (0, "MMX vector argument without MMX enabled "
5860 return gen_reg_or_parallel (mode, orig_mode,
5861 cum->mmx_regno + FIRST_MMX_REG);
/* Return the register (or PARALLEL) in which to pass an argument under
   the 64-bit SysV ABI, or presumably NULL for stack passing (the final
   return is elided from this view).  */
5870 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5871 enum machine_mode orig_mode, tree type, int named)
5873 /* Handle a hidden AL argument containing number of registers
5874 for varargs x86-64 functions. */
5875 if (mode == VOIDmode)
5876 return GEN_INT (cum->maybe_vaarg
5877 ? (cum->sse_nregs < 0
5878 ? (cum->call_abi == ix86_abi
5880 : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5881 : X64_SSE_REGPARM_MAX))
5896 /* Unnamed 256bit vector mode parameters are passed on stack. */
/* Delegate the real register assignment to construct_container.  */
5902 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5904 &x86_64_int_parameter_registers [cum->regno],
/* Return the register in which to pass an argument under the
   Microsoft x64 ABI.  NOTE(review): elided excerpt -- the declaration
   of `regno' and some braces/returns are not visible.  */
5909 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5910 enum machine_mode orig_mode, int named,
5911 HOST_WIDE_INT bytes)
5915 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5916 We use value of -2 to specify that current function call is MSABI. */
5917 if (mode == VOIDmode)
5918 return GEN_INT (-2);
5920 /* If we've run out of registers, it goes on the stack. */
5921 if (cum->nregs == 0)
5924 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5926 /* Only floating point modes are passed in anything but integer regs. */
5927 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5930 regno = cum->regno + FIRST_SSE_REG;
5935 /* Unnamed floating parameters are passed in both the
5936 SSE and integer registers. */
5937 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5938 t2 = gen_rtx_REG (mode, regno);
5939 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5940 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5941 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5944 /* Handle aggregated types passed in register. */
5945 if (orig_mode == BLKmode)
/* Small aggregates are passed as a plain 4- or 8-byte integer.  */
5947 if (bytes > 0 && bytes <= 8)
5948 mode = (bytes > 4 ? DImode : SImode);
5949 if (mode == BLKmode)
5953 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG worker: size the argument, canonicalize
   vector types to their natural mode, then dispatch to the
   ABI-specific helper.  */
5957 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5958 tree type, int named)
5960 enum machine_mode mode = omode;
5961 HOST_WIDE_INT bytes, words;
5963 if (mode == BLKmode)
5964 bytes = int_size_in_bytes (type);
5966 bytes = GET_MODE_SIZE (mode);
5967 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5969 /* To simplify the code below, represent vector types with a vector mode
5970 even if MMX/SSE are not active. */
5971 if (type && TREE_CODE (type) == VECTOR_TYPE)
5972 mode = type_natural_mode (type, cum);
5974 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5975 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5976 else if (TARGET_64BIT)
5977 return function_arg_64 (cum, mode, omode, type, named);
5979 return function_arg_32 (cum, mode, omode, type, bytes, words);
5982 /* A C expression that indicates when an argument must be passed by
5983 reference. If nonzero for an argument, a copy of that argument is
5984 made in memory and a pointer to the argument is passed instead of
5985 the argument itself. The pointer is passed in whatever way is
5986 appropriate for passing a pointer to that type. */
5989 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5990 enum machine_mode mode ATTRIBUTE_UNUSED,
5991 const_tree type, bool named ATTRIBUTE_UNUSED)
5993 /* See Windows x64 Software Convention. */
5994 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5996 int msize = (int) GET_MODE_SIZE (mode);
5999 /* Arrays are passed by reference. */
6000 if (TREE_CODE (type) == ARRAY_TYPE)
6003 if (AGGREGATE_TYPE_P (type))
6005 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6006 are passed by reference. */
6007 msize = int_size_in_bytes (type);
6011 /* __m128 is passed by reference. */
/* Sizes 1/2/4/8 bytes pass by value; everything else by reference
   (the return statements for these cases are elided from view).  */
6013 case 1: case 2: case 4: case 8:
/* SysV x86-64: variable-sized types (size -1) go by reference.  */
6019 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6025 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* NOTE(review): the second line of this comment (presumably "(GCC
   aligned only SSE modes)") and several braces are elided from view.  */
6028 contains_aligned_value_p (tree type)
6030 enum machine_mode mode = TYPE_MODE (type);
6031 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6035 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6037 if (TYPE_ALIGN (type) < 128)
6040 if (AGGREGATE_TYPE_P (type))
6042 /* Walk the aggregates recursively. */
6043 switch (TREE_CODE (type))
6047 case QUAL_UNION_TYPE:
6051 /* Walk all the structure fields. */
6052 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6054 if (TREE_CODE (field) == FIELD_DECL
6055 && contains_aligned_value_p (TREE_TYPE (field)))
6062 /* Just for use if some languages passes arrays by value. */
/* Array case: recurse into the element type.  */
6063 if (contains_aligned_value_p (TREE_TYPE (type)))
6074 /* Gives the alignment boundary, in bits, of an argument with the
6075 specified mode and type. */
6078 ix86_function_arg_boundary (enum machine_mode mode, tree type)
/* If a TYPE is given, start from its (canonicalized) alignment;
   otherwise fall back to the mode's alignment.  */
6083 /* Since canonical type is used for call, we convert it to
6084 canonical type if needed. */
6085 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6086 type = TYPE_CANONICAL (type);
6087 align = TYPE_ALIGN (type);
6090 align = GET_MODE_ALIGNMENT (mode);
6091 if (align < PARM_BOUNDARY)
6092 align = PARM_BOUNDARY;
6093 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6094 natural boundaries. */
6095 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6097 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6098 make an exception for SSE modes since these require 128bit
6101 The handling here differs from field_alignment. ICC aligns MMX
6102 arguments to 4 byte boundaries, while structure fields are aligned
6103 to 8 byte boundaries. */
6106 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6107 align = PARM_BOUNDARY;
6111 if (!contains_aligned_value_p (type))
6112 align = PARM_BOUNDARY;
/* Clamp to the platform maximum alignment.  */
6115 if (align > BIGGEST_ALIGNMENT)
6116 align = BIGGEST_ALIGNMENT;
6120 /* Return true if N is a possible register number of function value. */
6123 ix86_function_value_regno_p (int regno)
/* NOTE(review): the switch head and leading cases (presumably AX_REG
   etc.) are elided; only the x87 / Mach-O-related cases are visible.  */
6130 case FIRST_FLOAT_REG:
6131 /* TODO: The function should depend on current function ABI but
6132 builtins.c would need updating then. Therefore we use the
6134 if (TARGET_64BIT && ix86_abi == MS_ABI)
6136 return TARGET_FLOAT_RETURNS_IN_80387;
6142 if (TARGET_MACHO || TARGET_64BIT)
6150 /* Define how to find the value returned by a function.
6151 VALTYPE is the data type of the value (as a tree).
6152 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6153 otherwise, FUNC is 0. */
/* Pick the return register for the 32-bit ABI based on the value's
   mode: MMX for 8-byte vectors, XMM for 16-byte, YMM for 32-byte,
   st(0) for x87 floats, EAX otherwise.  */
6156 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6157 const_tree fntype, const_tree fn)
6161 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6162 we normally prevent this case when mmx is not available. However
6163 some ABIs may require the result to be returned like DImode. */
6164 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6165 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6167 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6168 we prevent this case when sse is not available. However some ABIs
6169 may require the result to be returned like integer TImode. */
6170 else if (mode == TImode
6171 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6172 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6174 /* 32-byte vector modes in %ymm0. */
6175 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6176 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6178 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6179 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6180 regno = FIRST_FLOAT_REG;
6182 /* Most things go in %eax. */
6185 /* Override FP return register with %xmm0 for local functions when
6186 SSE math is enabled or for functions with sseregparm attribute. */
6187 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6189 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6190 if ((sse_level >= 1 && mode == SFmode)
6191 || (sse_level == 2 && mode == DFmode))
6192 regno = FIRST_SSE_REG;
6195 /* OImode shouldn't be used directly. */
6196 gcc_assert (mode != OImode);
6198 return gen_rtx_REG (orig_mode, regno);
/* Return-value register selection for the 64-bit SysV ABI.
   NOTE(review): elided excerpt -- the `valtype' parameter line and the
   libcall mode dispatch are partly missing from view.  */
6202 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6207 /* Handle libcalls, which don't provide a type node. */
6208 if (valtype == NULL)
6220 return gen_rtx_REG (mode, FIRST_SSE_REG);
6223 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6227 return gen_rtx_REG (mode, AX_REG);
/* Typed values: let construct_container build the (possibly multi-
   register) return location.  */
6231 ret = construct_container (mode, orig_mode, valtype, 1,
6232 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6233 x86_64_int_return_registers, 0);
6235 /* For zero sized structures, construct_container returns NULL, but we
6236 need to keep rest of compiler happy by returning meaningful value. */
6238 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Return-value register for the Microsoft x64 ABI: RAX by default,
   XMM0 for 16-byte non-complex int/vector values and for SF/DF.  */
6244 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6246 unsigned int regno = AX_REG;
6250 switch (GET_MODE_SIZE (mode))
/* NOTE(review): the `case 16:' label is elided above this test.  */
6253 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6254 && !COMPLEX_MODE_P (mode))
6255 regno = FIRST_SSE_REG;
6259 if (mode == SFmode || mode == DFmode)
6260 regno = FIRST_SSE_REG;
6266 return gen_rtx_REG (orig_mode, regno);
/* Common worker: resolve FNTYPE_OR_DECL to (fn, fntype), then dispatch
   to the ABI-specific function_value_* helper.  */
6270 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6271 enum machine_mode orig_mode, enum machine_mode mode)
6273 const_tree fn, fntype;
6276 if (fntype_or_decl && DECL_P (fntype_or_decl))
6277 fn = fntype_or_decl;
6278 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6280 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6281 return function_value_ms_64 (orig_mode, mode);
6282 else if (TARGET_64BIT)
6283 return function_value_64 (orig_mode, mode, valtype);
6285 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: compute the natural mode of VALTYPE and
   delegate to ix86_function_value_1.  */
6289 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6290 bool outgoing ATTRIBUTE_UNUSED)
6292 enum machine_mode mode, orig_mode;
6294 orig_mode = TYPE_MODE (valtype);
6295 mode = type_natural_mode (valtype, NULL);
6296 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* Libcall variant: no type node available, so use MODE directly.  */
6300 ix86_libcall_value (enum machine_mode mode)
6302 return ix86_function_value_1 (NULL, NULL, mode, mode);
6305 /* Return true iff type is returned in memory. */
6307 static int ATTRIBUTE_UNUSED
6308 return_in_memory_32 (const_tree type, enum machine_mode mode)
/* NOTE(review): elided excerpt -- the size checks between the visible
   lines (and the trailing scalar-mode cases) are missing from view.  */
6312 if (mode == BLKmode)
6315 size = int_size_in_bytes (type);
/* MS-compatible mode: small aggregates (<= 8 bytes) come back in
   registers rather than memory.  */
6317 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6320 if (VECTOR_MODE_P (mode) || mode == TImode)
6322 /* User-created vectors small enough to fit in EAX. */
6326 /* MMX/3dNow values are returned in MM0,
6327 except when it doesn't exits. */
6329 return (TARGET_MMX ? 0 : 1);
6331 /* SSE values are returned in XMM0, except when it doesn't exist. */
6333 return (TARGET_SSE ? 0 : 1);
6335 /* AVX values are returned in YMM0, except when it doesn't exist. */
6337 return TARGET_AVX ? 0 : 1;
6346 /* OImode shouldn't be used directly. */
6347 gcc_assert (mode != OImode);
/* SysV x64: a value is returned in memory exactly when
   examine_argument cannot place it in registers.  */
6352 static int ATTRIBUTE_UNUSED
6353 return_in_memory_64 (const_tree type, enum machine_mode mode)
6355 int needed_intregs, needed_sseregs;
6356 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* MS x64: only 1/2/4/8-byte values (and 16-byte non-complex
   int/vector values, i.e. __m128) come back in registers.  */
6359 static int ATTRIBUTE_UNUSED
6360 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6362 HOST_WIDE_INT size = int_size_in_bytes (type);
6364 /* __m128 is returned in xmm0. */
6365 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6366 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6369 /* Otherwise, the size must be exactly in [1248]. */
6370 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: subtarget override if defined,
   otherwise dispatch to the per-ABI helper.  */
6374 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6376 #ifdef SUBTARGET_RETURN_IN_MEMORY
6377 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6379 const enum machine_mode mode = type_natural_mode (type, NULL);
6383 if (ix86_function_type_abi (fntype) == MS_ABI)
6384 return return_in_memory_ms_64 (type, mode);
6386 return return_in_memory_64 (type, mode);
6389 return return_in_memory_32 (type, mode);
6393 /* Return false iff TYPE is returned in memory. This version is used
6394 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6395 but differs notably in that when MMX is available, 8-byte vectors
6396 are returned in memory, rather than in MMX registers. */
6399 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6402 enum machine_mode mode = type_natural_mode (type, NULL);
6405 return return_in_memory_64 (type, mode);
6407 if (mode == BLKmode)
6410 size = int_size_in_bytes (type);
6412 if (VECTOR_MODE_P (mode))
6414 /* Return in memory only if MMX registers *are* available. This
6415 seems backwards, but it is consistent with the existing
/* NOTE(review): the TImode/XFmode size comparisons that follow these
   labels are elided from this view.  */
6422 else if (mode == TImode)
6424 else if (mode == XFmode)
6430 /* When returning SSE vector types, we have a choice of either
6431 (1) being abi incompatible with a -march switch, or
6432 (2) generating an error.
6433 Given no good solution, I think the safest thing is one warning.
6434 The user won't be able to use -Werror, but....
6436 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6437 called in response to actually generating a caller or callee that
6438 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6439 via aggregate_value_p for general type probing from tree-ssa. */
6442 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* Static so each warning fires at most once per compilation.  */
6444 static bool warnedsse, warnedmmx;
6446 if (!TARGET_64BIT && type)
6448 /* Look at the return type of the function, not the function type. */
6449 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6451 if (!TARGET_SSE && !warnedsse)
6454 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6457 warning (0, "SSE vector return without SSE enabled "
6462 if (!TARGET_MMX && !warnedmmx)
6464 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6467 warning (0, "MMX vector return without MMX enabled "
6477 /* Create the va_list data type. */
6479 /* Returns the calling convention specific va_list date type.
6480 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* Builds the SysV x86-64 four-field va_list record:
   { unsigned gp_offset; unsigned fp_offset;
     void *overflow_arg_area; void *reg_save_area; }
   wrapped in a one-element array.  32-bit and MS ABI use `char *'.  */
6483 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6485 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6487 /* For i386 we use plain pointer to argument area. */
6488 if (!TARGET_64BIT || abi == MS_ABI)
6489 return build_pointer_type (char_type_node);
6491 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6492 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6494 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6495 unsigned_type_node);
6496 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6497 unsigned_type_node);
6498 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6500 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so optimizations elsewhere can track
   va_list register usage.  */
6503 va_list_gpr_counter_field = f_gpr;
6504 va_list_fpr_counter_field = f_fpr;
6506 DECL_FIELD_CONTEXT (f_gpr) = record;
6507 DECL_FIELD_CONTEXT (f_fpr) = record;
6508 DECL_FIELD_CONTEXT (f_ovf) = record;
6509 DECL_FIELD_CONTEXT (f_sav) = record;
6511 TREE_CHAIN (record) = type_decl;
6512 TYPE_NAME (record) = type_decl;
6513 TYPE_FIELDS (record) = f_gpr;
6514 TREE_CHAIN (f_gpr) = f_fpr;
6515 TREE_CHAIN (f_fpr) = f_ovf;
6516 TREE_CHAIN (f_ovf) = f_sav;
6518 layout_type (record);
6520 /* The correct type is an array type of one element. */
6521 return build_array_type (record, build_index_type (size_zero_node));
6524 /* Setup the builtin va_list data type and for 64-bit the additional
6525 calling convention specific va_list data types. */
6528 ix86_build_builtin_va_list (void)
6530 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6532 /* Initialize abi specific va_list builtin types. */
/* NOTE(review): elided excerpt -- the TARGET_64BIT guard and the
   else-branches that reuse `ret' for the native ABI are partly
   missing; the visible pattern builds the non-native ABI's va_list
   and canonicalizes both to RECORD_TYPE variants.  */
6536 if (ix86_abi == MS_ABI)
6538 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6539 if (TREE_CODE (t) != RECORD_TYPE)
6540 t = build_variant_type_copy (t);
6541 sysv_va_list_type_node = t;
6546 if (TREE_CODE (t) != RECORD_TYPE)
6547 t = build_variant_type_copy (t);
6548 sysv_va_list_type_node = t;
6550 if (ix86_abi != MS_ABI)
6552 t = ix86_build_builtin_va_list_abi (MS_ABI);
6553 if (TREE_CODE (t) != RECORD_TYPE)
6554 t = build_variant_type_copy (t);
6555 ms_va_list_type_node = t;
6560 if (TREE_CODE (t) != RECORD_TYPE)
6561 t = build_variant_type_copy (t);
6562 ms_va_list_type_node = t;
6569 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* NOTE(review): heavily elided excerpt -- local declarations (save_area,
   set, mem, label, label_ref, tmp_reg, nsse_reg, i) and several braces
   are missing from view; annotations cover only the visible lines.  */
6572 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6581 int regparm = ix86_regparm;
6583 if (cum->call_abi != ix86_abi)
6584 regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6586 /* GPR size of varargs save area. */
6587 if (cfun->va_list_gpr_size)
6588 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6590 ix86_varargs_gpr_size = 0;
6592 /* FPR size of varargs save area. We don't need it if we don't pass
6593 anything in SSE registers. */
6594 if (cum->sse_nregs && cfun->va_list_fpr_size)
6595 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6597 ix86_varargs_fpr_size = 0;
6599 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6602 save_area = frame_pointer_rtx;
6603 set = get_varargs_alias_set ();
/* Spill the unnamed integer parameter registers into the save area.  */
6605 for (i = cum->regno;
6607 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6610 mem = gen_rtx_MEM (Pmode,
6611 plus_constant (save_area, i * UNITS_PER_WORD));
6612 MEM_NOTRAP_P (mem) = 1;
6613 set_mem_alias_set (mem, set);
6614 emit_move_insn (mem, gen_rtx_REG (Pmode,
6615 x86_64_int_parameter_registers[i]));
6618 if (ix86_varargs_fpr_size)
6620 /* Now emit code to save SSE registers. The AX parameter contains number
6621 of SSE parameter registers used to call this function. We use
6622 sse_prologue_save insn template that produces computed jump across
6623 SSE saves. We need some preparation work to get this working. */
6625 label = gen_label_rtx ();
6626 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6628 /* Compute address to jump to :
6629 label - eax*4 + nnamed_sse_arguments*4 Or
6630 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6631 tmp_reg = gen_reg_rtx (Pmode);
6632 nsse_reg = gen_reg_rtx (Pmode);
6633 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6634 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6635 gen_rtx_MULT (Pmode, nsse_reg,
6638 /* vmovaps is one byte longer than movaps. */
6640 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6641 gen_rtx_PLUS (Pmode, tmp_reg,
6647 gen_rtx_CONST (DImode,
6648 gen_rtx_PLUS (DImode,
6650 GEN_INT (cum->sse_regno
6651 * (TARGET_AVX ? 5 : 4)))));
6653 emit_move_insn (nsse_reg, label_ref);
6654 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6656 /* Compute address of memory block we save into. We always use pointer
6657 pointing 127 bytes after first byte to store - this is needed to keep
6658 instruction size limited by 4 bytes (5 bytes for AVX) with one
6659 byte displacement. */
6660 tmp_reg = gen_reg_rtx (Pmode);
6661 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6662 plus_constant (save_area,
6663 ix86_varargs_gpr_size + 127)));
6664 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6665 MEM_NOTRAP_P (mem) = 1;
6666 set_mem_alias_set (mem, set);
6667 set_mem_align (mem, BITS_PER_WORD);
6669 /* And finally do the dirty job! */
6670 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6671 GEN_INT (cum->sse_regno), label));
/* MS x64 varargs prologue: spill each remaining named-parameter
   register into the caller-allocated home area above the return
   address (virtual_incoming_args_rtx).  */
6676 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6678 alias_set_type set = get_varargs_alias_set ();
6681 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6685 mem = gen_rtx_MEM (Pmode,
6686 plus_constant (virtual_incoming_args_rtx,
6687 i * UNITS_PER_WORD));
6688 MEM_NOTRAP_P (mem) = 1;
6689 set_mem_alias_set (mem, set);
6691 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6692 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: advance a copy of CUM past the
   last named argument (for stdarg functions), then dispatch to the
   ABI-specific register-save worker.  */
6697 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6698 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6701 CUMULATIVE_ARGS next_cum;
6704 /* This argument doesn't appear to be used anymore. Which is good,
6705 because the old code here didn't suppress rtl generation. */
6706 gcc_assert (!no_rtl);
6711 fntype = TREE_TYPE (current_function_decl);
6713 /* For varargs, we do not want to skip the dummy va_dcl argument.
6714 For stdargs, we do want to skip the last named argument. */
6716 if (stdarg_p (fntype))
6717 function_arg_advance (&next_cum, mode, type, 1);
6719 if (cum->call_abi == MS_ABI)
6720 setup_incoming_varargs_ms_64 (&next_cum)
6722 setup_incoming_varargs_64 (&next_cum);
6725 /* Checks if TYPE is of kind va_list char *. */
6728 is_va_list_char_pointer (tree type)
6732 /* For 32-bit it is always true. */
/* 64-bit: canonicalize and test against the MS (char *) va_list.  */
6735 canonic = ix86_canonical_va_list_type (type);
6736 return (canonic == ms_va_list_type_node
6737 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6740 /* Implement va_start. */
6743 ix86_va_start (tree valist, rtx nextarg)
6745 HOST_WIDE_INT words, n_gpr, n_fpr;
6746 tree f_gpr, f_fpr, f_ovf, f_sav;
6747 tree gpr, fpr, ovf, sav, t;
6750 /* Only 64bit target needs something special. */
6751 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6753 std_expand_builtin_va_start (valist, nextarg);
/* Walk the four fields of the SysV x86-64 va_list record:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
6757 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6758 f_fpr = TREE_CHAIN (f_gpr);
6759 f_ovf = TREE_CHAIN (f_fpr);
6760 f_sav = TREE_CHAIN (f_ovf);
6762 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6763 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6764 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6765 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6766 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6768 /* Count number of gp and fp argument registers used. */
6769 words = crtl->args.info.words;
6770 n_gpr = crtl->args.info.regno;
6771 n_fpr = crtl->args.info.sse_regno;
6773 if (cfun->va_list_gpr_size)
/* gp_offset: 8 bytes for each GP register already consumed.  */
6775 type = TREE_TYPE (gpr);
6776 t = build2 (MODIFY_EXPR, type,
6777 gpr, build_int_cst (type, n_gpr * 8));
6778 TREE_SIDE_EFFECTS (t) = 1;
6779 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6782 if (TARGET_SSE && cfun->va_list_fpr_size)
/* fp_offset: 16-byte SSE slots placed after the whole GP save area.  */
6784 type = TREE_TYPE (fpr);
6785 t = build2 (MODIFY_EXPR, type, fpr,
6786 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6787 TREE_SIDE_EFFECTS (t) = 1;
6788 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6791 /* Find the overflow area. */
6792 type = TREE_TYPE (ovf);
6793 t = make_tree (type, crtl->args.internal_arg_pointer);
6795 t = build2 (POINTER_PLUS_EXPR, type, t,
6796 size_int (words * UNITS_PER_WORD));
6797 t = build2 (MODIFY_EXPR, type, ovf, t);
6798 TREE_SIDE_EFFECTS (t) = 1;
6799 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6801 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6803 /* Find the register save area.
6804 Prologue of the function save it right above stack frame. */
6805 type = TREE_TYPE (sav);
6806 t = make_tree (type, frame_pointer_rtx);
/* If no GP registers were saved, the save area starts where the GP
   part would have ended; bias the pointer back accordingly.  */
6807 if (!ix86_varargs_gpr_size)
6808 t = build2 (POINTER_PLUS_EXPR, type, t,
6809 size_int (-8 * X86_64_REGPARM_MAX));
6810 t = build2 (MODIFY_EXPR, type, sav, t);
6811 TREE_SIDE_EFFECTS (t) = 1;
6812 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6816 /* Implement va_arg. */
6819 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6822 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6823 tree f_gpr, f_fpr, f_ovf, f_sav;
6824 tree gpr, fpr, ovf, sav, t;
6826 tree lab_false, lab_over = NULL_TREE;
6831 enum machine_mode nat_mode;
6834 /* Only 64bit target needs something special. */
6835 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6836 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6838 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6839 f_fpr = TREE_CHAIN (f_gpr);
6840 f_ovf = TREE_CHAIN (f_fpr);
6841 f_sav = TREE_CHAIN (f_ovf);
6843 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6844 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6845 valist = build_va_arg_indirect_ref (valist);
6846 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6847 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6848 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Pass-by-reference arguments are fetched as a pointer and dereferenced
   once more at the very end.  */
6850 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6852 type = build_pointer_type (type);
6853 size = int_size_in_bytes (type);
6854 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6856 nat_mode = type_natural_mode (type, NULL);
6865 /* Unnamed 256bit vector mode parameters are passed on stack. */
6866 if (ix86_cfun_abi () == SYSV_ABI)
/* CONTAINER describes which registers (if any) would hold the value.  */
6873 container = construct_container (nat_mode, TYPE_MODE (type),
6874 type, 0, X86_64_REGPARM_MAX,
6875 X86_64_SSE_REGPARM_MAX, intreg,
6880 /* Pull the value out of the saved registers. */
6882 addr = create_tmp_var (ptr_type_node, "addr");
6883 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6887 int needed_intregs, needed_sseregs;
6889 tree int_addr, sse_addr;
6891 lab_false = create_artificial_label ();
6892 lab_over = create_artificial_label ();
6894 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A stack temporary is needed when the value's alignment exceeds what
   the register save area can guarantee.  */
6896 need_temp = (!REG_P (container)
6897 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6898 || TYPE_ALIGN (type) > 128));
6900 /* In case we are passing structure, verify that it is consecutive block
6901 on the register save area. If not we need to do moves. */
6902 if (!need_temp && !REG_P (container))
6904 /* Verify that all registers are strictly consecutive */
6905 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6909 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6911 rtx slot = XVECEXP (container, 0, i);
6912 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6913 || INTVAL (XEXP (slot, 1)) != i * 16)
6921 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6923 rtx slot = XVECEXP (container, 0, i);
6924 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6925 || INTVAL (XEXP (slot, 1)) != i * 8)
6937 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6938 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6939 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6940 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6943 /* First ensure that we fit completely in registers. */
6946 t = build_int_cst (TREE_TYPE (gpr),
6947 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6948 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6949 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6950 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6951 gimplify_and_add (t, pre_p);
/* Same check for the SSE portion of the save area.  */
6955 t = build_int_cst (TREE_TYPE (fpr),
6956 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6957 + X86_64_REGPARM_MAX * 8);
6958 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6959 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6960 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6961 gimplify_and_add (t, pre_p);
6964 /* Compute index to start of area used for integer regs. */
6967 /* int_addr = gpr + sav; */
6968 t = fold_convert (sizetype, gpr);
6969 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6970 gimplify_assign (int_addr, t, pre_p);
6974 /* sse_addr = fpr + sav; */
6975 t = fold_convert (sizetype, fpr);
6976 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6977 gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: copy each register-sized piece into a stack
   temporary so the value is addressable as one contiguous object.  */
6982 tree temp = create_tmp_var (type, "va_arg_tmp");
6985 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6986 gimplify_assign (addr, t, pre_p);
6988 for (i = 0; i < XVECLEN (container, 0); i++)
6990 rtx slot = XVECEXP (container, 0, i);
6991 rtx reg = XEXP (slot, 0);
6992 enum machine_mode mode = GET_MODE (reg);
6993 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6994 tree addr_type = build_pointer_type (piece_type);
6995 tree daddr_type = build_pointer_type_for_mode (piece_type,
6999 tree dest_addr, dest;
/* Source slot: SSE pieces are 16 bytes apart, GP pieces 8.  */
7001 if (SSE_REGNO_P (REGNO (reg)))
7003 src_addr = sse_addr;
7004 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7008 src_addr = int_addr;
7009 src_offset = REGNO (reg) * 8;
7011 src_addr = fold_convert (addr_type, src_addr);
7012 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7013 size_int (src_offset));
7014 src = build_va_arg_indirect_ref (src_addr);
7016 dest_addr = fold_convert (daddr_type, addr);
7017 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7018 size_int (INTVAL (XEXP (slot, 1))));
7019 dest = build_va_arg_indirect_ref (dest_addr);
7021 gimplify_assign (dest, src, pre_p);
/* Advance the gp_offset/fp_offset counters past what we consumed.  */
7027 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7028 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7029 gimplify_assign (gpr, t, pre_p);
7034 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7035 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7036 gimplify_assign (fpr, t, pre_p);
7039 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7041 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7044 /* ... otherwise out of the overflow area. */
7046 /* When we align parameter on stack for caller, if the parameter
7047 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7048 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
7049 here with caller. */
7050 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7051 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7052 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7054 /* Care for on-stack alignment if needed. */
7055 if (arg_boundary <= 64
7056 || integer_zerop (TYPE_SIZE (type)))
/* Round OVF up to the required alignment: (ovf + align-1) & -align.  */
7060 HOST_WIDE_INT align = arg_boundary / 8;
7061 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7062 size_int (align - 1));
7063 t = fold_convert (sizetype, t);
7064 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7066 t = fold_convert (TREE_TYPE (ovf), t);
7068 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7069 gimplify_assign (addr, t, pre_p);
/* Bump the overflow pointer past the argument just consumed.  */
7071 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7072 size_int (rsize * UNITS_PER_WORD));
7073 gimplify_assign (unshare_expr (ovf), t, pre_p);
7076 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7078 ptrtype = build_pointer_type (type);
7079 addr = fold_convert (ptrtype, addr);
/* For pass-by-reference, ADDR holds a pointer to the pointer.  */
7082 addr = build_va_arg_indirect_ref (addr);
7083 return build_va_arg_indirect_ref (addr);
7086 /* Return nonzero if OPNUM's MEM should be matched
7087 in movabs* patterns. */
7090 ix86_check_movabs (rtx insn, int opnum)
7094 set = PATTERN (insn);
/* Some patterns wrap the SET in a PARALLEL; look at its first element.  */
7095 if (GET_CODE (set) == PARALLEL)
7096 set = XVECEXP (set, 0, 0);
7097 gcc_assert (GET_CODE (set) == SET);
7098 mem = XEXP (set, opnum);
/* Strip any SUBREGs to reach the underlying MEM.  */
7099 while (GET_CODE (mem) == SUBREG)
7100 mem = SUBREG_REG (mem);
7101 gcc_assert (MEM_P (mem));
/* Volatile memory may only be matched when volatile_ok is set.  */
7102 return (volatile_ok || !MEM_VOLATILE_P (mem));
7105 /* Initialize the table of extra 80387 mathematical constants. */
7108 init_ext_80387_constants (void)
/* Decimal strings for the constants loadable by the dedicated x87
   load instructions (see the per-entry opcode comments).  */
7110 static const char * cst[5] =
7112 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7113 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7114 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7115 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7116 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7120 for (i = 0; i < 5; i++)
7122 real_from_string (&ext_80387_constants_table[i], cst[i]);
7123 /* Ensure each constant is rounded to XFmode precision. */
7124 real_convert (&ext_80387_constants_table[i],
7125 XFmode, &ext_80387_constants_table[i]);
/* Mark the table as populated so it is only built once.  */
7128 ext_80387_constants_init = 1;
7131 /* Return true if the constant is something that can be loaded with
7132 a special instruction. */
7135 standard_80387_constant_p (rtx x)
7137 enum machine_mode mode = GET_MODE (x);
/* Only x87 floating CONST_DOUBLEs qualify.  */
7141 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
/* 0.0 and 1.0 have dedicated loads (fldz/fld1); their return codes
   are on elided lines of this listing.  */
7144 if (x == CONST0_RTX (mode))
7146 if (x == CONST1_RTX (mode))
7149 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7151 /* For XFmode constants, try to find a special 80387 instruction when
7152 optimizing for size or on those CPUs that benefit from them. */
7154 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7158 if (! ext_80387_constants_init)
7159 init_ext_80387_constants ();
7161 for (i = 0; i < 5; i++)
7162 if (real_identical (&r, &ext_80387_constants_table[i]))
7166 /* Load of the constant -0.0 or -1.0 will be split as
7167 fldz;fchs or fld1;fchs sequence. */
7168 if (real_isnegzero (&r))
7170 if (real_identical (&r, &dconstm1))
7176 /* Return the opcode of the special instruction to be used to load
/* Dispatches on the code returned by standard_80387_constant_p; the
   individual cases are on elided lines of this listing.  */
7180 standard_80387_constant_opcode (rtx x)
7182 switch (standard_80387_constant_p (x))
7206 /* Return the CONST_DOUBLE representing the 80387 constant that is
7207 loaded by the specified special instruction. The argument IDX
7208 matches the return value from standard_80387_constant_p. */
7211 standard_80387_constant_rtx (int idx)
/* Lazily build the extended-constant table on first use.  */
7215 if (! ext_80387_constants_init)
7216 init_ext_80387_constants ();
/* IDX is mapped to table slot I on elided lines above.  */
7232 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7236 /* Return 1 if mode is a valid mode for sse. */
/* (The mode switch forming the body is on elided lines.)  */
7238 standard_sse_mode_p (enum machine_mode mode)
7255 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
7256 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7257 modes and AVX is enabled. */
7260 standard_sse_constant_p (rtx x)
7262 enum machine_mode mode = GET_MODE (x);
7264 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7266 if (vector_all_ones_operand (x, mode))
/* Negative values flag all-ones constants whose required ISA
   (SSE2 / AVX) is not enabled.  */
7268 if (standard_sse_mode_p (mode))
7269 return TARGET_SSE2 ? 2 : -2;
7270 else if (VALID_AVX256_REG_MODE (mode))
7271 return TARGET_AVX ? 3 : -3;
7277 /* Return the opcode of the special instruction to be used to load
7281 standard_sse_constant_opcode (rtx insn, rtx x)
7283 switch (standard_sse_constant_p (x))
/* All-zero constant: xor the destination register with itself.  */
7286 switch (get_attr_mode (insn))
7289 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7291 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7293 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7295 return "vxorps\t%x0, %x0, %x0";
7297 return "vxorpd\t%x0, %x0, %x0";
7299 return "vpxor\t%x0, %x0, %x0";
/* All-ones constant: pcmpeqd of a register with itself sets all bits.  */
7305 switch (get_attr_mode (insn))
7310 return "vpcmpeqd\t%0, %0, %0";
7316 return "pcmpeqd\t%0, %0";
7321 /* Returns 1 if OP contains a symbol reference */
7324 symbolic_reference_mentioned_p (rtx op)
7329 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
/* Recursively scan every sub-rtx, including vector ('E') elements.  */
7332 fmt = GET_RTX_FORMAT (GET_CODE (op));
7333 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7339 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7340 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7344 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7351 /* Return 1 if it is appropriate to emit `ret' instructions in the
7352 body of a function. Do this only if the epilogue is simple, needing a
7353 couple of insns. Prior to reloading, we can't tell how many registers
7354 must be saved, so return 0 then. Return 0 if there is no frame
7355 marker to de-allocate. */
7358 ix86_can_use_return_insn_p (void)
7360 struct ix86_frame frame;
7362 if (! reload_completed || frame_pointer_needed)
7365 /* Don't allow more than 32 pop, since that's all we can do
7366 with one instruction. */
/* NOTE(review): argument sizes of 32768 bytes or more are rejected —
   appears to match the `ret' imm16 operand limit; confirm.  */
7367 if (crtl->args.pops_args
7368 && crtl->args.size >= 32768)
/* A bare `ret' only works when nothing remains to deallocate and no
   registers need restoring.  */
7371 ix86_compute_frame_layout (&frame);
7372 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7375 /* Value should be nonzero if functions must have frame pointers.
7376 Zero means the frame pointer need not be set up (and parms may
7377 be accessed via the stack pointer) in functions that seem suitable. */
7380 ix86_frame_pointer_required (void)
7382 /* If we accessed previous frames, then the generated code expects
7383 to be able to access the saved ebp value in our frame. */
7384 if (cfun->machine->accesses_prev_frame)
7387 /* Several x86 os'es need a frame pointer for other reasons,
7388 usually pertaining to setjmp. */
7389 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7392 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7393 the frame pointer by default. Turn it back on now if we've not
7394 got a leaf function. */
7395 if (TARGET_OMIT_LEAF_FRAME_POINTER
7396 && (!current_function_is_leaf
7397 || ix86_current_function_calls_tls_descriptor))
/* (Remaining conditions and the final return are on elided lines.)  */
7406 /* Record that the current function accesses previous call frames. */
7409 ix86_setup_frame_addresses (void)
/* This flag forces a frame pointer; see ix86_frame_pointer_required.  */
7411 cfun->machine->accesses_prev_frame = 1;
7414 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7415 # define USE_HIDDEN_LINKONCE 1
7417 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a pc thunk has been requested; the
   thunks themselves are emitted by ix86_file_end.  */
7420 static int pic_labels_used;
7422 /* Fills in the label name that should be used for a pc thunk for
7423 the given register. */
7426 get_pc_thunk_name (char name[32], unsigned int regno)
/* PC thunks are a 32-bit-only mechanism.  */
7428 gcc_assert (!TARGET_64BIT);
/* Hidden link-once thunks share one global name across objects;
   otherwise emit a file-local label.  */
7430 if (USE_HIDDEN_LINKONCE)
7431 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7433 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7437 /* This function generates code for -fpic that loads %ebx with
7438 the return address of the caller and then returns. */
7441 ix86_file_end (void)
/* Emit one get_pc thunk for every register flagged in pic_labels_used.  */
7446 for (regno = 0; regno < 8; ++regno)
7450 if (! ((pic_labels_used >> regno) & 1))
7453 get_pc_thunk_name (name, regno);
/* Darwin: place the thunk in a coalesced text section with weak,
   private-extern linkage.  */
7458 switch_to_section (darwin_sections[text_coal_section]);
7459 fputs ("\t.weak_definition\t", asm_out_file);
7460 assemble_name (asm_out_file, name);
7461 fputs ("\n\t.private_extern\t", asm_out_file);
7462 assemble_name (asm_out_file, name);
7463 fputs ("\n", asm_out_file);
7464 ASM_OUTPUT_LABEL (asm_out_file, name);
7468 if (USE_HIDDEN_LINKONCE)
/* ELF: build a one-only hidden function so duplicate thunks merge
   at link time.  */
7472 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7474 TREE_PUBLIC (decl) = 1;
7475 TREE_STATIC (decl) = 1;
7476 DECL_ONE_ONLY (decl) = 1;
7478 (*targetm.asm_out.unique_section) (decl, 0);
7479 switch_to_section (get_named_section (decl, NULL, 0));
7481 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7482 fputs ("\t.hidden\t", asm_out_file);
7483 assemble_name (asm_out_file, name);
7484 fputc ('\n', asm_out_file);
7485 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7489 switch_to_section (text_section);
7490 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (at the top of the stack)
   into the register, then return.  */
7493 xops[0] = gen_rtx_REG (Pmode, regno);
7494 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7495 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7496 output_asm_insn ("ret", xops);
7499 if (NEED_INDICATE_EXEC_STACK)
7500 file_end_indicate_exec_stack ();
7503 /* Emit code for the SET_GOT patterns. */
7506 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7512 if (TARGET_VXWORKS_RTP && flag_pic)
7514 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7515 xops[2] = gen_rtx_MEM (Pmode,
7516 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7517 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7519 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7520 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7521 an unadorned address. */
7522 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7523 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7524 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7528 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction a call/pop pair is acceptable:
   emit the PC load inline.  */
7530 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7532 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7535 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7537 output_asm_insn ("call\t%a2", xops);
7540 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7541 is what will be referenced by the Mach-O PIC subsystem. */
7543 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7546 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7547 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7550 output_asm_insn ("pop%z0\t%0", xops);
/* Otherwise call a per-register get_pc thunk, which keeps the CPU's
   return-address predictor stack balanced.  */
7555 get_pc_thunk_name (name, REGNO (dest));
7556 pic_labels_used |= 1 << REGNO (dest);
7558 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7559 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7560 output_asm_insn ("call\t%X2", xops);
7561 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7562 is what will be referenced by the Mach-O PIC subsystem. */
7565 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7567 targetm.asm_out.internal_label (asm_out_file, "L",
7568 CODE_LABEL_NUMBER (label));
/* Finally add the GOT offset to form the GOT base pointer.  */
7575 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7576 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7578 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7583 /* Generate an "push" pattern for input ARG. */
/* NOTE(review): the function signature is on an elided line.  The push
   is modeled as a store through a PRE_DEC of the stack pointer.  */
7588 return gen_rtx_SET (VOIDmode,
7590 gen_rtx_PRE_DEC (Pmode,
7591 stack_pointer_rtx)),
7595 /* Return >= 0 if there is an unused call-clobbered register available
7596 for the entire function. */
7599 ix86_select_alt_pic_regnum (void)
/* Only safe in leaf functions that are not profiled and do not call
   through a TLS descriptor.  */
7601 if (current_function_is_leaf && !crtl->profile
7602 && !ix86_current_function_calls_tls_descriptor)
7605 /* Can't use the same register for both PIC and DRAP. */
7607 drap = REGNO (crtl->drap_reg);
/* Scan the call-clobbered integer registers (regno 2 down to 0) for
   one that is never live in this function.  */
7610 for (i = 2; i >= 0; --i)
7611 if (i != drap && !df_regs_ever_live_p (i))
7615 return INVALID_REGNUM;
7618 /* Return 1 if we need to save REGNO. */
7620 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved if it is live, or if EH return or a
   constant pool may need it reloaded — unless an unused call-clobbered
   register can stand in for it (elided branch below).  */
7622 if (pic_offset_table_rtx
7623 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7624 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7626 || crtl->calls_eh_return
7627 || crtl->uses_const_pool))
7629 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* For eh_return, the EH data registers must also be saved.  */
7634 if (crtl->calls_eh_return && maybe_eh_return)
7639 unsigned test = EH_RETURN_DATA_REGNO (i);
7640 if (test == INVALID_REGNUM)
/* The DRAP register is handled specially (condition partly elided).  */
7648 && regno == REGNO (crtl->drap_reg))
/* Default: save any live, non-fixed callee-saved register; the hard
   frame pointer is saved separately when a frame pointer is used.  */
7651 return (df_regs_ever_live_p (regno)
7652 && !call_used_regs[regno]
7653 && !fixed_regs[regno]
7654 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7657 /* Return number of saved general purpose registers. */
7660 ix86_nsaved_regs (void)
7665 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7666 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7671 /* Return number of saved SSE registers. */
7674 ix86_nsaved_sseregs (void)
/* Only the MS 64-bit ABI has callee-saved SSE registers.  */
7679 if (ix86_cfun_abi () != MS_ABI)
7681 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7682 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7687 /* Given FROM and TO register numbers, say whether this elimination is
7688 allowed. If stack alignment is needed, we can only replace argument
7689 pointer with hard frame pointer, or replace frame pointer with stack
7690 pointer. Otherwise, frame pointer elimination is automatically
7691 handled and all other eliminations are valid. */
7694 ix86_can_eliminate (int from, int to)
7696 if (stack_realign_fp)
7697 return ((from == ARG_POINTER_REGNUM
7698 && to == HARD_FRAME_POINTER_REGNUM)
7699 || (from == FRAME_POINTER_REGNUM
7700 && to == STACK_POINTER_REGNUM));
/* Eliminating to the stack pointer is only valid when no frame
   pointer is needed; all other eliminations are always fine.  */
7702 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7705 /* Return the offset between two registers, one to be eliminated, and the other
7706 its replacement, at the start of a routine. */
7709 ix86_initial_elimination_offset (int from, int to)
7711 struct ix86_frame frame;
7712 ix86_compute_frame_layout (&frame);
/* All offsets come straight from the computed frame layout.  */
7714 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7715 return frame.hard_frame_pointer_offset;
7716 else if (from == FRAME_POINTER_REGNUM
7717 && to == HARD_FRAME_POINTER_REGNUM)
7718 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Only eliminations to the stack pointer remain.  */
7721 gcc_assert (to == STACK_POINTER_REGNUM);
7723 if (from == ARG_POINTER_REGNUM)
7724 return frame.stack_pointer_offset;
7726 gcc_assert (from == FRAME_POINTER_REGNUM);
7727 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7731 /* In a dynamically-aligned function, we can't know the offset from
7732 stack pointer to frame pointer, so we must ensure that setjmp
7733 eliminates fp against the hard fp (%ebp) rather than trying to
7734 index from %esp up to the top of the frame across a gap that is
7735 of unknown (at compile-time) size. */
7737 ix86_builtin_setjmp_frame_value (void)
/* Used by __builtin_setjmp expansion to choose the saved frame value.  */
7739 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7742 /* Fill structure ix86_frame about frame of currently computed function. */
7745 ix86_compute_frame_layout (struct ix86_frame *frame)
7747 HOST_WIDE_INT total_size;
7748 unsigned int stack_alignment_needed;
7749 HOST_WIDE_INT offset;
7750 unsigned int preferred_alignment;
7751 HOST_WIDE_INT size = get_frame_size ();
7753 frame->nregs = ix86_nsaved_regs ();
7754 frame->nsseregs = ix86_nsaved_sseregs ();
/* crtl tracks alignments in bits; convert to bytes.  */
7757 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7758 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7760 /* MS ABI seem to require stack alignment to be always 16 except for function
7762 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7764 preferred_alignment = 16;
7765 stack_alignment_needed = 16;
7766 crtl->preferred_stack_boundary = 128;
7767 crtl->stack_alignment_needed = 128;
7770 gcc_assert (!size || stack_alignment_needed);
7771 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7772 gcc_assert (preferred_alignment <= stack_alignment_needed);
7774 /* During reload iteration the amount of registers saved can change.
7775 Recompute the value as needed. Do not recompute when amount of registers
7776 didn't change as reload does multiple calls to the function and does not
7777 expect the decision to change within single iteration. */
7778 if (!optimize_function_for_size_p (cfun)
7779 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7781 int count = frame->nregs;
7783 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7784 /* The fast prologue uses move instead of push to save registers. This
7785 is significantly longer, but also executes faster as modern hardware
7786 can execute the moves in parallel, but can't do that for push/pop.
7788 Be careful about choosing what prologue to emit: When function takes
7789 many instructions to execute we may use slow version as well as in
7790 case function is known to be outside hot spot (this is known with
7791 feedback only). Weight the size of function by number of registers
7792 to save as it is cheap to use one or two push instructions but very
7793 slow to use many of them. */
7795 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7796 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7797 || (flag_branch_probabilities
7798 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7799 cfun->machine->use_fast_prologue_epilogue = false;
7801 cfun->machine->use_fast_prologue_epilogue
7802 = !expensive_function_p (count);
7804 if (TARGET_PROLOGUE_USING_MOVE
7805 && cfun->machine->use_fast_prologue_epilogue)
7806 frame->save_regs_using_mov = true;
7808 frame->save_regs_using_mov = false;
7811 /* Skip return address and saved base pointer. */
7812 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7814 frame->hard_frame_pointer_offset = offset;
7816 /* Set offset to aligned because the realigned frame starts from
7818 if (stack_realign_fp)
7819 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7821 /* Register save area */
7822 offset += frame->nregs * UNITS_PER_WORD;
7824 /* Align SSE reg save area. */
7825 if (frame->nsseregs)
7826 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7828 frame->padding0 = 0;
7830 /* SSE register save area. */
7831 offset += frame->padding0 + frame->nsseregs * 16;
/* Varargs register save area (GP plus FP parts).  */
7834 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7835 offset += frame->va_arg_size;
7837 /* Align start of frame for local function. */
7838 frame->padding1 = ((offset + stack_alignment_needed - 1)
7839 & -stack_alignment_needed) - offset;
7841 offset += frame->padding1;
7843 /* Frame pointer points here. */
7844 frame->frame_pointer_offset = offset;
7848 /* Add outgoing arguments area. Can be skipped if we eliminated
7849 all the function calls as dead code.
7850 Skipping is however impossible when function calls alloca. Alloca
7851 expander assumes that last crtl->outgoing_args_size
7852 of stack frame are unused. */
7853 if (ACCUMULATE_OUTGOING_ARGS
7854 && (!current_function_is_leaf || cfun->calls_alloca
7855 || ix86_current_function_calls_tls_descriptor))
7857 offset += crtl->outgoing_args_size;
7858 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7861 frame->outgoing_arguments_size = 0;
7863 /* Align stack boundary. Only needed if we're calling another function
7865 if (!current_function_is_leaf || cfun->calls_alloca
7866 || ix86_current_function_calls_tls_descriptor)
7867 frame->padding2 = ((offset + preferred_alignment - 1)
7868 & -preferred_alignment) - offset;
7870 frame->padding2 = 0;
7872 offset += frame->padding2;
7874 /* We've reached end of stack frame. */
7875 frame->stack_pointer_offset = offset;
7877 /* Size prologue needs to allocate. */
7878 frame->to_allocate =
7879 (size + frame->padding1 + frame->padding2
7880 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Trivial frames are cheaper with push/pop; 64-bit frames >= 2GB
   cannot use the mov-based save at all.  */
7882 if ((!frame->to_allocate && frame->nregs <= 1)
7883 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7884 frame->save_regs_using_mov = false;
/* Use the red zone for leaf functions where the ABI provides one.  */
7886 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7887 && current_function_is_leaf
7888 && !ix86_current_function_calls_tls_descriptor)
7890 frame->red_zone_size = frame->to_allocate;
7891 if (frame->save_regs_using_mov)
7892 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7893 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7894 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7897 frame->red_zone_size = 0;
7898 frame->to_allocate -= frame->red_zone_size;
7899 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout.  NOTE(review): the guard that
   normally compiles this out is on elided lines of this listing.  */
7901 fprintf (stderr, "\n");
7902 fprintf (stderr, "size: %ld\n", (long)size);
7903 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7904 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7905 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7906 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7907 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7908 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7909 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7910 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7911 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7912 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7913 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7914 (long)frame->hard_frame_pointer_offset);
7915 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7916 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7917 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7918 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7922 /* Emit code to save registers in the prologue. */
7925 ix86_emit_save_regs (void)
/* Push in descending regno order; the epilogue pops in reverse.  */
7930 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7931 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7933 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
/* Mark each push frame-related for DWARF CFI generation.  */
7934 RTX_FRAME_RELATED_P (insn) = 1;
7938 /* Emit code to save registers using MOV insns. First register
7939 is stored at POINTER + OFFSET. */
7941 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7946 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7947 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7949 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7951 gen_rtx_REG (Pmode, regno));
/* Mark each store frame-related for DWARF CFI generation.  */
7952 RTX_FRAME_RELATED_P (insn) = 1;
7953 offset += UNITS_PER_WORD;
7957 /* Emit code to save SSE registers using MOV insns. First register
7958 is stored at POINTER + OFFSET. */
7960 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7966 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7967 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7969 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
/* SSE saves are 16 bytes each, stored 128-bit aligned.  */
7970 set_mem_align (mem, 128);
7971 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7972 RTX_FRAME_RELATED_P (insn) = 1;
7977 /* Expand prologue or epilogue stack adjustment.
7978 The pattern exists to put a dependency on all ebp-based memory accesses.
7979 STYLE should be negative if instructions should be marked as frame related,
7980 zero if %r11 register is live and cannot be freely used and positive
7984 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7989 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7990 else if (x86_64_immediate_operand (offset, DImode))
7991 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
/* Offset does not fit the sign-extended 32-bit immediate: materialize
   it in %r11 first.  */
7995 /* r11 is used by indirect sibcall return as well, set before the
7996 epilogue and used after the epilogue. ATM indirect sibcall
7997 shouldn't be used together with huge frame sizes in one
7998 function because of the frame_size check in sibcall.c. */
8000 r11 = gen_rtx_REG (DImode, R11_REG);
8001 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8003 RTX_FRAME_RELATED_P (insn) = 1;
8004 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8008 RTX_FRAME_RELATED_P (insn) = 1;
8011 /* Find an available register to be used as dynamic realign argument
8012 pointer register. Such a register will be written in prologue and
8013 used in begin of body, so it must not be
8014 1. parameter passing register.
8016 We reuse static-chain register if it is available. Otherwise, we
8017 use DI for i386 and R13 for x86-64. We chose R13 since it has
8020 Return: the regno of chosen register. */
8023 find_drap_reg (void)
8025 tree decl = cfun->decl;
/* 64-bit branch: pick R13 when a static chain or a tail call rules
   out reusing a caller-saved register (see header comment above).  */
8029 /* Use R13 for nested function or function need static chain.
8030 Since function with tail call may use any caller-saved
8031 registers in epilogue, DRAP must not use caller-saved
8032 register in such case. */
8033 if ((decl_function_context (decl)
8034 && !DECL_NO_STATIC_CHAIN (decl))
8035 || crtl->tail_call_emit)
/* 32-bit branch: same reasoning, with DI as the fallback register.  */
8042 /* Use DI for nested function or function need static chain.
8043 Since function with tail call may use any caller-saved
8044 registers in epilogue, DRAP must not use caller-saved
8045 register in such case. */
8046 if ((decl_function_context (decl)
8047 && !DECL_NO_STATIC_CHAIN (decl))
8048 || crtl->tail_call_emit)
/* The static-chain register is only reusable when it is not consumed
   by regparm/fastcall argument passing.  */
8051 /* Reuse static chain register if it isn't used for parameter
8053 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8054 && !lookup_attribute ("fastcall",
8055 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8062 /* Update incoming stack boundary and estimated stack alignment. */
8065 ix86_update_stack_boundary (void)
/* Recompute ix86_incoming_stack_boundary for the current function,
   clamping it per attribute, parm_stack_boundary, and main().  */
8067 /* Prefer the one specified at command line. */
8068 ix86_incoming_stack_boundary
8069 = (ix86_user_incoming_stack_boundary
8070 ? ix86_user_incoming_stack_boundary
8071 : ix86_default_incoming_stack_boundary);
8073 /* Incoming stack alignment can be changed on individual functions
8074 via force_align_arg_pointer attribute. We use the smallest
8075 incoming stack boundary. */
8076 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8077 && lookup_attribute (ix86_force_align_arg_pointer_string,
8078 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8079 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8081 /* The incoming stack frame has to be aligned at least at
8082 parm_stack_boundary. */
8083 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8084 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8086 /* Stack at entrance of main is aligned by runtime. We use the
8087 smallest incoming stack boundary. */
8088 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8089 && DECL_NAME (current_function_decl)
8090 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8091 && DECL_FILE_SCOPE_P (current_function_decl))
8092 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
/* 128 bits = 16 bytes, the alignment the vararg register save area
   requires on x86-64.  */
8094 /* x86_64 vararg needs 16byte stack alignment for register save
8098 && crtl->stack_alignment_estimated < 128)
8099 crtl->stack_alignment_estimated = 128;
8102 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8103 needed or an rtx for DRAP otherwise. */
8106 ix86_get_drap_rtx (void)
/* TARGET_GET_DRAP_RTX hook.  When DRAP-style realignment is active,
   pick a DRAP register, copy it into a pseudo (vDRAP) right after
   function entry, and return that pseudo.  */
8108 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8109 crtl->need_drap = true;
8111 if (stack_realign_drap)
8113 /* Assign DRAP to vDRAP and returns vDRAP */
8114 unsigned int regno = find_drap_reg ();
8119 arg_ptr = gen_rtx_REG (Pmode, regno);
8120 crtl->drap_reg = arg_ptr;
8123 drap_vreg = copy_to_reg (arg_ptr);
/* Emit the copy at the very start of the function body.  */
8127 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8128 RTX_FRAME_RELATED_P (insn) = 1;
8135 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8138 ix86_internal_arg_pointer (void)
/* Incoming arguments are always addressed via the virtual incoming
   args pointer; realignment/DRAP handling happens elsewhere.  */
8140 return virtual_incoming_args_rtx;
8143 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8144 This is called from dwarf2out.c to emit call frame instructions
8145 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
8147 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
/* Translate frame-related UNSPEC SETs into dwarf2out calls.  Only
   UNSPEC_REG_SAVE and UNSPEC_DEF_CFA are handled in the visible code;
   INDEX appears unused here -- presumably kept for the hook signature
   (TODO confirm against the hook declaration).  */
8149 rtx unspec = SET_SRC (pattern);
8150 gcc_assert (GET_CODE (unspec) == UNSPEC);
8154 case UNSPEC_REG_SAVE:
8155 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8156 SET_DEST (pattern));
8158 case UNSPEC_DEF_CFA:
8159 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8160 INTVAL (XVECEXP (unspec, 0, 0)));
8167 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8168 to be generated in correct form. */
8170 ix86_finalize_stack_realign_flags (void)
/* Decide once and for all whether this function must realign its
   stack, and latch the answer in crtl.  Idempotent after the first
   call: a repeat may only confirm the earlier decision.  */
8172 /* Check if stack realign is really needed after reload, and
8173 stores result in cfun */
8174 unsigned int incoming_stack_boundary
8175 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8176 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Leaf functions only need to satisfy the alignment of stack slots
   actually used; non-leaf functions use the full requirement.  */
8177 unsigned int stack_realign = (incoming_stack_boundary
8178 < (current_function_is_leaf
8179 ? crtl->max_used_stack_slot_alignment
8180 : crtl->stack_alignment_needed));
8182 if (crtl->stack_realign_finalized)
8184 /* After stack_realign_needed is finalized, we can no longer
8186 gcc_assert (crtl->stack_realign_needed == stack_realign);
8190 crtl->stack_realign_needed = stack_realign;
8191 crtl->stack_realign_finalized = true;
8195 /* Expand the prologue into a bunch of separate insns. */
8198 ix86_expand_prologue (void)
/* Expand the function prologue as RTL: optional DRAP setup and stack
   realignment, frame-pointer establishment, register saves (push or
   MOV), stack allocation (direct or probed via allocate_stack_worker),
   PIC register setup, and CLD emission.  */
8202 struct ix86_frame frame;
8203 HOST_WIDE_INT allocate;
8205 ix86_finalize_stack_realign_flags ();
8207 /* DRAP should not coexist with stack_realign_fp */
8208 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8210 ix86_compute_frame_layout (&frame);
8212 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8213 of DRAP is needed and stack realignment is really needed after reload */
8214 if (crtl->drap_reg && crtl->stack_realign_needed)
8217 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
/* An extra word is consumed when the DRAP register itself must be
   pushed (it is call-saved).  */
8218 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8219 ? 0 : UNITS_PER_WORD);
8221 gcc_assert (stack_realign_drap);
8223 /* Grab the argument pointer. */
8224 x = plus_constant (stack_pointer_rtx,
8225 (UNITS_PER_WORD + param_ptr_offset));
8228 /* Only need to push parameter pointer reg if it is caller
8230 if (!call_used_regs[REGNO (crtl->drap_reg)])
8232 /* Push arg pointer reg */
8233 insn = emit_insn (gen_push (y));
8234 RTX_FRAME_RELATED_P (insn) = 1;
8237 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8238 RTX_FRAME_RELATED_P (insn) = 1;
8240 /* Align the stack. */
8241 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8243 GEN_INT (-align_bytes)));
8244 RTX_FRAME_RELATED_P (insn) = 1;
8246 /* Replicate the return address on the stack so that return
8247 address can be reached via (argp - 1) slot. This is needed
8248 to implement macro RETURN_ADDR_RTX and intrinsic function
8249 expand_builtin_return_addr etc. */
8251 x = gen_frame_mem (Pmode,
8252 plus_constant (x, -UNITS_PER_WORD));
8253 insn = emit_insn (gen_push (x));
8254 RTX_FRAME_RELATED_P (insn) = 1;
8257 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8258 slower on all targets. Also sdb doesn't like it. */
8260 if (frame_pointer_needed)
8262 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8263 RTX_FRAME_RELATED_P (insn) = 1;
8265 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8266 RTX_FRAME_RELATED_P (insn) = 1;
/* Frame-pointer-based realignment (no DRAP): just AND the stack
   pointer down to the needed alignment.  */
8269 if (stack_realign_fp)
8271 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8272 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8274 /* Align the stack. */
8275 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8277 GEN_INT (-align_bytes)));
8278 RTX_FRAME_RELATED_P (insn) = 1;
8281 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8283 if (!frame.save_regs_using_mov)
8284 ix86_emit_save_regs ();
8286 allocate += frame.nregs * UNITS_PER_WORD;
8288 /* When using red zone we may start register saving before allocating
8289 the stack frame saving one cycle of the prologue. However I will
8290 avoid doing this if I am going to have to probe the stack since
8291 at least on x86_64 the stack probe can turn into a call that clobbers
8292 a red zone location */
8293 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8294 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8295 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8296 && !crtl->stack_realign_needed)
8297 ? hard_frame_pointer_rtx
8298 : stack_pointer_rtx,
8299 -frame.nregs * UNITS_PER_WORD);
/* Plain allocation when no probing is required.  */
8303 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8304 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8305 GEN_INT (-allocate), -1);
8308 /* Only valid for Win32. */
8309 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8313 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8315 if (cfun->machine->call_abi == MS_ABI)
/* eax carries the allocation size to the probing helper; preserve
   its incoming value around the call if it is live at entry.  */
8318 eax_live = ix86_eax_live_at_start_p ();
8322 emit_insn (gen_push (eax));
8323 allocate -= UNITS_PER_WORD;
8326 emit_move_insn (eax, GEN_INT (allocate));
8329 insn = gen_allocate_stack_worker_64 (eax, eax);
8331 insn = gen_allocate_stack_worker_32 (eax, eax);
8332 insn = emit_insn (insn);
8333 RTX_FRAME_RELATED_P (insn) = 1;
/* The worker hides the SP adjustment; attach an explicit
   REG_FRAME_RELATED_EXPR so the CFI still describes it.  */
8334 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8335 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8336 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
8337 t, REG_NOTES (insn));
8341 if (frame_pointer_needed)
8342 t = plus_constant (hard_frame_pointer_rtx,
8345 - frame.nregs * UNITS_PER_WORD);
8347 t = plus_constant (stack_pointer_rtx, allocate);
8348 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* MOV-based saves that could not be issued before allocation (the
   red-zone early-save case above already handled the rest).  */
8352 if (frame.save_regs_using_mov
8353 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8354 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8356 if (!frame_pointer_needed
8357 || !frame.to_allocate
8358 || crtl->stack_realign_needed)
8359 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8361 + frame.nsseregs * 16 + frame.padding0);
8363 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8364 -frame.nregs * UNITS_PER_WORD);
8366 if (!frame_pointer_needed
8367 || !frame.to_allocate
8368 || crtl->stack_realign_needed)
8369 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8372 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8373 - frame.nregs * UNITS_PER_WORD
8374 - frame.nsseregs * 16
/* Set up the PIC register when the function references the GOT.  */
8377 pic_reg_used = false;
8378 if (pic_offset_table_rtx
8379 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8382 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8384 if (alt_pic_reg_used != INVALID_REGNUM)
8385 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8387 pic_reg_used = true;
8394 if (ix86_cmodel == CM_LARGE_PIC)
8396 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8397 rtx label = gen_label_rtx ();
8399 LABEL_PRESERVE_P (label) = 1;
8400 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8401 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8402 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8403 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8404 pic_offset_table_rtx, tmp_reg));
8407 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8410 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8413 /* In the pic_reg_used case, make sure that the got load isn't deleted
8414 when mcount needs it. Blockage to avoid call movement across mcount
8415 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8417 if (crtl->profile && pic_reg_used)
8418 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8420 if (crtl->drap_reg && !crtl->stack_realign_needed)
8422 /* vDRAP is setup but after reload it turns out stack realign
8423 isn't necessary, here we will emit prologue to setup DRAP
8424 without stack realign adjustment */
8425 int drap_bp_offset = UNITS_PER_WORD * 2;
8426 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8427 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8430 /* Prevent instructions from being scheduled into register save push
8431 sequence when access to the redzone area is done through frame pointer.
8432 The offset between the frame pointer and the stack pointer is calculated
8433 relative to the value of the stack pointer at the end of the function
8434 prologue, and moving instructions that access redzone area via frame
8435 pointer inside push sequence violates this assumption. */
8436 if (frame_pointer_needed && frame.red_zone_size)
8437 emit_insn (gen_memory_blockage ());
8439 /* Emit cld instruction if stringops are used in the function. */
8440 if (TARGET_CLD && ix86_current_function_needs_cld)
8441 emit_insn (gen_cld ());
8444 /* Emit code to restore saved registers using MOV insns. First register
8445 is restored from POINTER + OFFSET. */
8447 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8448 int maybe_eh_return)
/* Reload saved integer (non-SSE) registers with MOV loads from
   POINTER + OFFSET.  MAYBE_EH_RETURN is forwarded to ix86_save_reg so
   eh_return paths also restore the EH data registers.  */
8451 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8453 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8454 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8456 /* Ensure that adjust_address won't be forced to produce pointer
8457 out of range allowed by x86-64 instruction set. */
8458 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Displacement would not fit in 32 bits: rebase through r11.  */
8462 r11 = gen_rtx_REG (DImode, R11_REG);
8463 emit_move_insn (r11, GEN_INT (offset));
8464 emit_insn (gen_adddi3 (r11, r11, pointer));
8465 base_address = gen_rtx_MEM (Pmode, r11);
8468 emit_move_insn (gen_rtx_REG (Pmode, regno),
8469 adjust_address (base_address, Pmode, offset));
8470 offset += UNITS_PER_WORD;
8474 /* Emit code to restore saved SSE registers using MOV insns.  First register
8475 is restored from POINTER + OFFSET. */
8477 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8478 int maybe_eh_return)
/* SSE counterpart of ix86_emit_restore_regs_using_mov: 16-byte TImode
   loads from 128-bit-aligned slots at POINTER + OFFSET.  */
8481 rtx base_address = gen_rtx_MEM (TImode, pointer);
8484 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8485 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8487 /* Ensure that adjust_address won't be forced to produce pointer
8488 out of range allowed by x86-64 instruction set. */
8489 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Displacement would not fit in 32 bits: rebase through r11.  */
8493 r11 = gen_rtx_REG (DImode, R11_REG);
8494 emit_move_insn (r11, GEN_INT (offset));
8495 emit_insn (gen_adddi3 (r11, r11, pointer));
8496 base_address = gen_rtx_MEM (TImode, r11);
8499 mem = adjust_address (base_address, TImode, offset);
8500 set_mem_align (mem, 128);
8501 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8506 /* Restore function stack, frame, and registers. */
8509 ix86_expand_epilogue (int style)
/* Expand the function epilogue as RTL.  STYLE selects the flavor:
   style == 2 is the eh_return path (see the style checks below);
   sibcall epilogues omit the final return (comment near the end).  */
8513 struct ix86_frame frame;
8514 HOST_WIDE_INT offset;
8516 ix86_finalize_stack_realign_flags ();
8518 /* When stack is realigned, SP must be valid. */
8519 sp_valid = (!frame_pointer_needed
8520 || current_function_sp_is_unchanging
8521 || stack_realign_fp);
8523 ix86_compute_frame_layout (&frame);
8525 /* See the comment about red zone and frame
8526 pointer usage in ix86_expand_prologue. */
8527 if (frame_pointer_needed && frame.red_zone_size)
8528 emit_insn (gen_memory_blockage ());
8530 /* Calculate start of saved registers relative to ebp. Special care
8531 must be taken for the normal return case of a function using
8532 eh_return: the eax and edx registers are marked as saved, but not
8533 restored along this path. */
8534 offset = frame.nregs;
8535 if (crtl->calls_eh_return && style != 2)
8537 offset *= -UNITS_PER_WORD;
8538 offset -= frame.nsseregs * 16 + frame.padding0;
8540 /* If we're only restoring one register and sp is not valid then
8541 using a move instruction to restore the register since it's
8542 less work than reloading sp and popping the register.
8544 The default code result in stack adjustment using add/lea instruction,
8545 while this code results in LEAVE instruction (or discrete equivalent),
8546 so it is profitable in some other cases as well. Especially when there
8547 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8548 and there is exactly one register to pop. This heuristic may need some
8549 tuning in future. */
8550 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8551 || (TARGET_EPILOGUE_USING_MOVE
8552 && cfun->machine->use_fast_prologue_epilogue
8553 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8554 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8555 || (frame_pointer_needed && TARGET_USE_LEAVE
8556 && cfun->machine->use_fast_prologue_epilogue
8557 && (frame.nregs + frame.nsseregs) == 1)
8558 || crtl->calls_eh_return)
8560 /* Restore registers. We can use ebp or esp to address the memory
8561 locations. If both are available, default to ebp, since offsets
8562 are known to be small. Only exception is esp pointing directly
8563 to the end of block of saved registers, where we may simplify
8566 If we are realigning stack with bp and sp, regs restore can't
8567 be addressed by bp. sp must be used instead. */
8569 if (!frame_pointer_needed
8570 || (sp_valid && !frame.to_allocate)
8571 || stack_realign_fp)
8573 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8574 frame.to_allocate, style == 2);
8575 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8577 + frame.nsseregs * 16
8578 + frame.padding0, style == 2);
8582 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8583 offset, style == 2);
8584 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8586 + frame.nsseregs * 16
8587 + frame.padding0, style == 2);
8590 /* eh_return epilogues need %ecx added to the stack pointer. */
8593 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8595 /* Stack align doesn't work with eh_return. */
8596 gcc_assert (!crtl->stack_realign_needed);
8598 if (frame_pointer_needed)
8600 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8601 tmp = plus_constant (tmp, UNITS_PER_WORD);
8602 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8604 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8605 emit_move_insn (hard_frame_pointer_rtx, tmp);
8607 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8612 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8613 tmp = plus_constant (tmp, (frame.to_allocate
8614 + frame.nregs * UNITS_PER_WORD
8615 + frame.nsseregs * 16
8617 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8620 else if (!frame_pointer_needed)
8621 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8622 GEN_INT (frame.to_allocate
8623 + frame.nregs * UNITS_PER_WORD
8624 + frame.nsseregs * 16
8627 /* If not an i386, mov & pop is faster than "leave". */
8628 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8629 || !cfun->machine->use_fast_prologue_epilogue)
8630 emit_insn ((*ix86_gen_leave) ());
8633 pro_epilogue_adjust_stack (stack_pointer_rtx,
8634 hard_frame_pointer_rtx,
8637 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* POP-based path: deallocate the frame, then pop saved registers.  */
8642 /* First step is to deallocate the stack frame so that we can
8645 If we realign stack with frame pointer, then stack pointer
8646 won't be able to recover via lea $offset(%bp), %sp, because
8647 there is a padding area between bp and sp for realign.
8648 "add $to_allocate, %sp" must be used instead. */
8651 gcc_assert (frame_pointer_needed);
8652 gcc_assert (!stack_realign_fp);
8653 pro_epilogue_adjust_stack (stack_pointer_rtx,
8654 hard_frame_pointer_rtx,
8655 GEN_INT (offset), style);
8656 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8657 frame.to_allocate, style == 2);
8658 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8659 GEN_INT (frame.nsseregs * 16), style);
8661 else if (frame.to_allocate || frame.nsseregs)
8663 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8666 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8667 GEN_INT (frame.to_allocate
8668 + frame.nsseregs * 16
8669 + frame.padding0), style);
8672 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8673 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8674 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8675 if (frame_pointer_needed)
8677 /* Leave results in shorter dependency chains on CPUs that are
8678 able to grok it fast. */
8679 if (TARGET_USE_LEAVE)
8680 emit_insn ((*ix86_gen_leave) ());
8683 /* For stack realigned really happens, recover stack
8684 pointer to hard frame pointer is a must, if not using
8686 if (stack_realign_fp)
8687 pro_epilogue_adjust_stack (stack_pointer_rtx,
8688 hard_frame_pointer_rtx,
8690 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Undo the DRAP prologue: pop/skip the saved argument pointer.  */
8695 if (crtl->drap_reg && crtl->stack_realign_needed)
8697 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8698 ? 0 : UNITS_PER_WORD);
8699 gcc_assert (stack_realign_drap);
8700 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8702 GEN_INT (-(UNITS_PER_WORD
8703 + param_ptr_offset))));
8704 if (!call_used_regs[REGNO (crtl->drap_reg)])
8705 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8709 /* Sibcall epilogues don't want a return instruction. */
8713 if (crtl->args.pops_args && crtl->args.size)
8715 rtx popc = GEN_INT (crtl->args.pops_args);
8717 /* i386 can only pop 64K bytes. If asked to pop more, pop
8718 return address, do explicit add, and jump indirectly to the
8721 if (crtl->args.pops_args >= 65536)
8723 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8725 /* There is no "pascal" calling convention in any 64bit ABI. */
8726 gcc_assert (!TARGET_64BIT);
8728 emit_insn (gen_popsi1 (ecx));
8729 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8730 emit_jump_insn (gen_return_indirect_internal (ecx));
8733 emit_jump_insn (gen_return_pop_internal (popc));
8736 emit_jump_insn (gen_return_internal ());
8739 /* Reset from the function's potential modifications. */
8742 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8743 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* TARGET_ASM_FUNCTION_EPILOGUE hook: undo per-function changes made
   during compilation (the PIC register may have been renumbered) and
   work around a Mach-O assembler limitation.  */
8745 if (pic_offset_table_rtx)
8746 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8748 /* Mach-O doesn't support labels at the end of objects, so if
8749 it looks like we might want one, insert a NOP. */
8751 rtx insn = get_last_insn ();
8754 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8755 insn = PREV_INSN (insn);
8759 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8760 fputs ("\tnop\n", file);
8766 /* Extract the parts of an RTL expression that is a valid memory address
8767 for an instruction. Return 0 if the structure of the address is
8768 grossly off. Return -1 if the address contains ASHIFT, so it is not
8769 strictly valid, but still used for computing length of lea instruction. */
8772 ix86_decompose_address (rtx addr, struct ix86_address *out)
/* Split ADDR into base + index*scale + disp (+ segment) in *OUT.
   See the header comment above for the 0 / -1 return convention.  */
8774 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8775 rtx base_reg, index_reg;
8776 HOST_WIDE_INT scale = 1;
8777 rtx scale_rtx = NULL_RTX;
8779 enum ix86_address_seg seg = SEG_DEFAULT;
8781 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8783 else if (GET_CODE (addr) == PLUS)
/* Flatten a nested PLUS chain into an addend list, then classify
   each addend as index*scale, segment unspec, base, or disp.  */
8793 addends[n++] = XEXP (op, 1);
8796 while (GET_CODE (op) == PLUS);
8801 for (i = n; i >= 0; --i)
8804 switch (GET_CODE (op))
8809 index = XEXP (op, 0);
8810 scale_rtx = XEXP (op, 1);
8814 if (XINT (op, 1) == UNSPEC_TP
8815 && TARGET_TLS_DIRECT_SEG_REFS
8816 && seg == SEG_DEFAULT)
8817 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8846 else if (GET_CODE (addr) == MULT)
8848 index = XEXP (addr, 0); /* index*scale */
8849 scale_rtx = XEXP (addr, 1);
8851 else if (GET_CODE (addr) == ASHIFT)
8855 /* We're called for lea too, which implements ashift on occasion. */
8856 index = XEXP (addr, 0);
8857 tmp = XEXP (addr, 1);
8858 if (!CONST_INT_P (tmp))
8860 scale = INTVAL (tmp);
/* Shift counts above 3 cannot be encoded in a SIB byte.  */
8861 if ((unsigned HOST_WIDE_INT) scale > 3)
8867 disp = addr; /* displacement */
8869 /* Extract the integral value of scale. */
8872 if (!CONST_INT_P (scale_rtx))
8874 scale = INTVAL (scale_rtx);
8877 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8878 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8880 /* Allow arg pointer and stack pointer as index if there is not scaling. */
8881 if (base_reg && index_reg && scale == 1
8882 && (index_reg == arg_pointer_rtx
8883 || index_reg == frame_pointer_rtx
8884 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* Swap base and index so the special register becomes the base.  */
8887 tmp = base, base = index, index = tmp;
8888 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8891 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8892 if ((base_reg == hard_frame_pointer_rtx
8893 || base_reg == frame_pointer_rtx
8894 || base_reg == arg_pointer_rtx) && !disp)
8897 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8898 Avoid this by transforming to [%esi+0].
8899 Reload calls address legitimization without cfun defined, so we need
8900 to test cfun for being non-NULL. */
8901 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8902 && base_reg && !index_reg && !disp
8904 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8907 /* Special case: encode reg+reg instead of reg*2. */
8908 if (!base && index && scale && scale == 2)
8909 base = index, base_reg = index_reg, scale = 1;
8911 /* Special case: scaling cannot be encoded without base or displacement. */
8912 if (!base && !disp && index && scale != 1)
8924 /* Return cost of the memory address x.
8925 For i386, it is better to use a complex address than let gcc copy
8926 the address into a reg and make a new pseudo. But not if the address
8927 requires two regs - that would mean more pseudos with longer
8930 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
/* TARGET_ADDRESS_COST hook: estimate the cost of address X, favoring
   forms that use fewer (hard) registers; see header comment above.  */
8932 struct ix86_address parts;
8934 int ok = ix86_decompose_address (x, &parts);
8938 if (parts.base && GET_CODE (parts.base) == SUBREG)
8939 parts.base = SUBREG_REG (parts.base);
8940 if (parts.index && GET_CODE (parts.index) == SUBREG)
8941 parts.index = SUBREG_REG (parts.index);
8943 /* Attempt to minimize number of registers in the address. */
8945 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8947 && (!REG_P (parts.index)
8948 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8952 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8954 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8955 && parts.base != parts.index)
8958 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
8959 since its predecode logic can't detect the length of instructions
8960 and it degenerates to vector decoded. Increase cost of such
8961 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
8962 to split such addresses or even refuse such addresses at all.
8964 Following addressing modes are affected:
8969 The first and last case may be avoidable by explicitly coding the zero in
8970 memory address, but I don't have AMD-K6 machine handy to check this
8974 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8975 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8976 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8982 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8983 this is used to form addresses to local data when -fPIC is in
8987 darwin_local_data_pic (rtx disp)
/* True iff DISP is the Mach-O picbase-relative offset unspec.  */
8989 return (GET_CODE (disp) == UNSPEC
8990 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8993 /* Determine if a given RTX is a valid constant. We already know this
8994 satisfies CONSTANT_P. */
8997 legitimate_constant_p (rtx x)
/* Return true if X is a constant the move patterns can handle
   directly; rejects TLS and DLLIMPORT symbols and most unspecs.  */
8999 switch (GET_CODE (x))
/* CONST: strip a "symbol + const_int" wrapper, then vet the inner
   expression.  */
9004 if (GET_CODE (x) == PLUS)
9006 if (!CONST_INT_P (XEXP (x, 1)))
9011 if (TARGET_MACHO && darwin_local_data_pic (x))
9014 /* Only some unspecs are valid as "constants". */
9015 if (GET_CODE (x) == UNSPEC)
9016 switch (XINT (x, 1))
9021 return TARGET_64BIT;
9024 x = XVECEXP (x, 0, 0);
9025 return (GET_CODE (x) == SYMBOL_REF
9026 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9028 x = XVECEXP (x, 0, 0);
9029 return (GET_CODE (x) == SYMBOL_REF
9030 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9035 /* We must have drilled down to a symbol. */
9036 if (GET_CODE (x) == LABEL_REF)
9038 if (GET_CODE (x) != SYMBOL_REF)
9043 /* TLS symbols are never valid. */
9044 if (SYMBOL_REF_TLS_MODEL (x))
9047 /* DLLIMPORT symbols are never valid. */
9048 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9049 && SYMBOL_REF_DLLIMPORT_P (x))
/* Vector constants: only the standard SSE constants (0 / all-ones)
   are legitimate.  */
9054 if (GET_MODE (x) == TImode
9055 && x != CONST0_RTX (TImode)
9061 if (!standard_sse_constant_p (x))
9068 /* Otherwise we handle everything else in the move patterns. */
9072 /* Determine if it's legal to put X into the constant pool. This
9073 is not possible for the address of thread-local symbols, which
9074 is checked above. */
9077 ix86_cannot_force_const_mem (rtx x)
/* TARGET_CANNOT_FORCE_CONST_MEM hook: anything legitimate_constant_p
   rejects (e.g. TLS symbol addresses) must stay out of the pool.  */
9079 /* We can always put integral constants and vectors in memory. */
9080 switch (GET_CODE (x))
9090 return !legitimate_constant_p (x);
9093 /* Determine if a given RTX is a valid constant address. */
9096 constant_address_p (rtx x)
/* True iff X is a constant that is also a valid (strict) address.  */
9098 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
9101 /* Nonzero if the constant value X is a legitimate general operand
9102 when generating PIC code. It is given that flag_pic is on and
9103 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9106 legitimate_pic_operand_p (rtx x)
/* See header comment: validity of constant X as a general operand
   under -fpic.  CONST wrappers are unwrapped; symbolic operands defer
   to legitimate_pic_address_disp_p.  */
9110 switch (GET_CODE (x))
9113 inner = XEXP (x, 0);
9114 if (GET_CODE (inner) == PLUS
9115 && CONST_INT_P (XEXP (inner, 1)))
9116 inner = XEXP (inner, 0);
9118 /* Only some unspecs are valid as "constants". */
9119 if (GET_CODE (inner) == UNSPEC)
9120 switch (XINT (inner, 1))
9125 return TARGET_64BIT;
9127 x = XVECEXP (inner, 0, 0);
9128 return (GET_CODE (x) == SYMBOL_REF
9129 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9130 case UNSPEC_MACHOPIC_OFFSET:
9131 return legitimate_pic_address_disp_p (x);
9139 return legitimate_pic_address_disp_p (x);
9146 /* Determine if a given CONST RTX is a valid memory displacement
9150 legitimate_pic_address_disp_p (rtx disp)
/* Return true if DISP is a valid PIC memory displacement: either a
   64-bit direct local symbol reference (within +/-16MB offset) or one
   of the recognized GOT/TLS unspec forms.  */
9154 /* In 64bit mode we can allow direct addresses of symbols and labels
9155 when they are not dynamic symbols. */
9158 rtx op0 = disp, op1;
9160 switch (GET_CODE (disp))
9166 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9168 op0 = XEXP (XEXP (disp, 0), 0);
9169 op1 = XEXP (XEXP (disp, 0), 1);
/* +/-16MB keeps symbol+offset within the 32-bit signed range used
   by the small code models.  */
9170 if (!CONST_INT_P (op1)
9171 || INTVAL (op1) >= 16*1024*1024
9172 || INTVAL (op1) < -16*1024*1024)
9174 if (GET_CODE (op0) == LABEL_REF)
9176 if (GET_CODE (op0) != SYMBOL_REF)
9181 /* TLS references should always be enclosed in UNSPEC. */
9182 if (SYMBOL_REF_TLS_MODEL (op0))
9184 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9185 && ix86_cmodel != CM_LARGE_PIC)
9193 if (GET_CODE (disp) != CONST)
9195 disp = XEXP (disp, 0);
/* 64-bit: only the PC-relative GOT unspecs are acceptable here.  */
9199 /* We are unsafe to allow PLUS expressions. This limit allowed distance
9200 of GOT tables. We should not need these anyway. */
9201 if (GET_CODE (disp) != UNSPEC
9202 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9203 && XINT (disp, 1) != UNSPEC_GOTOFF
9204 && XINT (disp, 1) != UNSPEC_PLTOFF))
9207 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9208 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9214 if (GET_CODE (disp) == PLUS)
9216 if (!CONST_INT_P (XEXP (disp, 1)))
9218 disp = XEXP (disp, 0);
9222 if (TARGET_MACHO && darwin_local_data_pic (disp))
9225 if (GET_CODE (disp) != UNSPEC)
9228 switch (XINT (disp, 1))
9233 /* We need to check for both symbols and labels because VxWorks loads
9234 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9236 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9237 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9239 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9240 While ABI specify also 32bit relocation but we don't produce it in
9241 small PIC model at all. */
9242 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9243 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9245 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
/* TLS unspecs: each form requires the matching TLS model on the
   wrapped symbol.  */
9247 case UNSPEC_GOTTPOFF:
9248 case UNSPEC_GOTNTPOFF:
9249 case UNSPEC_INDNTPOFF:
9252 disp = XVECEXP (disp, 0, 0);
9253 return (GET_CODE (disp) == SYMBOL_REF
9254 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9256 disp = XVECEXP (disp, 0, 0);
9257 return (GET_CODE (disp) == SYMBOL_REF
9258 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9260 disp = XVECEXP (disp, 0, 0);
9261 return (GET_CODE (disp) == SYMBOL_REF
9262 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9268 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9269 memory address for an instruction. The MODE argument is the machine mode
9270 for the MEM expression that wants to use this address.
9272 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9273 convert common non-canonical forms to canonical form so that they will
/* NOTE(review): elided listing -- error-exit labels and some returns are
   not shown.  Validation proceeds part by part: base, index, scale,
   displacement; REASON records why a part was rejected.  */
9277 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9278 rtx addr, int strict)
9280 struct ix86_address parts;
9281 rtx base, index, disp;
9282 HOST_WIDE_INT scale;
9283 const char *reason = NULL;
9284 rtx reason_rtx = NULL_RTX;
/* First split ADDR into base/index/disp/scale; failure means the
   address is not even in canonical shape.  */
9286 if (ix86_decompose_address (addr, &parts) <= 0)
9288 reason = "decomposition failed";
9293 index = parts.index;
9295 scale = parts.scale;
9297 /* Validate base register.
9299 Don't allow SUBREG's that span more than a word here. It can lead to spill
9300 failures when the base is one word out of a two word structure, which is
9301 represented internally as a DImode int. */
9310 else if (GET_CODE (base) == SUBREG
9311 && REG_P (SUBREG_REG (base))
9312 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9314 reg = SUBREG_REG (base);
9317 reason = "base is not a register";
9321 if (GET_MODE (base) != Pmode)
9323 reason = "base is not in Pmode";
/* STRICT distinguishes post-reload checking (hard regs only) from the
   relaxed pre-reload check that also accepts pseudos.  */
9327 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9328 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9330 reason = "base is not valid";
9335 /* Validate index register.
9337 Don't allow SUBREG's that span more than a word here -- same as above. */
9346 else if (GET_CODE (index) == SUBREG
9347 && REG_P (SUBREG_REG (index))
9348 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9350 reg = SUBREG_REG (index);
9353 reason = "index is not a register";
9357 if (GET_MODE (index) != Pmode)
9359 reason = "index is not in Pmode";
9363 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9364 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9366 reason = "index is not valid";
9371 /* Validate scale factor. */
9374 reason_rtx = GEN_INT (scale);
9377 reason = "scale without index";
/* Hardware SIB encoding only supports scales 1, 2, 4 and 8.  */
9381 if (scale != 2 && scale != 4 && scale != 8)
9383 reason = "scale is not a valid multiplier";
9388 /* Validate displacement. */
9393 if (GET_CODE (disp) == CONST
9394 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9395 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9396 switch (XINT (XEXP (disp, 0), 1))
9398 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9399 used. While ABI specify also 32bit relocations, we don't produce
9400 them at all and use IP relative instead. */
9403 gcc_assert (flag_pic);
9405 goto is_legitimate_pic;
9406 reason = "64bit address unspec";
9409 case UNSPEC_GOTPCREL:
9410 gcc_assert (flag_pic);
9411 goto is_legitimate_pic;
9413 case UNSPEC_GOTTPOFF:
9414 case UNSPEC_GOTNTPOFF:
9415 case UNSPEC_INDNTPOFF:
9421 reason = "invalid address unspec";
9425 else if (SYMBOLIC_CONST (disp)
9429 && MACHOPIC_INDIRECT
9430 && !machopic_operand_p (disp)
/* PIC displacement checking: in 64-bit mode with a base or index
   present, only the DTPOFF/NTPOFF symbol+offset forms are allowed.  */
9436 if (TARGET_64BIT && (index || base))
9438 /* foo@dtpoff(%rX) is ok. */
9439 if (GET_CODE (disp) != CONST
9440 || GET_CODE (XEXP (disp, 0)) != PLUS
9441 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9442 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9443 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9444 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9446 reason = "non-constant pic memory reference";
9450 else if (! legitimate_pic_address_disp_p (disp))
9452 reason = "displacement is an invalid pic construct";
9456 /* This code used to verify that a symbolic pic displacement
9457 includes the pic_offset_table_rtx register.
9459 While this is good idea, unfortunately these constructs may
9460 be created by "adds using lea" optimization for incorrect
9469 This code is nonsensical, but results in addressing
9470 GOT table with pic_offset_table_rtx base. We can't
9471 just refuse it easily, since it gets matched by
9472 "addsi3" pattern, that later gets split to lea in the
9473 case output register differs from input. While this
9474 can be handled by separate addsi pattern for this case
9475 that never results in lea, this seems to be easier and
9476 correct fix for crash to disable this test. */
9478 else if (GET_CODE (disp) != LABEL_REF
9479 && !CONST_INT_P (disp)
9480 && (GET_CODE (disp) != CONST
9481 || !legitimate_constant_p (disp))
9482 && (GET_CODE (disp) != SYMBOL_REF
9483 || !legitimate_constant_p (disp)))
9485 reason = "displacement is not constant";
/* On x86-64 a plain constant displacement must also fit in the
   sign-extended 32-bit immediate field.  */
9488 else if (TARGET_64BIT
9489 && !x86_64_immediate_operand (disp, VOIDmode))
9491 reason = "displacement is out of range";
9496 /* Everything looks valid. */
9503 /* Return a unique alias set for the GOT. */
9505 static alias_set_type
9506 ix86_GOT_alias_set (void)
/* Lazily allocate the set on first use (-1 marks "not yet created");
   the static cache makes every GOT reference share one alias set.  */
9508 static alias_set_type set = -1;
9510 set = new_alias_set ();
9514 /* Return a legitimate reference for ORIG (an address) using the
9515 register REG. If REG is 0, a new pseudo is generated.
9517 There are two types of references that must be handled:
9519 1. Global data references must load the address from the GOT, via
9520 the PIC reg. An insn is emitted to do this load, and the reg is
9523 2. Static data references, constant pool addresses, and code labels
9524 compute the address as an offset from the GOT, whose base is in
9525 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9526 differentiate them from global data objects. The returned
9527 address is the PIC reg + an unspec constant.
9529 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9530 reg also appears in the address. */
/* NOTE(review): elided listing -- the ADDR/new_rtx locals and several
   branch closers are not visible here.  */
9533 legitimize_pic_address (rtx orig, rtx reg)
9540 if (TARGET_MACHO && !TARGET_64BIT)
9543 reg = gen_reg_rtx (Pmode);
9544 /* Use the generic Mach-O PIC machinery. */
9545 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9549 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
/* 64-bit @GOTOFF path (medium/large models): build the UNSPEC_GOTOFF
   constant and add the PIC register at runtime.  */
9551 else if (TARGET_64BIT
9552 && ix86_cmodel != CM_SMALL_PIC
9553 && gotoff_operand (addr, Pmode))
9556 /* This symbol may be referenced via a displacement from the PIC
9557 base address (@GOTOFF). */
/* During reload no new pseudo uses can be recorded normally, so mark
   the PIC register live explicitly.  */
9559 if (reload_in_progress)
9560 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9561 if (GET_CODE (addr) == CONST)
9562 addr = XEXP (addr, 0);
9563 if (GET_CODE (addr) == PLUS)
9565 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9567 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9570 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9571 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9573 tmpreg = gen_reg_rtx (Pmode);
9576 emit_move_insn (tmpreg, new_rtx);
9580 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9581 tmpreg, 1, OPTAB_DIRECT);
9584 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
/* 32-bit @GOTOFF path: same idea, but the PLUS can stay symbolic.  */
9586 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9588 /* This symbol may be referenced via a displacement from the PIC
9589 base address (@GOTOFF). */
9591 if (reload_in_progress)
9592 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9593 if (GET_CODE (addr) == CONST)
9594 addr = XEXP (addr, 0);
9595 if (GET_CODE (addr) == PLUS)
9597 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9599 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9602 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9603 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9604 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9608 emit_move_insn (reg, new_rtx);
/* Global symbols (and VxWorks text labels) go through the GOT.  */
9612 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9613 /* We can't use @GOTOFF for text labels on VxWorks;
9614 see gotoff_operand. */
9615 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9617 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9619 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9620 return legitimize_dllimport_symbol (addr, true);
9621 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9622 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9623 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9625 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9626 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* x86-64 small/medium PIC: RIP-relative GOT load via @GOTPCREL.  */
9630 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9632 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9633 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9634 new_rtx = gen_const_mem (Pmode, new_rtx);
9635 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9638 reg = gen_reg_rtx (Pmode);
9639 /* Use directly gen_movsi, otherwise the address is loaded
9640 into register for CSE. We don't want to CSE this addresses,
9641 instead we CSE addresses from the GOT table, so skip this. */
9642 emit_insn (gen_movsi (reg, new_rtx));
9647 /* This symbol must be referenced via a load from the
9648 Global Offset Table (@GOT). */
9650 if (reload_in_progress)
9651 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9652 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9653 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9655 new_rtx = force_reg (Pmode, new_rtx);
9656 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9657 new_rtx = gen_const_mem (Pmode, new_rtx);
9658 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9661 reg = gen_reg_rtx (Pmode);
9662 emit_move_insn (reg, new_rtx);
/* Non-symbolic constants: force wide immediates into a register.  */
9668 if (CONST_INT_P (addr)
9669 && !x86_64_immediate_operand (addr, VOIDmode))
9673 emit_move_insn (reg, addr);
9677 new_rtx = force_reg (Pmode, addr);
9679 else if (GET_CODE (addr) == CONST)
9681 addr = XEXP (addr, 0);
9683 /* We must match stuff we generate before. Assume the only
9684 unspecs that can get here are ours. Not that we could do
9685 anything with them anyway.... */
9686 if (GET_CODE (addr) == UNSPEC
9687 || (GET_CODE (addr) == PLUS
9688 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9690 gcc_assert (GET_CODE (addr) == PLUS);
9692 if (GET_CODE (addr) == PLUS)
9694 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9696 /* Check first to see if this is a constant offset from a @GOTOFF
9697 symbol reference. */
9698 if (gotoff_operand (op0, Pmode)
9699 && CONST_INT_P (op1))
9703 if (reload_in_progress)
9704 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9705 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9707 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9708 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9709 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9713 emit_move_insn (reg, new_rtx);
/* Offsets outside +/-16M cannot be folded into the relocation; keep
   them as explicit register arithmetic.  */
9719 if (INTVAL (op1) < -16*1024*1024
9720 || INTVAL (op1) >= 16*1024*1024)
9722 if (!x86_64_immediate_operand (op1, Pmode))
9723 op1 = force_reg (Pmode, op1);
9724 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively and recombine,
   folding a constant half back in with plus_constant.  */
9730 base = legitimize_pic_address (XEXP (addr, 0), reg);
9731 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9732 base == reg ? NULL_RTX : reg);
9734 if (CONST_INT_P (new_rtx))
9735 new_rtx = plus_constant (base, INTVAL (new_rtx));
9738 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9740 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9741 new_rtx = XEXP (new_rtx, 1);
9743 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9751 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9754 get_thread_pointer (int to_reg)
/* The thread pointer is represented as an UNSPEC_TP wrapper; when
   TO_REG is set (elided branch), it is copied into a fresh pseudo.  */
9758 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9762 reg = gen_reg_rtx (Pmode);
9763 insn = gen_rtx_SET (VOIDmode, reg, tp);
9764 insn = emit_insn (insn);
9769 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9770 false if we expect this to be used for a memory address and true if
9771 we expect to load the address into a register. */
/* NOTE(review): elided listing.  Dispatches on the TLS access model and
   emits the matching code sequence; returns the legitimized address.  */
9774 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9776 rtx dest, base, off, pic, tp;
9781 case TLS_MODEL_GLOBAL_DYNAMIC:
9782 dest = gen_reg_rtx (Pmode);
9783 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
/* Classic 64-bit GD: call __tls_get_addr with the result in %rax,
   wrapped as a const libcall block for CSE.  */
9785 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9787 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9790 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9791 insns = get_insns ();
9794 RTL_CONST_CALL_P (insns) = 1;
9795 emit_libcall_block (insns, dest, rax, x);
9797 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9798 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9800 emit_insn (gen_tls_global_dynamic_32 (dest, x));
/* GNU2 (TLSDESC) returns an offset; add the thread pointer and record
   a REG_EQUIV so the result can be rematerialized.  */
9802 if (TARGET_GNU2_TLS)
9804 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9806 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9810 case TLS_MODEL_LOCAL_DYNAMIC:
9811 base = gen_reg_rtx (Pmode);
9812 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9814 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9816 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9819 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9820 insns = get_insns ();
9823 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9824 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9825 RTL_CONST_CALL_P (insns) = 1;
9826 emit_libcall_block (insns, base, rax, note);
9828 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9829 emit_insn (gen_tls_local_dynamic_base_64 (base));
9831 emit_insn (gen_tls_local_dynamic_base_32 (base));
9833 if (TARGET_GNU2_TLS)
9835 rtx x = ix86_tls_module_base ();
9837 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9838 gen_rtx_MINUS (Pmode, x, tp));
/* Symbol's offset within the module's TLS block (@DTPOFF).  */
9841 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9842 off = gen_rtx_CONST (Pmode, off);
9844 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9846 if (TARGET_GNU2_TLS)
9848 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9850 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9855 case TLS_MODEL_INITIAL_EXEC:
/* Pick the GOT-indirection relocation appropriate for the target /
   TLS dialect; PIC may be the PIC reg, a fresh GOT reg, or absent.  */
9859 type = UNSPEC_GOTNTPOFF;
9863 if (reload_in_progress)
9864 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9865 pic = pic_offset_table_rtx;
9866 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9868 else if (!TARGET_ANY_GNU_TLS)
9870 pic = gen_reg_rtx (Pmode);
9871 emit_insn (gen_set_got (pic));
9872 type = UNSPEC_GOTTPOFF;
9877 type = UNSPEC_INDNTPOFF;
9880 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9881 off = gen_rtx_CONST (Pmode, off);
9883 off = gen_rtx_PLUS (Pmode, pic, off);
9884 off = gen_const_mem (Pmode, off);
9885 set_mem_alias_set (off, ix86_GOT_alias_set ());
9887 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9889 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9890 off = force_reg (Pmode, off);
9891 return gen_rtx_PLUS (Pmode, base, off);
/* Non-GNU 32-bit dialect: offset is subtracted from the TP.  */
9895 base = get_thread_pointer (true);
9896 dest = gen_reg_rtx (Pmode);
9897 emit_insn (gen_subsi3 (dest, base, off));
9901 case TLS_MODEL_LOCAL_EXEC:
9902 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9903 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9904 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9905 off = gen_rtx_CONST (Pmode, off);
9907 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9909 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9910 return gen_rtx_PLUS (Pmode, base, off);
9914 base = get_thread_pointer (true);
9915 dest = gen_reg_rtx (Pmode);
9916 emit_insn (gen_subsi3 (dest, base, off));
9927 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* Hash table mapping decls to their __imp_ indirection decls; GC'd via
   the tree_map marking machinery.  */
9930 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9931 htab_t dllimport_map;
9934 get_dllimport_decl (tree decl)
9936 struct tree_map *h, in;
9940 size_t namelen, prefixlen;
/* Lazily create the map, then look DECL up; a hit returns the cached
   indirection decl (elided), a miss builds a new one below.  */
9946 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9948 in.hash = htab_hash_pointer (decl);
9949 in.base.from = decl;
9950 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9951 h = (struct tree_map *) *loc;
9955 *loc = h = GGC_NEW (struct tree_map);
9957 h->base.from = decl;
9958 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9959 DECL_ARTIFICIAL (to) = 1;
9960 DECL_IGNORED_P (to) = 1;
9961 DECL_EXTERNAL (to) = 1;
9962 TREE_READONLY (to) = 1;
/* Build the "*__imp_<name>" / "*__imp__<name>" assembler name; fastcall
   symbols and targets without a user label prefix skip the extra '_'.  */
9964 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9965 name = targetm.strip_name_encoding (name);
9966 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9967 ? "*__imp_" : "*__imp__";
9968 namelen = strlen (name);
9969 prefixlen = strlen (prefix);
9970 imp_name = (char *) alloca (namelen + prefixlen + 1);
9971 memcpy (imp_name, prefix, prefixlen);
9972 memcpy (imp_name + prefixlen, name, namelen + 1);
9974 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9975 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9976 SET_SYMBOL_REF_DECL (rtl, to);
9977 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
/* The decl's RTL is a load through the import slot; reuse the GOT alias
   set since the slot behaves like a GOT entry.  */
9979 rtl = gen_const_mem (Pmode, rtl);
9980 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9982 SET_DECL_RTL (to, rtl);
9983 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9988 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9989 true if we require the result be a register. */
9992 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* SYMBOL must carry a decl so the __imp_ indirection decl can be
   created/cached; the result is that decl's MEM, optionally forced
   into a register.  */
9997 gcc_assert (SYMBOL_REF_DECL (symbol));
9998 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10000 x = DECL_RTL (imp_decl);
10002 x = force_reg (Pmode, x);
10006 /* Try machine-dependent ways of modifying an illegitimate address
10007 to be legitimate. If we find one, return the new, valid address.
10008 This macro is used in only one place: `memory_address' in explow.c.
10010 OLDX is the address as it was before break_out_memory_refs was called.
10011 In some cases it is useful to look at this to decide what needs to be done.
10013 MODE and WIN are passed so that this macro can use
10014 GO_IF_LEGITIMATE_ADDRESS.
10016 It is always safe for this macro to do nothing. It exists to recognize
10017 opportunities to optimize the output.
10019 For the 80386, we handle X+REG by loading X into a register R and
10020 using R+REG. R will go in a general reg and indexing will be used.
10021 However, if REG is a broken-out memory address or multiplication,
10022 nothing needs to be done because REG can certainly go in a general reg.
10024 When -fpic is used, special handling is needed for symbolic references.
10025 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): elided listing -- the CHANGED flag declaration and some
   early-return lines are not visible.  */
10028 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
/* TLS symbols are handled first, before any generic rewriting.  */
10033 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10035 return legitimize_tls_address (x, (enum tls_model) log, false);
10036 if (GET_CODE (x) == CONST
10037 && GET_CODE (XEXP (x, 0)) == PLUS
10038 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10039 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10041 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10042 (enum tls_model) log, false);
10043 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* Then dllimport indirection, then generic PIC legitimization.  */
10046 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10048 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10049 return legitimize_dllimport_symbol (x, true);
10050 if (GET_CODE (x) == CONST
10051 && GET_CODE (XEXP (x, 0)) == PLUS
10052 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10053 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10055 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10056 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10060 if (flag_pic && SYMBOLIC_CONST (x))
10061 return legitimize_pic_address (x, 0);
10063 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10064 if (GET_CODE (x) == ASHIFT
10065 && CONST_INT_P (XEXP (x, 1))
10066 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10069 log = INTVAL (XEXP (x, 1));
10070 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10071 GEN_INT (1 << log));
10074 if (GET_CODE (x) == PLUS)
10076 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10078 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10079 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10080 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10083 log = INTVAL (XEXP (XEXP (x, 0), 1));
10084 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10085 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10086 GEN_INT (1 << log));
10089 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10090 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10091 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10094 log = INTVAL (XEXP (XEXP (x, 1), 1));
10095 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10096 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10097 GEN_INT (1 << log));
10100 /* Put multiply first if it isn't already. */
10101 if (GET_CODE (XEXP (x, 1)) == MULT)
10103 rtx tmp = XEXP (x, 0);
10104 XEXP (x, 0) = XEXP (x, 1);
10109 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10110 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10111 created by virtual register instantiation, register elimination, and
10112 similar optimizations. */
10113 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10116 x = gen_rtx_PLUS (Pmode,
10117 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10118 XEXP (XEXP (x, 1), 0)),
10119 XEXP (XEXP (x, 1), 1));
10123 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10124 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10125 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10126 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10127 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10128 && CONSTANT_P (XEXP (x, 1)))
10131 rtx other = NULL_RTX;
/* Exactly one of the two constants must be a CONST_INT; the other
   operand is folded in with plus_constant below.  */
10133 if (CONST_INT_P (XEXP (x, 1)))
10135 constant = XEXP (x, 1);
10136 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10138 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10140 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10141 other = XEXP (x, 1);
10149 x = gen_rtx_PLUS (Pmode,
10150 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10151 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10152 plus_constant (other, INTVAL (constant)));
/* After canonicalization, stop as soon as the address is accepted.  */
10156 if (changed && legitimate_address_p (mode, x, FALSE))
10159 if (GET_CODE (XEXP (x, 0)) == MULT)
10162 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10165 if (GET_CODE (XEXP (x, 1)) == MULT)
10168 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10172 && REG_P (XEXP (x, 1))
10173 && REG_P (XEXP (x, 0)))
10176 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10179 x = legitimize_pic_address (x, 0);
10182 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force the non-register half into a fresh pseudo.  */
10185 if (REG_P (XEXP (x, 0)))
10187 rtx temp = gen_reg_rtx (Pmode);
10188 rtx val = force_operand (XEXP (x, 1), temp);
10190 emit_move_insn (temp, val);
10192 XEXP (x, 1) = temp;
10196 else if (REG_P (XEXP (x, 1)))
10198 rtx temp = gen_reg_rtx (Pmode);
10199 rtx val = force_operand (XEXP (x, 0), temp);
10201 emit_move_insn (temp, val);
10203 XEXP (x, 0) = temp;
10211 /* Print an integer constant expression in assembler syntax. Addition
10212 and subtraction are the only arithmetic that may appear in these
10213 expressions. FILE is the stdio stream to write to, X is the rtx, and
10214 CODE is the operand print code from the output string. */
/* NOTE(review): elided listing -- several case labels and break
   statements are not visible.  */
10217 output_pic_addr_const (FILE *file, rtx x, int code)
10221 switch (GET_CODE (x))
10224 gcc_assert (flag_pic);
10229 if (! TARGET_MACHO || TARGET_64BIT)
10230 output_addr_const (file, x);
10233 const char *name = XSTR (x, 0);
10235 /* Mark the decl as referenced so that cgraph will
10236 output the function. */
10237 if (SYMBOL_REF_DECL (x))
10238 mark_decl_referenced (SYMBOL_REF_DECL (x));
10241 if (MACHOPIC_INDIRECT
10242 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10243 name = machopic_indirection_name (x, /*stub_p=*/true);
10245 assemble_name (file, name);
/* 'P' asks for a PLT reference; non-local symbols outside Mach-O and
   the MS ABI get the @PLT suffix.  */
10247 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10248 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10249 fputs ("@PLT", file);
10256 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10257 assemble_name (asm_out_file, buf);
10261 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10265 /* This used to output parentheses around the expression,
10266 but that does not work on the 386 (either ATT or BSD assembler). */
10267 output_pic_addr_const (file, XEXP (x, 0), code);
10271 if (GET_MODE (x) == VOIDmode)
10273 /* We can use %d if the number is <32 bits and positive. */
10274 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10275 fprintf (file, "0x%lx%08lx",
10276 (unsigned long) CONST_DOUBLE_HIGH (x),
10277 (unsigned long) CONST_DOUBLE_LOW (x));
10279 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10282 /* We can't handle floating point constants;
10283 PRINT_OPERAND must handle them. */
10284 output_operand_lossage ("floating constant misused");
10288 /* Some assemblers need integer constants to appear first. */
10289 if (CONST_INT_P (XEXP (x, 0)))
10291 output_pic_addr_const (file, XEXP (x, 0), code);
10293 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: subtrahend must be a CONST_INT for valid assembler output.  */
10297 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10298 output_pic_addr_const (file, XEXP (x, 1), code);
10300 output_pic_addr_const (file, XEXP (x, 0), code);
10306 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10307 output_pic_addr_const (file, XEXP (x, 0), code);
10309 output_pic_addr_const (file, XEXP (x, 1), code);
10311 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC wrappers become relocation suffixes on the single operand.  */
10315 gcc_assert (XVECLEN (x, 0) == 1);
10316 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10317 switch (XINT (x, 1))
10320 fputs ("@GOT", file);
10322 case UNSPEC_GOTOFF:
10323 fputs ("@GOTOFF", file);
10325 case UNSPEC_PLTOFF:
10326 fputs ("@PLTOFF", file);
10328 case UNSPEC_GOTPCREL:
10329 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10330 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10332 case UNSPEC_GOTTPOFF:
10333 /* FIXME: This might be @TPOFF in Sun ld too. */
10334 fputs ("@GOTTPOFF", file);
10337 fputs ("@TPOFF", file);
10339 case UNSPEC_NTPOFF:
10341 fputs ("@TPOFF", file);
10343 fputs ("@NTPOFF", file);
10345 case UNSPEC_DTPOFF:
10346 fputs ("@DTPOFF", file);
10348 case UNSPEC_GOTNTPOFF:
10350 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10351 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10353 fputs ("@GOTNTPOFF", file);
10355 case UNSPEC_INDNTPOFF:
10356 fputs ("@INDNTPOFF", file);
10359 case UNSPEC_MACHOPIC_OFFSET:
10361 machopic_output_function_base_name (file);
10365 output_operand_lossage ("invalid UNSPEC as operand");
10371 output_operand_lossage ("invalid expression as operand");
10375 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10376 We need to emit DTP-relative relocations. */
10378 static void ATTRIBUTE_UNUSED
10379 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emit ".long <sym>@DTPOFF"; the elided SIZE switch appends ", 0" for
   8-byte entries and aborts on unsupported sizes.  */
10381 fputs (ASM_LONG, file);
10382 output_addr_const (file, x);
10383 fputs ("@DTPOFF", file);
10389 fputs (", 0", file);
10392 gcc_unreachable ();
10396 /* Return true if X is a representation of the PIC register. This copes
10397 with calls from ix86_find_base_term, where the register might have
10398 been replaced by a cselib value. */
10401 ix86_pic_register_p (rtx x)
/* A cselib VALUE is matched by rtx equality against the PIC register;
   otherwise require a hard/virtual REG with the PIC regno.  */
10403 if (GET_CODE (x) == VALUE)
10404 return (pic_offset_table_rtx
10405 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10407 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10410 /* In the name of slightly smaller debug output, and to cater to
10411 general assembler lossage, recognize PIC+GOTOFF and turn it back
10412 into a direct symbol reference.
10414 On Darwin, this is necessary to avoid a crash, because Darwin
10415 has a different PIC label for each routine but the DWARF debugging
10416 information is not associated with any particular routine, so it's
10417 necessary to remove references to the PIC label from RTL stored by
10418 the DWARF output code. */
/* NOTE(review): elided listing -- the local X initialization and some
   early returns are not visible.  */
10421 ix86_delegitimize_address (rtx orig_x)
10424 /* reg_addend is NULL or a multiple of some register. */
10425 rtx reg_addend = NULL_RTX;
10426 /* const_addend is NULL or a const_int. */
10427 rtx const_addend = NULL_RTX;
10428 /* This is the result, or NULL. */
10429 rtx result = NULL_RTX;
/* 64-bit path: a GOTPCREL load delegitimizes straight back to the
   wrapped symbol.  */
10436 if (GET_CODE (x) != CONST
10437 || GET_CODE (XEXP (x, 0)) != UNSPEC
10438 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10439 || !MEM_P (orig_x))
10441 return XVECEXP (XEXP (x, 0), 0, 0);
/* 32-bit path: expect PIC_reg [+ reg*scale] + CONST(...).  */
10444 if (GET_CODE (x) != PLUS
10445 || GET_CODE (XEXP (x, 1)) != CONST)
10448 if (ix86_pic_register_p (XEXP (x, 0)))
10449 /* %ebx + GOT/GOTOFF */
10451 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10453 /* %ebx + %reg * scale + GOT/GOTOFF */
10454 reg_addend = XEXP (x, 0);
10455 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10456 reg_addend = XEXP (reg_addend, 1);
10457 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10458 reg_addend = XEXP (reg_addend, 0);
10461 if (!REG_P (reg_addend)
10462 && GET_CODE (reg_addend) != MULT
10463 && GET_CODE (reg_addend) != ASHIFT)
/* Peel a constant offset off the CONST before matching the UNSPEC.  */
10469 x = XEXP (XEXP (x, 1), 0);
10470 if (GET_CODE (x) == PLUS
10471 && CONST_INT_P (XEXP (x, 1)))
10473 const_addend = XEXP (x, 1);
/* @GOT only makes sense through a MEM; @GOTOFF only outside one.  */
10477 if (GET_CODE (x) == UNSPEC
10478 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10479 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10480 result = XVECEXP (x, 0, 0);
10482 if (TARGET_MACHO && darwin_local_data_pic (x)
10483 && !MEM_P (orig_x))
10484 result = XVECEXP (x, 0, 0);
/* Re-attach the peeled constant and register addends, if any.  */
10490 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10492 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10496 /* If X is a machine specific address (i.e. a symbol or label being
10497 referenced as a displacement from the GOT implemented using an
10498 UNSPEC), then return the base term. Otherwise return X. */
10501 ix86_find_base_term (rtx x)
/* 64-bit: unwrap CONST (optionally PLUS const) around UNSPEC_GOTPCREL
   and return the wrapped symbol; everything else falls through to
   full delegitimization.  */
10507 if (GET_CODE (x) != CONST)
10509 term = XEXP (x, 0);
10510 if (GET_CODE (term) == PLUS
10511 && (CONST_INT_P (XEXP (term, 1))
10512 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10513 term = XEXP (term, 0);
10514 if (GET_CODE (term) != UNSPEC
10515 || XINT (term, 1) != UNSPEC_GOTPCREL)
10518 return XVECEXP (term, 0, 0);
10521 return ix86_delegitimize_address (x);
/* Print the condition-code suffix (e.g. "e", "a", "nbe") for CODE in
   CC mode MODE to FILE.  REVERSE inverts the condition; FP selects the
   fcmov-style spellings.  NOTE(review): elided listing -- most case
   labels of the big switch are not visible.  */
10525 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10526 int fp, FILE *file)
10528 const char *suffix;
/* FP compares are first mapped onto the equivalent integer condition
   (EFLAGS semantics after fcomi/ucomiss).  */
10530 if (mode == CCFPmode || mode == CCFPUmode)
10532 enum rtx_code second_code, bypass_code;
10533 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10534 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10535 code = ix86_fp_compare_code_to_integer (code);
10539 code = reverse_condition (code);
10590 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10594 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10595 Those same assemblers have the same but opposite lossage on cmov. */
10596 if (mode == CCmode)
10597 suffix = fp ? "nbe" : "a";
10598 else if (mode == CCCmode)
10601 gcc_unreachable ();
10617 gcc_unreachable ();
10621 gcc_assert (mode == CCmode || mode == CCCmode);
10638 gcc_unreachable ();
10642 /* ??? As above. */
10643 gcc_assert (mode == CCmode || mode == CCCmode);
10644 suffix = fp ? "nb" : "ae";
10647 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10651 /* ??? As above. */
10652 if (mode == CCmode)
10654 else if (mode == CCCmode)
10655 suffix = fp ? "nb" : "ae";
10657 gcc_unreachable ();
/* Unordered / ordered map to the parity flag.  */
10660 suffix = fp ? "u" : "p";
10663 suffix = fp ? "nu" : "np";
10666 gcc_unreachable ();
10668 fputs (suffix, file);
10671 /* Print the name of register X to FILE based on its machine mode and number.
10672 If CODE is 'w', pretend the mode is HImode.
10673 If CODE is 'b', pretend the mode is QImode.
10674 If CODE is 'k', pretend the mode is SImode.
10675 If CODE is 'q', pretend the mode is DImode.
10676 If CODE is 'x', pretend the mode is V4SFmode.
10677 If CODE is 't', pretend the mode is V8SFmode.
10678 If CODE is 'h', pretend the reg is the 'high' byte register.
10679 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10680 If CODE is 'd', duplicate the operand for AVX instruction.
/* Print register X to FILE, choosing the name by CODE (size/register
   overrides documented in the comment block above) and by X's mode.
   In AT&T syntax a '%' prefix is emitted; pc_rtx prints as "rip"
   (64-bit only).  NOTE(review): interior lines are elided in this
   view.  */
10684 print_reg (rtx x, int code, FILE *file)
10687 bool duplicated = code == 'd' && TARGET_AVX;
/* Internal registers (arg/frame pointer, flags, FP status/control)
   must never reach the assembler output.  */
10689 gcc_assert (x == pc_rtx
10690 || (REGNO (x) != ARG_POINTER_REGNUM
10691 && REGNO (x) != FRAME_POINTER_REGNUM
10692 && REGNO (x) != FLAGS_REG
10693 && REGNO (x) != FPSR_REG
10694 && REGNO (x) != FPCR_REG));
10696 if (ASSEMBLER_DIALECT == ASM_ATT)
10701 gcc_assert (TARGET_64BIT);
10702 fputs ("rip", file);
/* Reduce CODE to a byte size; the letters below correspond to the
   documented size-override codes.  */
10706 if (code == 'w' || MMX_REG_P (x))
10708 else if (code == 'b')
10710 else if (code == 'k')
10712 else if (code == 'q')
10714 else if (code == 'y')
10716 else if (code == 'h')
10718 else if (code == 'x')
10720 else if (code == 't')
10723 code = GET_MODE_SIZE (GET_MODE (x));
10725 /* Irritatingly, AMD extended registers use different naming convention
10726 from the normal registers. */
10727 if (REX_INT_REG_P (x))
10729 gcc_assert (TARGET_64BIT);
10733 error ("extended registers have no high halves");
/* r8..r15 take explicit b/w/d suffixes for sub-word accesses.  */
10736 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10739 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10742 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10745 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10748 error ("unsupported operand size for extended register");
10758 if (STACK_TOP_P (x))
10767 if (! ANY_FP_REG_P (x))
10768 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10773 reg = hi_reg_name[REGNO (x)];
10776 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10778 reg = qi_reg_name[REGNO (x)];
10781 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10783 reg = qi_high_reg_name[REGNO (x)];
10788 gcc_assert (!duplicated);
/* Skip the size-prefix letter stored in hi_reg_name[][0].  */
10790 fputs (hi_reg_name[REGNO (x)] + 1, file);
10795 gcc_unreachable ();
/* 'd': repeat the register as an extra source operand for AVX.  */
10801 if (ASSEMBLER_DIALECT == ASM_ATT)
10802 fprintf (file, ", %%%s", reg);
10804 fprintf (file, ", %s", reg);
10808 /* Locate some local-dynamic symbol still in use by this function
10809 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: if *PX is a local-dynamic TLS SYMBOL_REF,
   record its name in cfun->machine->some_ld_name.  NOTE(review): the
   dereference of PX and the return statements are elided in this
   view.  */
10813 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10817 if (GET_CODE (x) == SYMBOL_REF
10818 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10820 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return the name of some local-dynamic TLS symbol used in the current
   function, caching the result in cfun->machine->some_ld_name.  Scans
   every insn pattern via get_some_local_dynamic_name_1; asserts if no
   such symbol exists (callers only ask when one must be present).  */
10827 static const char *
10828 get_some_local_dynamic_name (void)
10832 if (cfun->machine->some_ld_name)
10833 return cfun->machine->some_ld_name;
10835 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10837 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10838 return cfun->machine->some_ld_name;
10840 gcc_unreachable ();
10843 /* Meaning of CODE:
10844 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10845 C -- print opcode suffix for set/cmov insn.
10846 c -- like C, but print reversed condition
10847 E,e -- likewise, but for compare-and-branch fused insn.
10848 F,f -- likewise, but for floating-point.
10849 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10851 R -- print the prefix for register names.
10852 z -- print the opcode suffix for the size of the current operand.
10853 * -- print a star (in certain assembler syntax)
10854 A -- print an absolute memory reference.
10855 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10856 s -- print a shift double count, followed by the assemblers argument
10858 b -- print the QImode name of the register for the indicated operand.
10859 %b0 would print %al if operands[0] is reg 0.
10860 w -- likewise, print the HImode name of the register.
10861 k -- likewise, print the SImode name of the register.
10862 q -- likewise, print the DImode name of the register.
10863 x -- likewise, print the V4SFmode name of the register.
10864 t -- likewise, print the V8SFmode name of the register.
10865 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10866 y -- print "st(0)" instead of "st" as a register.
10867 d -- print duplicated register operand for AVX instruction.
10868 D -- print condition for SSE cmp instruction.
10869 P -- if PIC, print an @PLT suffix.
10870 X -- don't print any sort of PIC '@' suffix for a symbol.
10871 & -- print some in-use local-dynamic symbol name.
10872 H -- print a memory address offset by 8; used for sse high-parts
10873 Y -- print condition for SSE5 com* instruction.
10874 + -- print a branch hint as 'cs' or 'ds' prefix
10875 ; -- print a semicolon (after prefixes due to bug in older gas).
/* Main operand-printing hook (PRINT_OPERAND).  Dispatches on the
   single-letter CODE documented in the comment block above, then falls
   through to the REG / MEM / CONST_DOUBLE / constant cases.
   NOTE(review): large portions of the switch are elided in this view;
   only the visible arms are commented.  */
10879 print_operand (FILE *file, rtx x, int code)
10886 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': emit an in-use local-dynamic TLS symbol name.  */
10891 assemble_name (file, get_some_local_dynamic_name ());
10895 switch (ASSEMBLER_DIALECT)
10902 /* Intel syntax. For absolute addresses, registers should not
10903 be surrounded by braces. */
10907 PRINT_OPERAND (file, x, 0);
10914 gcc_unreachable ();
10917 PRINT_OPERAND (file, x, 0);
10922 if (ASSEMBLER_DIALECT == ASM_ATT)
10927 if (ASSEMBLER_DIALECT == ASM_ATT)
10932 if (ASSEMBLER_DIALECT == ASM_ATT)
10937 if (ASSEMBLER_DIALECT == ASM_ATT)
10942 if (ASSEMBLER_DIALECT == ASM_ATT)
10947 if (ASSEMBLER_DIALECT == ASM_ATT)
10952 /* 387 opcodes don't get size suffixes if the operands are
10954 if (STACK_REG_P (x))
10957 /* Likewise if using Intel opcodes. */
10958 if (ASSEMBLER_DIALECT == ASM_INTEL)
10961 /* This is the size of op from size of operand. */
10962 switch (GET_MODE_SIZE (GET_MODE (x)))
10971 #ifdef HAVE_GAS_FILDS_FISTS
10981 if (GET_MODE (x) == SFmode)
10996 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11000 #ifdef GAS_MNEMONICS
11015 gcc_unreachable ();
/* 's': shift-double count followed by the assembler argument.  */
11032 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11034 PRINT_OPERAND (file, x, 0);
11035 fputs (", ", file);
11040 /* Little bit of braindamage here. The SSE compare instructions
11041 does use completely different names for the comparisons than the
11042 fp conditional moves. */
/* 'D': SSE cmp condition names (AVX adds the extra spellings).  */
11045 switch (GET_CODE (x))
11048 fputs ("eq", file);
11051 fputs ("eq_us", file);
11054 fputs ("lt", file);
11057 fputs ("nge", file);
11060 fputs ("le", file);
11063 fputs ("ngt", file);
11066 fputs ("unord", file);
11069 fputs ("neq", file);
11072 fputs ("neq_oq", file);
11075 fputs ("ge", file);
11078 fputs ("nlt", file);
11081 fputs ("gt", file);
11084 fputs ("nle", file);
11087 fputs ("ord", file);
11090 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11096 switch (GET_CODE (x))
11100 fputs ("eq", file);
11104 fputs ("lt", file);
11108 fputs ("le", file);
11111 fputs ("unord", file);
11115 fputs ("neq", file);
11119 fputs ("nlt", file);
11123 fputs ("nle", file);
11126 fputs ("ord", file);
11129 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'O': Sun assembler cmov size suffixes (w/l/q), AT&T only.  */
11135 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11136 if (ASSEMBLER_DIALECT == ASM_ATT)
11138 switch (GET_MODE (x))
11140 case HImode: putc ('w', file); break;
11142 case SFmode: putc ('l', file); break;
11144 case DFmode: putc ('q', file); break;
11145 default: gcc_unreachable ();
/* 'C'/'F' and reversed 'c'/'f': set/cmov condition suffixes.  */
11152 if (!COMPARISON_P (x))
11154 output_operand_lossage ("operand is neither a constant nor a "
11155 "condition code, invalid operand code "
11159 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11162 if (!COMPARISON_P (x))
11164 output_operand_lossage ("operand is neither a constant nor a "
11165 "condition code, invalid operand code "
11169 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11170 if (ASSEMBLER_DIALECT == ASM_ATT)
11173 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11176 /* Like above, but reverse condition */
11178 /* Check to see if argument to %c is really a constant
11179 and not a condition code which needs to be reversed. */
11180 if (!COMPARISON_P (x))
11182 output_operand_lossage ("operand is neither a constant nor a "
11183 "condition code, invalid operand "
11187 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11190 if (!COMPARISON_P (x))
11192 output_operand_lossage ("operand is neither a constant nor a "
11193 "condition code, invalid operand "
11197 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11198 if (ASSEMBLER_DIALECT == ASM_ATT)
11201 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e': compare-and-branch fused conditions in CCmode.  */
11205 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11209 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
/* 'H': address of the SSE high part — operand offset by 8 bytes.  */
11213 /* It doesn't actually matter what mode we use here, as we're
11214 only going to use this for printing. */
11215 x = adjust_address_nv (x, DImode, 8);
/* '+': branch-prediction hint prefixes, driven by REG_BR_PROB.  */
11223 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11226 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11229 int pred_val = INTVAL (XEXP (x, 0));
11231 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11232 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11234 int taken = pred_val > REG_BR_PROB_BASE / 2;
11235 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11237 /* Emit hints only in the case default branch prediction
11238 heuristics would fail. */
11239 if (taken != cputaken)
11241 /* We use 3e (DS) prefix for taken branches and
11242 2e (CS) prefix for not taken branches. */
11244 fputs ("ds ; ", file);
11246 fputs ("cs ; ", file);
/* 'Y': SSE5 com* condition names.  */
11254 switch (GET_CODE (x))
11257 fputs ("neq", file);
11260 fputs ("eq", file);
11264 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11268 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11272 fputs ("le", file);
11276 fputs ("lt", file);
11279 fputs ("unord", file);
11282 fputs ("ord", file);
11285 fputs ("ueq", file);
11288 fputs ("nlt", file);
11291 fputs ("nle", file);
11294 fputs ("ule", file);
11297 fputs ("ult", file);
11300 fputs ("une", file);
11303 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* ';': separator needed by buggy older gas.  */
11310 fputs (" ; ", file);
11317 output_operand_lossage ("invalid operand code '%c'", code);
/* After the code switch: print by rtx class.  */
11322 print_reg (x, code, file);
11324 else if (MEM_P (x))
11326 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11327 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11328 && GET_MODE (x) != BLKmode)
11331 switch (GET_MODE_SIZE (GET_MODE (x)))
11333 case 1: size = "BYTE"; break;
11334 case 2: size = "WORD"; break;
11335 case 4: size = "DWORD"; break;
11336 case 8: size = "QWORD"; break;
11337 case 12: size = "XWORD"; break;
11339 if (GET_MODE (x) == XFmode)
11345 gcc_unreachable ();
11348 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11351 else if (code == 'w')
11353 else if (code == 'k')
11356 fputs (size, file);
11357 fputs (" PTR ", file);
11361 /* Avoid (%rip) for call operands. */
11362 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11363 && !CONST_INT_P (x))
11364 output_addr_const (file, x);
11365 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11366 output_operand_lossage ("invalid constraints for operand");
11368 output_address (x);
/* SFmode immediates are printed as their raw 32-bit pattern.  */
11371 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11376 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11377 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11379 if (ASSEMBLER_DIALECT == ASM_ATT)
11381 fprintf (file, "0x%08lx", (long unsigned int) l);
11384 /* These float cases don't actually occur as immediate operands. */
11385 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11389 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11390 fprintf (file, "%s", dstr);
11393 else if (GET_CODE (x) == CONST_DOUBLE
11394 && GET_MODE (x) == XFmode)
11398 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11399 fprintf (file, "%s", dstr);
11404 /* We have patterns that allow zero sets of memory, for instance.
11405 In 64-bit mode, we should probably support all 8-byte vectors,
11406 since we can in fact encode that into an immediate. */
11407 if (GET_CODE (x) == CONST_VECTOR)
11409 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11415 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11417 if (ASSEMBLER_DIALECT == ASM_ATT)
11420 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11421 || GET_CODE (x) == LABEL_REF)
11423 if (ASSEMBLER_DIALECT == ASM_ATT)
11426 fputs ("OFFSET FLAT:", file);
11429 if (CONST_INT_P (x))
11430 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x))
11432 output_pic_addr_const (file, x, code);
11434 output_addr_const (file, x);
11438 /* Print a memory operand whose address is ADDR. */
/* Print the memory address ADDR to FILE in the current assembler
   dialect.  The address is first decomposed into base/index/disp/scale
   via ix86_decompose_address; segment overrides (fs:/gs:) are printed
   first.  NOTE(review): interior lines, including the Intel-syntax
   branch structure, are elided in this view.  */
11441 print_operand_address (FILE *file, rtx addr)
11443 struct ix86_address parts;
11444 rtx base, index, disp;
11446 int ok = ix86_decompose_address (addr, &parts);
11451 index = parts.index;
11453 scale = parts.scale;
11461 if (ASSEMBLER_DIALECT == ASM_ATT)
11463 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11466 gcc_unreachable ();
11469 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11470 if (TARGET_64BIT && !base && !index)
/* Strip a CONST (PLUS sym const_int) wrapper to find the symbol.  */
11474 if (GET_CODE (disp) == CONST
11475 && GET_CODE (XEXP (disp, 0)) == PLUS
11476 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11477 symbol = XEXP (XEXP (disp, 0), 0);
11479 if (GET_CODE (symbol) == LABEL_REF
11480 || (GET_CODE (symbol) == SYMBOL_REF
11481 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11484 if (!base && !index)
11486 /* Displacement only requires special attention. */
11488 if (CONST_INT_P (disp))
/* Intel syntax needs an explicit ds: for a bare constant address.  */
11490 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11491 fputs ("ds:", file);
11492 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11495 output_pic_addr_const (file, disp, 0);
11497 output_addr_const (file, disp);
/* AT&T form: disp(base,index,scale).  */
11501 if (ASSEMBLER_DIALECT == ASM_ATT)
11506 output_pic_addr_const (file, disp, 0);
11507 else if (GET_CODE (disp) == LABEL_REF)
11508 output_asm_label (disp);
11510 output_addr_const (file, disp);
11515 print_reg (base, 0, file);
11519 print_reg (index, 0, file);
11521 fprintf (file, ",%d", scale);
/* Intel form: [base+index*scale+offset], symbol printed first.  */
11527 rtx offset = NULL_RTX;
11531 /* Pull out the offset of a symbol; print any symbol itself. */
11532 if (GET_CODE (disp) == CONST
11533 && GET_CODE (XEXP (disp, 0)) == PLUS
11534 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11536 offset = XEXP (XEXP (disp, 0), 1);
11537 disp = gen_rtx_CONST (VOIDmode,
11538 XEXP (XEXP (disp, 0), 0));
11542 output_pic_addr_const (file, disp, 0);
11543 else if (GET_CODE (disp) == LABEL_REF)
11544 output_asm_label (disp);
11545 else if (CONST_INT_P (disp))
11548 output_addr_const (file, disp);
11554 print_reg (base, 0, file);
11557 if (INTVAL (offset) >= 0)
11559 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11563 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11570 print_reg (index, 0, file);
11572 fprintf (file, "*%d", scale);
/* OUTPUT_ADDR_CONST_EXTRA hook: print target-specific UNSPEC address
   constants (TLS relocations and the Mach-O PIC offset) to FILE.
   Returns false for anything that is not a recognized UNSPEC so the
   generic printer can reject it.  NOTE(review): return statements and
   some case labels are elided in this view.  */
11580 output_addr_const_extra (FILE *file, rtx x)
11584 if (GET_CODE (x) != UNSPEC)
11587 op = XVECEXP (x, 0, 0);
11588 switch (XINT (x, 1))
11590 case UNSPEC_GOTTPOFF:
11591 output_addr_const (file, op);
11592 /* FIXME: This might be @TPOFF in Sun ld. */
11593 fputs ("@GOTTPOFF", file);
11596 output_addr_const (file, op);
11597 fputs ("@TPOFF", file);
11599 case UNSPEC_NTPOFF:
11600 output_addr_const (file, op);
/* 64-bit uses @TPOFF, 32-bit @NTPOFF (selector line elided).  */
11602 fputs ("@TPOFF", file);
11604 fputs ("@NTPOFF", file);
11606 case UNSPEC_DTPOFF:
11607 output_addr_const (file, op);
11608 fputs ("@DTPOFF", file);
11610 case UNSPEC_GOTNTPOFF:
11611 output_addr_const (file, op);
11613 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11614 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11616 fputs ("@GOTNTPOFF", file);
11618 case UNSPEC_INDNTPOFF:
11619 output_addr_const (file, op);
11620 fputs ("@INDNTPOFF", file);
11623 case UNSPEC_MACHOPIC_OFFSET:
11624 output_addr_const (file, op);
11626 machopic_output_function_base_name (file);
11637 /* Split one or more DImode RTL references into pairs of SImode
11638 references. The RTL can be REG, offsettable MEM, integer constant, or
11639 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11640 split and "num" is its length. lo_half and hi_half are output arrays
11641 that parallel "operands". */
/* Split each of the NUM DImode rtxes in OPERANDS into an SImode low
   half and high half, stored into the parallel arrays LO_HALF and
   HI_HALF.  Volatile MEMs are split with adjust_address because
   simplify_subreg refuses them; everything else goes through
   simplify_gen_subreg (VOIDmode constants are treated as DImode).  */
11644 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11648 rtx op = operands[num];
11650 /* simplify_subreg refuse to split volatile memory addresses,
11651 but we still have to handle it. */
11654 lo_half[num] = adjust_address (op, SImode, 0);
11655 hi_half[num] = adjust_address (op, SImode, 4);
11659 lo_half[num] = simplify_gen_subreg (SImode, op,
11660 GET_MODE (op) == VOIDmode
11661 ? DImode : GET_MODE (op), 0);
11662 hi_half[num] = simplify_gen_subreg (SImode, op,
11663 GET_MODE (op) == VOIDmode
11664 ? DImode : GET_MODE (op), 4);
11668 /* Split one or more TImode RTL references into pairs of DImode
11669 references. The RTL can be REG, offsettable MEM, integer constant, or
11670 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11671 split and "num" is its length. lo_half and hi_half are output arrays
11672 that parallel "operands". */
/* TImode analogue of split_di: split each of the NUM TImode rtxes in
   OPERANDS into DImode low/high halves (offsets 0 and 8) in LO_HALF
   and HI_HALF.  */
11675 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11679 rtx op = operands[num];
11681 /* simplify_subreg refuse to split volatile memory addresses, but we
11682 still have to handle it. */
11685 lo_half[num] = adjust_address (op, DImode, 0);
11686 hi_half[num] = adjust_address (op, DImode, 8);
11690 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11691 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11696 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11697 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11698 is the expression of the binary operation. The output may either be
11699 emitted here, or returned to the caller, like all output_* functions.
11701 There is no guarantee that the operands are the same mode, as they
11702 might be within FLOAT or FLOAT_EXTEND expressions. */
11704 #ifndef SYSV386_COMPAT
11705 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11706 wants to fix the assemblers because that causes incompatibility
11707 with gcc. No-one wants to fix gcc because that causes
11708 incompatibility with assemblers... You can use the option of
11709 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11710 #define SYSV386_COMPAT 1
/* Return the assembler template for the 387/SSE binary FP operation in
   INSN (PLUS, MINUS, MULT or DIV — see comment above).  SSE operands
   get scalar ss/sd forms; x87 operands get the fadd/fsub/fmul/fdiv
   family, with popping ("p") variants when a stack register dies.
   NOTE(review): the opcode-string assignments and several case labels
   are elided in this view.  */
11714 output_387_binary_op (rtx insn, rtx *operands)
11716 static char buf[40];
11719 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11721 #ifdef ENABLE_CHECKING
11722 /* Even if we do not want to check the inputs, this documents input
11723 constraints. Which helps in understanding the following code. */
11724 if (STACK_REG_P (operands[0])
11725 && ((REG_P (operands[1])
11726 && REGNO (operands[0]) == REGNO (operands[1])
11727 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11728 || (REG_P (operands[2])
11729 && REGNO (operands[0]) == REGNO (operands[2])
11730 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11731 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11734 gcc_assert (is_sse);
/* Select base mnemonics; integer-mode operands take the fi* forms.  */
11737 switch (GET_CODE (operands[3]))
11740 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11741 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11749 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11750 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11758 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11759 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11767 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11768 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11776 gcc_unreachable ();
/* SSE path: three-operand AVX form vs. two-operand legacy form.  */
11783 strcpy (buf, ssep);
11784 if (GET_MODE (operands[0]) == SFmode)
11785 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11787 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11791 strcpy (buf, ssep + 1);
11792 if (GET_MODE (operands[0]) == SFmode)
11793 strcat (buf, "ss\t{%2, %0|%0, %2}");
11795 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: pick the operand-order/popping suffix P.  */
11801 switch (GET_CODE (operands[3]))
11805 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
/* Commutative op with dest == op2: swap so dest == op1 below.  */
11807 rtx temp = operands[2];
11808 operands[2] = operands[1];
11809 operands[1] = temp;
11812 /* know operands[0] == operands[1]. */
11814 if (MEM_P (operands[2]))
11820 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11822 if (STACK_TOP_P (operands[0]))
11823 /* How is it that we are storing to a dead operand[2]?
11824 Well, presumably operands[1] is dead too. We can't
11825 store the result to st(0) as st(0) gets popped on this
11826 instruction. Instead store to operands[2] (which I
11827 think has to be st(1)). st(1) will be popped later.
11828 gcc <= 2.8.1 didn't have this check and generated
11829 assembly code that the Unixware assembler rejected. */
11830 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11832 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11836 if (STACK_TOP_P (operands[0]))
11837 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11839 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11844 if (MEM_P (operands[1]))
11850 if (MEM_P (operands[2]))
11856 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11859 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11860 derived assemblers, confusingly reverse the direction of
11861 the operation for fsub{r} and fdiv{r} when the
11862 destination register is not st(0). The Intel assembler
11863 doesn't have this brain damage. Read !SYSV386_COMPAT to
11864 figure out what the hardware really does. */
11865 if (STACK_TOP_P (operands[0]))
11866 p = "{p\t%0, %2|rp\t%2, %0}";
11868 p = "{rp\t%2, %0|p\t%0, %2}";
11870 if (STACK_TOP_P (operands[0]))
11871 /* As above for fmul/fadd, we can't store to st(0). */
11872 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11874 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11879 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11882 if (STACK_TOP_P (operands[0]))
11883 p = "{rp\t%0, %1|p\t%1, %0}";
11885 p = "{p\t%1, %0|rp\t%0, %1}";
11887 if (STACK_TOP_P (operands[0]))
11888 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11890 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11895 if (STACK_TOP_P (operands[0]))
11897 if (STACK_TOP_P (operands[1]))
11898 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11900 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11903 else if (STACK_TOP_P (operands[1]))
11906 p = "{\t%1, %0|r\t%0, %1}";
11908 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11914 p = "{r\t%2, %0|\t%0, %2}";
11916 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11922 gcc_unreachable ();
11929 /* Return needed mode for entity in optimize_mode_switching pass. */
/* optimize_mode_switching hook: return the i387 control-word mode that
   INSN requires for ENTITY.  Calls and asm patterns force
   I387_CW_UNINITIALIZED; unrecognizable insns need nothing
   (I387_CW_ANY); otherwise the insn's i387_cw attribute decides.
   NOTE(review): the per-entity switch arms between the visible mode
   checks are elided in this view.  */
11932 ix86_mode_needed (int entity, rtx insn)
11934 enum attr_i387_cw mode;
11936 /* The mode UNINITIALIZED is used to store control word after a
11937 function call or ASM pattern. The mode ANY specify that function
11938 has no requirements on the control word and make no changes in the
11939 bits we are interested in. */
11942 || (NONJUMP_INSN_P (insn)
11943 && (asm_noperands (PATTERN (insn)) >= 0
11944 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11945 return I387_CW_UNINITIALIZED;
11947 if (recog_memoized (insn) < 0)
11948 return I387_CW_ANY;
11950 mode = get_attr_i387_cw (insn);
11955 if (mode == I387_CW_TRUNC)
11960 if (mode == I387_CW_FLOOR)
11965 if (mode == I387_CW_CEIL)
11970 if (mode == I387_CW_MASK_PM)
11975 gcc_unreachable ();
11978 return I387_CW_ANY;
11981 /* Output code to initialize control word copies used by trunc?f?i and
11982 rounding patterns. CURRENT_MODE is set to current control word,
11983 while NEW_MODE is set to new control word. */
/* Emit insns that build the modified i387 control word for rounding
   MODE (trunc/floor/ceil/mask-PM) into a dedicated stack slot.  The
   current control word is saved with fnstcw, tweaked in a register,
   and the result stored into the slot matching MODE.  Two strategies:
   16-bit and/or on targets where partial-register ops are cheap or we
   optimize for size, otherwise a 32-bit insv of the rounding bits.  */
11986 emit_i387_cw_initialization (int mode)
11988 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11991 enum ix86_stack_slot slot;
11993 rtx reg = gen_reg_rtx (HImode);
11995 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11996 emit_move_insn (reg, copy_rtx (stored_mode));
11998 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
11999 || optimize_function_for_size_p (cfun))
12003 case I387_CW_TRUNC:
12004 /* round toward zero (truncate) */
12005 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12006 slot = SLOT_CW_TRUNC;
12009 case I387_CW_FLOOR:
12010 /* round down toward -oo */
12011 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12012 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12013 slot = SLOT_CW_FLOOR;
12017 /* round up toward +oo */
12018 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12019 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12020 slot = SLOT_CW_CEIL;
12023 case I387_CW_MASK_PM:
12024 /* mask precision exception for nearbyint() */
12025 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12026 slot = SLOT_CW_MASK_PM;
12030 gcc_unreachable ();
/* insv variant: write the 2-bit rounding field directly.  */
12037 case I387_CW_TRUNC:
12038 /* round toward zero (truncate) */
12039 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12040 slot = SLOT_CW_TRUNC;
12043 case I387_CW_FLOOR:
12044 /* round down toward -oo */
12045 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12046 slot = SLOT_CW_FLOOR;
12050 /* round up toward +oo */
12051 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12052 slot = SLOT_CW_CEIL;
12055 case I387_CW_MASK_PM:
12056 /* mask precision exception for nearbyint() */
12057 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12058 slot = SLOT_CW_MASK_PM;
12062 gcc_unreachable ();
12066 gcc_assert (slot < MAX_386_STACK_LOCALS);
12068 new_mode = assign_386_stack_local (HImode, slot);
12069 emit_move_insn (new_mode, reg);
12072 /* Output code for INSN to convert a float to a signed int. OPERANDS
12073 are the insn operands. The output may be [HSD]Imode and the input
12074 operand may be [SDX]Fmode. */
/* Emit the float-to-signed-int conversion for INSN (see comment
   above).  FISTTP nonzero selects the SSE3 fisttp instruction, which
   truncates without a control-word switch; otherwise fldcw swaps in
   the truncating control word around fist/fistp.  DImode (and fisttp)
   always need the popping form, so reload st(0) first if it would not
   otherwise die.  */
12077 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12079 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12080 int dimode_p = GET_MODE (operands[0]) == DImode;
12081 int round_mode = get_attr_i387_cw (insn);
12083 /* Jump through a hoop or two for DImode, since the hardware has no
12084 non-popping instruction. We used to do this a different way, but
12085 that was somewhat fragile and broke with post-reload splitters. */
12086 if ((dimode_p || fisttp) && !stack_top_dies)
12087 output_asm_insn ("fld\t%y1", operands);
12089 gcc_assert (STACK_TOP_P (operands[1]));
12090 gcc_assert (MEM_P (operands[0]));
12091 gcc_assert (GET_MODE (operands[1]) != TFmode);
12094 output_asm_insn ("fisttp%z0\t%0", operands);
/* fist path: switch to the truncating control word and back.  */
12097 if (round_mode != I387_CW_ANY)
12098 output_asm_insn ("fldcw\t%3", operands);
12099 if (stack_top_dies || dimode_p)
12100 output_asm_insn ("fistp%z0\t%0", operands);
12102 output_asm_insn ("fist%z0\t%0", operands);
12103 if (round_mode != I387_CW_ANY)
12104 output_asm_insn ("fldcw\t%2", operands);
12110 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12111 have the values zero or one, indicates the ffreep insn's operand
12112 from the OPERANDS array. */
/* Return the template that pops x87 register OPERANDS[OPNO] (OPNO is 0
   or 1).  Uses ffreep when the target likes it — emitted as raw
   opcode bytes (.word 0xc?df, ? = stack regno) if the assembler lacks
   the mnemonic — otherwise falls back to fstp.  */
12114 static const char *
12115 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12117 if (TARGET_USE_FFREEP)
12118 #if HAVE_AS_IX86_FFREEP
12119 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Patch the register digit into the static template in place.  */
12122 static char retval[] = ".word\t0xc_df";
12123 int regno = REGNO (operands[opno]);
12125 gcc_assert (FP_REGNO_P (regno));
12127 retval[9] = '0' + (regno - FIRST_STACK_REG);
12132 return opno ? "fstp\t%y1" : "fstp\t%y0";
12136 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12137 should be used. UNORDERED_P is true when fucom should be used. */
/* Return the assembler template comparing OPERANDS for INSN.
   EFLAGS_P selects the fcomi family (result in EFLAGS) over
   fcom+fnstsw; UNORDERED_P selects fucom/ucomis* variants that do not
   trap on NaN.  SSE operands use (v)comis*/(v)ucomis*.  NOTE(review):
   some branch and declaration lines are elided in this view.  */
12140 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12142 int stack_top_dies;
12143 rtx cmp_op0, cmp_op1;
12144 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12148 cmp_op0 = operands[0];
12149 cmp_op1 = operands[1];
12153 cmp_op0 = operands[1];
12154 cmp_op1 = operands[2];
/* SSE path: the +1 below skips the 'v' prefix when not using AVX.  */
12159 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12160 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12161 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12162 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12164 if (GET_MODE (operands[0]) == SFmode)
12166 return &ucomiss[TARGET_AVX ? 0 : 1];
12168 return &comiss[TARGET_AVX ? 0 : 1];
12171 return &ucomisd[TARGET_AVX ? 0 : 1];
12173 return &comisd[TARGET_AVX ? 0 : 1];
12176 gcc_assert (STACK_TOP_P (cmp_op0));
12178 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, popping st(0) if it dies.  */
12180 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12182 if (stack_top_dies)
12184 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12185 return output_387_ffreep (operands, 1);
12188 return "ftst\n\tfnstsw\t%0";
12191 if (STACK_REG_P (cmp_op1)
12193 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12194 && REGNO (cmp_op1) != FIRST_STACK_REG
12196 /* If both the top of the 387 stack dies, and the other operand
12197 is also a stack register that dies, then this must be a
12198 `fcompp' float compare */
12202 /* There is no double popping fcomi variant. Fortunately,
12203 eflags is immune from the fstp's cc clobbering. */
12205 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12207 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12208 return output_387_ffreep (operands, 0);
12213 return "fucompp\n\tfnstsw\t%0";
12215 return "fcompp\n\tfnstsw\t%0";
12220 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12222 static const char * const alt[16] =
12224 "fcom%z2\t%y2\n\tfnstsw\t%0",
12225 "fcomp%z2\t%y2\n\tfnstsw\t%0",
12226 "fucom%z2\t%y2\n\tfnstsw\t%0",
12227 "fucomp%z2\t%y2\n\tfnstsw\t%0",
12229 "ficom%z2\t%y2\n\tfnstsw\t%0",
12230 "ficomp%z2\t%y2\n\tfnstsw\t%0",
12234 "fcomi\t{%y1, %0|%0, %y1}",
12235 "fcomip\t{%y1, %0|%0, %y1}",
12236 "fucomi\t{%y1, %0|%0, %y1}",
12237 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into alt[] from the flags above.  */
12248 mask = eflags_p << 3;
12249 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12250 mask |= unordered_p << 1;
12251 mask |= stack_top_dies;
12253 gcc_assert (mask < 16);
/* Emit one absolute jump-table entry for local label number VALUE to
   FILE, as .long (or .quad in 64-bit mode; the condition line is
   elided in this view).  */
12262 ix86_output_addr_vec_elt (FILE *file, int value)
12264 const char *directive = ASM_LONG;
12268 directive = ASM_QUAD;
12270 gcc_assert (!TARGET_64BIT);
12273 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one relative (PIC) jump-table entry to FILE: local label VALUE
   expressed relative to label REL, or @GOTOFF / Mach-O base / GOT
   forms depending on target and assembler capabilities.  */
12277 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12279 const char *directive = ASM_LONG;
12282 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12283 directive = ASM_QUAD;
12285 gcc_assert (!TARGET_64BIT);
12287 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12288 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12289 fprintf (file, "%s%s%d-%s%d\n",
12290 directive, LPREFIX, value, LPREFIX, rel);
12291 else if (HAVE_AS_GOTOFF_IN_DATA)
12292 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12294 else if (TARGET_MACHO)
12296 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12297 machopic_output_function_base_name (file);
12298 fprintf(file, "\n");
/* Fallback: label address relative to the GOT symbol.  */
12302 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12303 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12306 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Emit code to zero register DEST: "xor reg, reg" (shorter, clobbers
   flags) when profitable, otherwise "mov $0, reg".  Only valid after
   reload because it plays register-width games.  */
12310 ix86_expand_clear (rtx dest)
12314 /* We play register width games, which are only valid after reload. */
12315 gcc_assert (reload_completed);
12317 /* Avoid HImode and its attendant prefix byte. */
12318 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12319 dest = gen_rtx_REG (SImode, REGNO (dest));
12320 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12322 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12323 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
/* xor form clobbers the flags, so wrap SET + CLOBBER in a PARALLEL.  */
12325 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12326 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12332 /* X is an unchanging MEM. If it is a constant pool reference, return
12333 the constant pool rtx, else NULL. */
/* X is an unchanging MEM; if its (delegitimized) address is a constant
   pool reference, return the pool constant, else NULL (the NULL return
   line is elided in this view).  */
12336 maybe_get_pool_constant (rtx x)
12338 x = ix86_delegitimize_address (XEXP (x, 0));
12340 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12341 return get_pool_constant (x);
12347 ix86_expand_move (enum machine_mode mode, rtx operands[])
12350 enum tls_model model;
12355 if (GET_CODE (op1) == SYMBOL_REF)
12357 model = SYMBOL_REF_TLS_MODEL (op1);
12360 op1 = legitimize_tls_address (op1, model, true);
12361 op1 = force_operand (op1, op0);
12365 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12366 && SYMBOL_REF_DLLIMPORT_P (op1))
12367 op1 = legitimize_dllimport_symbol (op1, false);
12369 else if (GET_CODE (op1) == CONST
12370 && GET_CODE (XEXP (op1, 0)) == PLUS
12371 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12373 rtx addend = XEXP (XEXP (op1, 0), 1);
12374 rtx symbol = XEXP (XEXP (op1, 0), 0);
12377 model = SYMBOL_REF_TLS_MODEL (symbol);
12379 tmp = legitimize_tls_address (symbol, model, true);
12380 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12381 && SYMBOL_REF_DLLIMPORT_P (symbol))
12382 tmp = legitimize_dllimport_symbol (symbol, true);
12386 tmp = force_operand (tmp, NULL);
12387 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12388 op0, 1, OPTAB_DIRECT);
12394 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12396 if (TARGET_MACHO && !TARGET_64BIT)
12401 rtx temp = ((reload_in_progress
12402 || ((op0 && REG_P (op0))
12404 ? op0 : gen_reg_rtx (Pmode));
12405 op1 = machopic_indirect_data_reference (op1, temp);
12406 op1 = machopic_legitimize_pic_address (op1, mode,
12407 temp == op1 ? 0 : temp);
12409 else if (MACHOPIC_INDIRECT)
12410 op1 = machopic_indirect_data_reference (op1, 0);
12418 op1 = force_reg (Pmode, op1);
12419 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12421 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12422 op1 = legitimize_pic_address (op1, reg);
12431 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12432 || !push_operand (op0, mode))
12434 op1 = force_reg (mode, op1);
12436 if (push_operand (op0, mode)
12437 && ! general_no_elim_operand (op1, mode))
12438 op1 = copy_to_mode_reg (mode, op1);
12440 /* Force large constants in 64bit compilation into register
12441 to get them CSEed. */
12442 if (can_create_pseudo_p ()
12443 && (mode == DImode) && TARGET_64BIT
12444 && immediate_operand (op1, mode)
12445 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12446 && !register_operand (op0, mode)
12448 op1 = copy_to_mode_reg (mode, op1);
12450 if (can_create_pseudo_p ()
12451 && FLOAT_MODE_P (mode)
12452 && GET_CODE (op1) == CONST_DOUBLE)
12454 /* If we are loading a floating point constant to a register,
12455 force the value to memory now, since we'll get better code
12456 out the back end. */
12458 op1 = validize_mem (force_const_mem (mode, op1));
12459 if (!register_operand (op0, mode))
12461 rtx temp = gen_reg_rtx (mode);
12462 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12463 emit_move_insn (op0, temp);
12469 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12473 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12475 rtx op0 = operands[0], op1 = operands[1];
12476 unsigned int align = GET_MODE_ALIGNMENT (mode);
12478 /* Force constants other than zero into memory. We do not know how
12479 the instructions used to build constants modify the upper 64 bits
12480 of the register, once we have that information we may be able
12481 to handle some of them more efficiently. */
12482 if (can_create_pseudo_p ()
12483 && register_operand (op0, mode)
12484 && (CONSTANT_P (op1)
12485 || (GET_CODE (op1) == SUBREG
12486 && CONSTANT_P (SUBREG_REG (op1))))
12487 && standard_sse_constant_p (op1) <= 0)
12488 op1 = validize_mem (force_const_mem (mode, op1));
12490 /* We need to check memory alignment for SSE mode since attribute
12491 can make operands unaligned. */
12492 if (can_create_pseudo_p ()
12493 && SSE_REG_MODE_P (mode)
12494 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12495 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12499 /* ix86_expand_vector_move_misalign() does not like constants ... */
12500 if (CONSTANT_P (op1)
12501 || (GET_CODE (op1) == SUBREG
12502 && CONSTANT_P (SUBREG_REG (op1))))
12503 op1 = validize_mem (force_const_mem (mode, op1));
12505 /* ... nor both arguments in memory. */
12506 if (!register_operand (op0, mode)
12507 && !register_operand (op1, mode))
12508 op1 = force_reg (mode, op1);
12510 tmp[0] = op0; tmp[1] = op1;
12511 ix86_expand_vector_move_misalign (mode, tmp);
12515 /* Make operand1 a register if it isn't already. */
12516 if (can_create_pseudo_p ()
12517 && !register_operand (op0, mode)
12518 && !register_operand (op1, mode))
12520 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12524 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12527 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12528 straight to ix86_expand_vector_move. */
12529 /* Code generation for scalar reg-reg moves of single and double precision data:
12530 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12534 if (x86_sse_partial_reg_dependency == true)
12539 Code generation for scalar loads of double precision data:
12540 if (x86_sse_split_regs == true)
12541 movlpd mem, reg (gas syntax)
12545 Code generation for unaligned packed loads of single precision data
12546 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12547 if (x86_sse_unaligned_move_optimal)
12550 if (x86_sse_partial_reg_dependency == true)
12562 Code generation for unaligned packed loads of double precision data
12563 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12564 if (x86_sse_unaligned_move_optimal)
12567 if (x86_sse_split_regs == true)
12580 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12589 switch (GET_MODE_CLASS (mode))
12591 case MODE_VECTOR_INT:
12593 switch (GET_MODE_SIZE (mode))
12596 op0 = gen_lowpart (V16QImode, op0);
12597 op1 = gen_lowpart (V16QImode, op1);
12598 emit_insn (gen_avx_movdqu (op0, op1));
12601 op0 = gen_lowpart (V32QImode, op0);
12602 op1 = gen_lowpart (V32QImode, op1);
12603 emit_insn (gen_avx_movdqu256 (op0, op1));
12606 gcc_unreachable ();
12609 case MODE_VECTOR_FLOAT:
12610 op0 = gen_lowpart (mode, op0);
12611 op1 = gen_lowpart (mode, op1);
12616 emit_insn (gen_avx_movups (op0, op1));
12619 emit_insn (gen_avx_movups256 (op0, op1));
12622 emit_insn (gen_avx_movupd (op0, op1));
12625 emit_insn (gen_avx_movupd256 (op0, op1));
12628 gcc_unreachable ();
12633 gcc_unreachable ();
12641 /* If we're optimizing for size, movups is the smallest. */
12642 if (optimize_insn_for_size_p ())
12644 op0 = gen_lowpart (V4SFmode, op0);
12645 op1 = gen_lowpart (V4SFmode, op1);
12646 emit_insn (gen_sse_movups (op0, op1));
12650 /* ??? If we have typed data, then it would appear that using
12651 movdqu is the only way to get unaligned data loaded with
12653 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12655 op0 = gen_lowpart (V16QImode, op0);
12656 op1 = gen_lowpart (V16QImode, op1);
12657 emit_insn (gen_sse2_movdqu (op0, op1));
12661 if (TARGET_SSE2 && mode == V2DFmode)
12665 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12667 op0 = gen_lowpart (V2DFmode, op0);
12668 op1 = gen_lowpart (V2DFmode, op1);
12669 emit_insn (gen_sse2_movupd (op0, op1));
12673 /* When SSE registers are split into halves, we can avoid
12674 writing to the top half twice. */
12675 if (TARGET_SSE_SPLIT_REGS)
12677 emit_clobber (op0);
12682 /* ??? Not sure about the best option for the Intel chips.
12683 The following would seem to satisfy; the register is
12684 entirely cleared, breaking the dependency chain. We
12685 then store to the upper half, with a dependency depth
12686 of one. A rumor has it that Intel recommends two movsd
12687 followed by an unpacklpd, but this is unconfirmed. And
12688 given that the dependency depth of the unpacklpd would
12689 still be one, I'm not sure why this would be better. */
12690 zero = CONST0_RTX (V2DFmode);
12693 m = adjust_address (op1, DFmode, 0);
12694 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12695 m = adjust_address (op1, DFmode, 8);
12696 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12700 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12702 op0 = gen_lowpart (V4SFmode, op0);
12703 op1 = gen_lowpart (V4SFmode, op1);
12704 emit_insn (gen_sse_movups (op0, op1));
12708 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12709 emit_move_insn (op0, CONST0_RTX (mode));
12711 emit_clobber (op0);
12713 if (mode != V4SFmode)
12714 op0 = gen_lowpart (V4SFmode, op0);
12715 m = adjust_address (op1, V2SFmode, 0);
12716 emit_insn (gen_sse_loadlps (op0, op0, m));
12717 m = adjust_address (op1, V2SFmode, 8);
12718 emit_insn (gen_sse_loadhps (op0, op0, m));
12721 else if (MEM_P (op0))
12723 /* If we're optimizing for size, movups is the smallest. */
12724 if (optimize_insn_for_size_p ())
12726 op0 = gen_lowpart (V4SFmode, op0);
12727 op1 = gen_lowpart (V4SFmode, op1);
12728 emit_insn (gen_sse_movups (op0, op1));
12732 /* ??? Similar to above, only less clear because of quote
12733 typeless stores unquote. */
12734 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12735 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12737 op0 = gen_lowpart (V16QImode, op0);
12738 op1 = gen_lowpart (V16QImode, op1);
12739 emit_insn (gen_sse2_movdqu (op0, op1));
12743 if (TARGET_SSE2 && mode == V2DFmode)
12745 m = adjust_address (op0, DFmode, 0);
12746 emit_insn (gen_sse2_storelpd (m, op1));
12747 m = adjust_address (op0, DFmode, 8);
12748 emit_insn (gen_sse2_storehpd (m, op1));
12752 if (mode != V4SFmode)
12753 op1 = gen_lowpart (V4SFmode, op1);
12754 m = adjust_address (op0, V2SFmode, 0);
12755 emit_insn (gen_sse_storelps (m, op1));
12756 m = adjust_address (op0, V2SFmode, 8);
12757 emit_insn (gen_sse_storehps (m, op1));
12761 gcc_unreachable ();
12764 /* Expand a push in MODE. This is some mode for which we do not support
12765 proper push instructions, at least from the registers that we expect
12766 the value to live in. */
12769 ix86_expand_push (enum machine_mode mode, rtx x)
12773 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12774 GEN_INT (-GET_MODE_SIZE (mode)),
12775 stack_pointer_rtx, 1, OPTAB_DIRECT);
12776 if (tmp != stack_pointer_rtx)
12777 emit_move_insn (stack_pointer_rtx, tmp);
12779 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12781 /* When we push an operand onto stack, it has to be aligned at least
12782 at the function argument boundary. However since we don't have
12783 the argument type, we can't determine the actual argument
12785 emit_move_insn (tmp, x);
12788 /* Helper function of ix86_fixup_binary_operands to canonicalize
12789 operand order. Returns true if the operands should be swapped. */
12792 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12795 rtx dst = operands[0];
12796 rtx src1 = operands[1];
12797 rtx src2 = operands[2];
12799 /* If the operation is not commutative, we can't do anything. */
12800 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12803 /* Highest priority is that src1 should match dst. */
12804 if (rtx_equal_p (dst, src1))
12806 if (rtx_equal_p (dst, src2))
12809 /* Next highest priority is that immediate constants come second. */
12810 if (immediate_operand (src2, mode))
12812 if (immediate_operand (src1, mode))
12815 /* Lowest priority is that memory references should come second. */
12825 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12826 destination to use for the operation. If different from the true
12827 destination in operands[0], a copy operation will be required. */
12830 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12833 rtx dst = operands[0];
12834 rtx src1 = operands[1];
12835 rtx src2 = operands[2];
12837 /* Canonicalize operand order. */
12838 if (ix86_swap_binary_operands_p (code, mode, operands))
12842 /* It is invalid to swap operands of different modes. */
12843 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12850 /* Both source operands cannot be in memory. */
12851 if (MEM_P (src1) && MEM_P (src2))
12853 /* Optimization: Only read from memory once. */
12854 if (rtx_equal_p (src1, src2))
12856 src2 = force_reg (mode, src2);
12860 src2 = force_reg (mode, src2);
12863 /* If the destination is memory, and we do not have matching source
12864 operands, do things in registers. */
12865 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12866 dst = gen_reg_rtx (mode);
12868 /* Source 1 cannot be a constant. */
12869 if (CONSTANT_P (src1))
12870 src1 = force_reg (mode, src1);
12872 /* Source 1 cannot be a non-matching memory. */
12873 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12874 src1 = force_reg (mode, src1);
12876 operands[1] = src1;
12877 operands[2] = src2;
12881 /* Similarly, but assume that the destination has already been
12882 set up properly. */
12885 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12886 enum machine_mode mode, rtx operands[])
12888 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12889 gcc_assert (dst == operands[0]);
12892 /* Attempt to expand a binary operator. Make the expansion closer to the
12893 actual machine, then just general_operand, which will allow 3 separate
12894 memory references (one output, two input) in a single insn. */
12897 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12900 rtx src1, src2, dst, op, clob;
12902 dst = ix86_fixup_binary_operands (code, mode, operands);
12903 src1 = operands[1];
12904 src2 = operands[2];
12906 /* Emit the instruction. */
12908 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12909 if (reload_in_progress)
12911 /* Reload doesn't know about the flags register, and doesn't know that
12912 it doesn't want to clobber it. We can only do this with PLUS. */
12913 gcc_assert (code == PLUS);
12918 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12919 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12922 /* Fix up the destination if needed. */
12923 if (dst != operands[0])
12924 emit_move_insn (operands[0], dst);
12927 /* Return TRUE or FALSE depending on whether the binary operator meets the
12928 appropriate constraints. */
12931 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12934 rtx dst = operands[0];
12935 rtx src1 = operands[1];
12936 rtx src2 = operands[2];
12938 /* Both source operands cannot be in memory. */
12939 if (MEM_P (src1) && MEM_P (src2))
12942 /* Canonicalize operand order for commutative operators. */
12943 if (ix86_swap_binary_operands_p (code, mode, operands))
12950 /* If the destination is memory, we must have a matching source operand. */
12951 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12954 /* Source 1 cannot be a constant. */
12955 if (CONSTANT_P (src1))
12958 /* Source 1 cannot be a non-matching memory. */
12959 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12965 /* Attempt to expand a unary operator. Make the expansion closer to the
12966 actual machine, then just general_operand, which will allow 2 separate
12967 memory references (one output, one input) in a single insn. */
12970 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12973 int matching_memory;
12974 rtx src, dst, op, clob;
12979 /* If the destination is memory, and we do not have matching source
12980 operands, do things in registers. */
12981 matching_memory = 0;
12984 if (rtx_equal_p (dst, src))
12985 matching_memory = 1;
12987 dst = gen_reg_rtx (mode);
12990 /* When source operand is memory, destination must match. */
12991 if (MEM_P (src) && !matching_memory)
12992 src = force_reg (mode, src);
12994 /* Emit the instruction. */
12996 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
12997 if (reload_in_progress || code == NOT)
12999 /* Reload doesn't know about the flags register, and doesn't know that
13000 it doesn't want to clobber it. */
13001 gcc_assert (code == NOT);
13006 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13007 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13010 /* Fix up the destination if needed. */
13011 if (dst != operands[0])
13012 emit_move_insn (operands[0], dst);
13015 /* Return TRUE or FALSE depending on whether the unary operator meets the
13016 appropriate constraints. */
13019 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13020 enum machine_mode mode ATTRIBUTE_UNUSED,
13021 rtx operands[2] ATTRIBUTE_UNUSED)
13023 /* If one of operands is memory, source and destination must match. */
13024 if ((MEM_P (operands[0])
13025 || MEM_P (operands[1]))
13026 && ! rtx_equal_p (operands[0], operands[1]))
13031 /* Post-reload splitter for converting an SF or DFmode value in an
13032 SSE register into an unsigned SImode. */
13035 ix86_split_convert_uns_si_sse (rtx operands[])
13037 enum machine_mode vecmode;
13038 rtx value, large, zero_or_two31, input, two31, x;
13040 large = operands[1];
13041 zero_or_two31 = operands[2];
13042 input = operands[3];
13043 two31 = operands[4];
13044 vecmode = GET_MODE (large);
13045 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13047 /* Load up the value into the low element. We must ensure that the other
13048 elements are valid floats -- zero is the easiest such value. */
13051 if (vecmode == V4SFmode)
13052 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13054 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13058 input = gen_rtx_REG (vecmode, REGNO (input));
13059 emit_move_insn (value, CONST0_RTX (vecmode));
13060 if (vecmode == V4SFmode)
13061 emit_insn (gen_sse_movss (value, value, input));
13063 emit_insn (gen_sse2_movsd (value, value, input));
13066 emit_move_insn (large, two31);
13067 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13069 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13070 emit_insn (gen_rtx_SET (VOIDmode, large, x));
13072 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13073 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13075 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13076 emit_insn (gen_rtx_SET (VOIDmode, value, x));
13078 large = gen_rtx_REG (V4SImode, REGNO (large));
13079 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13081 x = gen_rtx_REG (V4SImode, REGNO (value));
13082 if (vecmode == V4SFmode)
13083 emit_insn (gen_sse2_cvttps2dq (x, value));
13085 emit_insn (gen_sse2_cvttpd2dq (x, value));
13088 emit_insn (gen_xorv4si3 (value, value, large));
13091 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13092 Expects the 64-bit DImode to be supplied in a pair of integral
13093 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13094 -mfpmath=sse, !optimize_size only. */
13097 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13099 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13100 rtx int_xmm, fp_xmm;
13101 rtx biases, exponents;
13104 int_xmm = gen_reg_rtx (V4SImode);
13105 if (TARGET_INTER_UNIT_MOVES)
13106 emit_insn (gen_movdi_to_sse (int_xmm, input));
13107 else if (TARGET_SSE_SPLIT_REGS)
13109 emit_clobber (int_xmm);
13110 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13114 x = gen_reg_rtx (V2DImode);
13115 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13116 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13119 x = gen_rtx_CONST_VECTOR (V4SImode,
13120 gen_rtvec (4, GEN_INT (0x43300000UL),
13121 GEN_INT (0x45300000UL),
13122 const0_rtx, const0_rtx));
13123 exponents = validize_mem (force_const_mem (V4SImode, x));
13125 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13126 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13128 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13129 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13130 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13131 (0x1.0p84 + double(fp_value_hi_xmm)).
13132 Note these exponents differ by 32. */
13134 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13136 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13137 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13138 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13139 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13140 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13141 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13142 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13143 biases = validize_mem (force_const_mem (V2DFmode, biases));
13144 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13146 /* Add the upper and lower DFmode values together. */
13148 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13151 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13152 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13153 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13156 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13159 /* Not used, but eases macroization of patterns. */
13161 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13162 rtx input ATTRIBUTE_UNUSED)
13164 gcc_unreachable ();
13167 /* Convert an unsigned SImode value into a DFmode. Only currently used
13168 for SSE, but applicable anywhere. */
13171 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13173 REAL_VALUE_TYPE TWO31r;
13176 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13177 NULL, 1, OPTAB_DIRECT);
13179 fp = gen_reg_rtx (DFmode);
13180 emit_insn (gen_floatsidf2 (fp, x));
13182 real_ldexp (&TWO31r, &dconst1, 31);
13183 x = const_double_from_real_value (TWO31r, DFmode);
13185 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13187 emit_move_insn (target, x);
13190 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13191 32-bit mode; otherwise we have a direct convert instruction. */
13194 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13196 REAL_VALUE_TYPE TWO32r;
13197 rtx fp_lo, fp_hi, x;
13199 fp_lo = gen_reg_rtx (DFmode);
13200 fp_hi = gen_reg_rtx (DFmode);
13202 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13204 real_ldexp (&TWO32r, &dconst1, 32);
13205 x = const_double_from_real_value (TWO32r, DFmode);
13206 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13208 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13210 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13213 emit_move_insn (target, x);
13216 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13217 For x86_32, -mfpmath=sse, !optimize_size only. */
13219 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13221 REAL_VALUE_TYPE ONE16r;
13222 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13224 real_ldexp (&ONE16r, &dconst1, 16);
13225 x = const_double_from_real_value (ONE16r, SFmode);
13226 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13227 NULL, 0, OPTAB_DIRECT);
13228 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13229 NULL, 0, OPTAB_DIRECT);
13230 fp_hi = gen_reg_rtx (SFmode);
13231 fp_lo = gen_reg_rtx (SFmode);
13232 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13233 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13234 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13236 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13238 if (!rtx_equal_p (target, fp_hi))
13239 emit_move_insn (target, fp_hi);
13242 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
13243 then replicate the value for all elements of the vector
13247 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13254 v = gen_rtvec (4, value, value, value, value);
13255 return gen_rtx_CONST_VECTOR (V4SImode, v);
13259 v = gen_rtvec (2, value, value);
13260 return gen_rtx_CONST_VECTOR (V2DImode, v);
13264 v = gen_rtvec (4, value, value, value, value);
13266 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13267 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13268 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13272 v = gen_rtvec (2, value, value);
13274 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13275 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13278 gcc_unreachable ();
13282 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13283 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13284 for an SSE register. If VECT is true, then replicate the mask for
13285 all elements of the vector register. If INVERT is true, then create
13286 a mask excluding the sign bit. */
13289 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13291 enum machine_mode vec_mode, imode;
13292 HOST_WIDE_INT hi, lo;
13297 /* Find the sign bit, sign extended to 2*HWI. */
13303 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13304 lo = 0x80000000, hi = lo < 0;
13310 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13311 if (HOST_BITS_PER_WIDE_INT >= 64)
13312 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13314 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13319 vec_mode = VOIDmode;
13320 if (HOST_BITS_PER_WIDE_INT >= 64)
13323 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13330 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13334 lo = ~lo, hi = ~hi;
13340 mask = immed_double_const (lo, hi, imode);
13342 vec = gen_rtvec (2, v, mask);
13343 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13344 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13351 gcc_unreachable ();
13355 lo = ~lo, hi = ~hi;
13357 /* Force this value into the low part of a fp vector constant. */
13358 mask = immed_double_const (lo, hi, imode);
13359 mask = gen_lowpart (mode, mask);
13361 if (vec_mode == VOIDmode)
13362 return force_reg (mode, mask);
13364 v = ix86_build_const_vector (mode, vect, mask);
13365 return force_reg (vec_mode, v);
13368 /* Generate code for floating point ABS or NEG. */
13371 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13374 rtx mask, set, use, clob, dst, src;
13375 bool use_sse = false;
13376 bool vector_mode = VECTOR_MODE_P (mode);
13377 enum machine_mode elt_mode = mode;
13381 elt_mode = GET_MODE_INNER (mode);
13384 else if (mode == TFmode)
13386 else if (TARGET_SSE_MATH)
13387 use_sse = SSE_FLOAT_MODE_P (mode);
13389 /* NEG and ABS performed with SSE use bitwise mask operations.
13390 Create the appropriate mask now. */
13392 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13401 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13402 set = gen_rtx_SET (VOIDmode, dst, set);
13407 set = gen_rtx_fmt_e (code, mode, src);
13408 set = gen_rtx_SET (VOIDmode, dst, set);
13411 use = gen_rtx_USE (VOIDmode, mask);
13412 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13413 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13414 gen_rtvec (3, set, use, clob)));
13421 /* Expand a copysign operation. Special case operand 0 being a constant. */
13424 ix86_expand_copysign (rtx operands[])
13426 enum machine_mode mode;
13427 rtx dest, op0, op1, mask, nmask;
13429 dest = operands[0];
13433 mode = GET_MODE (dest);
13435 if (GET_CODE (op0) == CONST_DOUBLE)
13437 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13439 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13440 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13442 if (mode == SFmode || mode == DFmode)
13444 enum machine_mode vmode;
13446 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13448 if (op0 == CONST0_RTX (mode))
13449 op0 = CONST0_RTX (vmode);
13454 if (mode == SFmode)
13455 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13456 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13458 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13460 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13463 else if (op0 != CONST0_RTX (mode))
13464 op0 = force_reg (mode, op0);
13466 mask = ix86_build_signbit_mask (mode, 0, 0);
13468 if (mode == SFmode)
13469 copysign_insn = gen_copysignsf3_const;
13470 else if (mode == DFmode)
13471 copysign_insn = gen_copysigndf3_const;
13473 copysign_insn = gen_copysigntf3_const;
13475 emit_insn (copysign_insn (dest, op0, op1, mask));
13479 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13481 nmask = ix86_build_signbit_mask (mode, 0, 1);
13482 mask = ix86_build_signbit_mask (mode, 0, 0);
13484 if (mode == SFmode)
13485 copysign_insn = gen_copysignsf3_var;
13486 else if (mode == DFmode)
13487 copysign_insn = gen_copysigndf3_var;
13489 copysign_insn = gen_copysigntf3_var;
13491 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13495 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13496 be a constant, and so has already been expanded into a vector constant. */
13499 ix86_split_copysign_const (rtx operands[])
13501 enum machine_mode mode, vmode;
13502 rtx dest, op0, op1, mask, x;
13504 dest = operands[0];
13507 mask = operands[3];
13509 mode = GET_MODE (dest);
13510 vmode = GET_MODE (mask);
13512 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13513 x = gen_rtx_AND (vmode, dest, mask);
13514 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13516 if (op0 != CONST0_RTX (vmode))
13518 x = gen_rtx_IOR (vmode, dest, op0);
13519 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13523 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13524 so we have to do two masks. */
13527 ix86_split_copysign_var (rtx operands[])
13529 enum machine_mode mode, vmode;
13530 rtx dest, scratch, op0, op1, mask, nmask, x;
13532 dest = operands[0];
13533 scratch = operands[1];
13536 nmask = operands[4];
13537 mask = operands[5];
13539 mode = GET_MODE (dest);
13540 vmode = GET_MODE (mask);
13542 if (rtx_equal_p (op0, op1))
13544 /* Shouldn't happen often (it's useless, obviously), but when it does
13545 we'd generate incorrect code if we continue below. */
13546 emit_move_insn (dest, op0);
13550 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13552 gcc_assert (REGNO (op1) == REGNO (scratch));
13554 x = gen_rtx_AND (vmode, scratch, mask);
13555 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13558 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13559 x = gen_rtx_NOT (vmode, dest);
13560 x = gen_rtx_AND (vmode, x, op0);
13561 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13565 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13567 x = gen_rtx_AND (vmode, scratch, mask);
13569 else /* alternative 2,4 */
13571 gcc_assert (REGNO (mask) == REGNO (scratch));
13572 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13573 x = gen_rtx_AND (vmode, scratch, op1);
13575 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13577 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13579 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13580 x = gen_rtx_AND (vmode, dest, nmask);
13582 else /* alternative 3,4 */
13584 gcc_assert (REGNO (nmask) == REGNO (dest));
13586 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13587 x = gen_rtx_AND (vmode, dest, op0);
13589 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13592 x = gen_rtx_IOR (vmode, dest, scratch);
13593 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13596 /* Return TRUE or FALSE depending on whether the first SET in INSN
13597 has source and destination with matching CC modes, and that the
13598 CC mode is at least as constrained as REQ_MODE. */
13601 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13604 enum machine_mode set_mode;
13606 set = PATTERN (insn);
13607 if (GET_CODE (set) == PARALLEL)
13608 set = XVECEXP (set, 0, 0);
13609 gcc_assert (GET_CODE (set) == SET);
13610 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13612 set_mode = GET_MODE (SET_DEST (set));
13616 if (req_mode != CCNOmode
13617 && (req_mode != CCmode
13618 || XEXP (SET_SRC (set), 1) != const0_rtx))
13622 if (req_mode == CCGCmode)
13626 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13630 if (req_mode == CCZmode)
13641 gcc_unreachable ();
13644 return (GET_MODE (SET_SRC (set)) == set_mode);
13647 /* Generate insn patterns to do an integer compare of OPERANDS. */
13650 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13652 enum machine_mode cmpmode;
/* Pick the least constrained CC mode that still lets CODE be tested.  */
13655 cmpmode = SELECT_CC_MODE (code, op0, op1);
13656 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13658 /* This is very simple, but making the interface the same as in the
13659 FP case makes the rest of the code easier. */
13660 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13661 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13663 /* Return the test that should be put into the flags user, i.e.
13664 the bcc, scc, or cmov instruction. */
13665 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13668 /* Figure out whether to use ordered or unordered fp comparisons.
13669 Return the appropriate mode to use. */
13672 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13674 /* ??? In order to make all comparisons reversible, we do all comparisons
13675 non-trapping when compiling for IEEE. Once gcc is able to distinguish
13676 all forms trapping and nontrapping comparisons, we can make inequality
13677 comparisons trapping again, since it results in better code when using
13678 FCOM based compares. */
/* CODE is currently ignored: the choice depends only on whether IEEE
   conformance is requested.  */
13679 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Return the CC mode that a comparison CODE of OP0 with OP1 should be
   performed in; narrower CC modes let the combiner share flag-setting
   instructions.  */
13683 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13685 enum machine_mode mode = GET_MODE (op0);
13687 if (SCALAR_FLOAT_MODE_P (mode))
13689 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13690 return ix86_fp_compare_mode (code);
13695 /* Only zero flag is needed. */
13696 case EQ: /* ZF=0 */
13697 case NE: /* ZF!=0 */
13699 /* Codes needing carry flag. */
13700 case GEU: /* CF=0 */
13701 case LTU: /* CF=1 */
13702 /* Detect overflow checks. They need just the carry flag. */
13703 if (GET_CODE (op0) == PLUS
13704 && rtx_equal_p (op1, XEXP (op0, 0)))
13706 /* (presumably returns CCCmode here — TODO confirm, return elided) */
13708 case GTU: /* CF=0 & ZF=0 */
13709 case LEU: /* CF=1 | ZF=1 */
13710 /* Detect overflow checks. They need just the carry flag. */
13711 if (GET_CODE (op0) == MINUS
13712 && rtx_equal_p (op1, XEXP (op0, 0)))
13716 /* Codes possibly doable only with sign flag when
13717 comparing against zero. */
13718 case GE: /* SF=OF or SF=0 */
13719 case LT: /* SF<>OF or SF=1 */
13720 if (op1 == const0_rtx)
13723 /* For other cases Carry flag is not required. */
13725 /* Codes doable only with sign flag when comparing
13726 against zero, but we miss jump instruction for it
13727 so we need to use relational tests against overflow
13728 that thus needs to be zero. */
13729 case GT: /* ZF=0 & SF=OF */
13730 case LE: /* ZF=1 | SF<>OF */
13731 if (op1 == const0_rtx)
13735 /* strcmp pattern do (use flags) and combine may ask us for proper
13740 gcc_unreachable ();
13744 /* Return the fixed registers used for condition codes. */
/* Implements TARGET_FIXED_CONDITION_CODE_REGS; P1/P2 are out-parameters
   (body not visible here — presumably sets FLAGS_REG/FPSR_REG,
   TODO confirm).  */
13747 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13754 /* If two condition code modes are compatible, return a condition code
13755 mode which is compatible with both. Otherwise, return
13758 static enum machine_mode
13759 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Non-CC modes cannot be merged with anything.  */
13764 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGCmode is strictly more constrained than CCGOCmode, so CCGCmode
   satisfies users of either.  */
13767 if ((m1 == CCGCmode && m2 == CCGOCmode)
13768 || (m1 == CCGOCmode && m2 == CCGCmode))
13774 gcc_unreachable ();
13804 /* These are only compatible with themselves, which we already
13810 /* Split comparison code CODE into comparisons we can do using branch
13811 instructions. BYPASS_CODE is comparison code for branch that will
13812 branch around FIRST_CODE and SECOND_CODE. If some of branches
13813 is not required, set value to UNKNOWN.
13814 We never require more than two branches. */
13817 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13818 enum rtx_code *first_code,
13819 enum rtx_code *second_code)
/* Default: a single branch on CODE itself suffices.  */
13821 *first_code = code;
13822 *bypass_code = UNKNOWN;
13823 *second_code = UNKNOWN;
13825 /* The fcomi comparison sets flags as follows:
/* These codes map directly onto one flags test after fcomi/sahf.  */
13835 case GT: /* GTU - CF=0 & ZF=0 */
13836 case GE: /* GEU - CF=0 */
13837 case ORDERED: /* PF=0 */
13838 case UNORDERED: /* PF=1 */
13839 case UNEQ: /* EQ - ZF=1 */
13840 case UNLT: /* LTU - CF=1 */
13841 case UNLE: /* LEU - CF=1 | ZF=1 */
13842 case LTGT: /* EQ - ZF=0 */
/* Ordered codes need an extra branch to get the NaN case right:
   either a bypass branch taken when unordered, or a second branch.  */
13844 case LT: /* LTU - CF=1 - fails on unordered */
13845 *first_code = UNLT;
13846 *bypass_code = UNORDERED;
13848 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
13849 *first_code = UNLE;
13850 *bypass_code = UNORDERED;
13852 case EQ: /* EQ - ZF=1 - fails on unordered */
13853 *first_code = UNEQ;
13854 *bypass_code = UNORDERED;
13856 case NE: /* NE - ZF=0 - fails on unordered */
13857 *first_code = LTGT;
13858 *second_code = UNORDERED;
13860 case UNGE: /* GEU - CF=0 - fails on unordered */
13862 *second_code = UNORDERED;
13864 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
13866 *second_code = UNORDERED;
13869 gcc_unreachable ();
/* Without -mieee-fp the NaN-correcting branches are dropped.  */
13871 if (!TARGET_IEEE_FP)
13873 *second_code = UNKNOWN;
13874 *bypass_code = UNKNOWN;
13878 /* Return cost of comparison done fcom + arithmetics operations on AX.
13879 All following functions do use number of instructions as a cost metrics.
13880 In future this should be tweaked to compute bytes for optimize_size and
13881 take into account performance of various instructions on various CPUs. */
13883 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* Without IEEE conformance every code collapses to the cheap sequence
   (the per-code cost table is elided here).  */
13885 if (!TARGET_IEEE_FP)
13887 /* The cost of code output by ix86_expand_fp_compare. */
13911 gcc_unreachable ();
13915 /* Return cost of comparison done using fcomi operation.
13916 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13918 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13920 enum rtx_code bypass_code, first_code, second_code;
13921 /* Return arbitrarily high cost when instruction is not supported - this
13922 prevents gcc from using it. */
/* fcomi itself plus sahf-free flag use: base cost 2, plus 1 when the
   comparison needs an extra (bypass or second) branch.  */
13925 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13926 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
13929 /* Return cost of comparison done using sahf operation.
13930 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13932 ix86_fp_comparison_sahf_cost (enum rtx_code code)
13934 enum rtx_code bypass_code, first_code, second_code;
13935 /* Return arbitrarily high cost when instruction is not preferred - this
13936 avoids gcc from using it. */
13937 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
/* fnstsw + sahf + branch: base cost 3, plus 1 for an extra branch.  */
13939 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13940 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
13943 /* Compute cost of the comparison done using any method.
13944 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13946 ix86_fp_comparison_cost (enum rtx_code code)
13948 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
13951 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
13952 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the three strategies; unsupported strategies
   report a prohibitive cost and so lose automatically.  */
13954 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
13955 if (min > sahf_cost)
13957 if (min > fcomi_cost)
13962 /* Return true if we should use an FCOMI instruction for this
13966 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
13968 enum rtx_code swapped_code = swap_condition (code);
/* fcomi wins when it is the cheapest strategy for either operand
   order, since the expander may swap the operands later.  */
13970 return ((ix86_fp_comparison_cost (code)
13971 == ix86_fp_comparison_fcomi_cost (code))
13972 || (ix86_fp_comparison_cost (swapped_code)
13973 == ix86_fp_comparison_fcomi_cost (swapped_code)));
13976 /* Swap, force into registers, or otherwise massage the two operands
13977 to a fp comparison. The operands are updated in place; the new
13978 comparison code is returned. */
13980 static enum rtx_code
13981 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
13983 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
13984 rtx op0 = *pop0, op1 = *pop1;
13985 enum machine_mode op_mode = GET_MODE (op0);
13986 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
13988 /* All of the unordered compare instructions only work on registers.
13989 The same is true of the fcomi compare instructions. The XFmode
13990 compare instructions require registers except when comparing
13991 against zero or when converting operand 1 from fixed point to
13995 && (fpcmp_mode == CCFPUmode
13996 || (op_mode == XFmode
13997 && ! (standard_80387_constant_p (op0) == 1
13998 || standard_80387_constant_p (op1) == 1)
13999 && GET_CODE (op1) != FLOAT)
14000 || ix86_use_fcomi_compare (code)))
14002 op0 = force_reg (op_mode, op0);
14003 op1 = force_reg (op_mode, op1);
14007 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14008 things around if they appear profitable, otherwise force op0
14009 into a register. */
/* Swap when op0 is a non-loadable constant but op1 is not; the
   comparison code must be flipped to match.  */
14011 if (standard_80387_constant_p (op0) == 0
14013 && ! (standard_80387_constant_p (op1) == 0
14017 tmp = op0, op0 = op1, op1 = tmp;
14018 code = swap_condition (code);
14022 op0 = force_reg (op_mode, op0);
14024 if (CONSTANT_P (op1))
/* standard_80387_constant_p == 0 means the constant cannot be loaded
   with fld1/fldz etc., so spill it to the constant pool.  */
14026 int tmp = standard_80387_constant_p (op1);
14028 op1 = validize_mem (force_const_mem (op_mode, op1));
14032 op1 = force_reg (op_mode, op1);
14035 op1 = force_reg (op_mode, op1);
14039 /* Try to rearrange the comparison to make it cheaper. */
14040 if (ix86_fp_comparison_cost (code)
14041 > ix86_fp_comparison_cost (swap_condition (code))
14042 && (REG_P (op1) || can_create_pseudo_p ()))
14045 tmp = op0, op0 = op1, op1 = tmp;
14046 code = swap_condition (code);
14048 op0 = force_reg (op_mode, op0);
14056 /* Convert comparison codes we use to represent FP comparison to integer
14057 code that will result in proper branch. Return UNKNOWN if no such code
/* Body not visible here — presumably a switch mapping e.g. GT->GTU,
   UNLT->LTU after the flags were produced by fcomi/sahf; TODO confirm.  */
14061 ix86_fp_compare_code_to_integer (enum rtx_code code)
14090 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14093 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14094 rtx *second_test, rtx *bypass_test)
14096 enum machine_mode fpcmp_mode, intcmp_mode;
14098 int cost = ix86_fp_comparison_cost (code);
14099 enum rtx_code bypass_code, first_code, second_code;
14101 fpcmp_mode = ix86_fp_compare_mode (code);
14102 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
/* SECOND_TEST/BYPASS_TEST may be NULL when the caller cannot cope with
   multi-branch sequences; clear them up front when provided.  */
14105 *second_test = NULL_RTX;
14107 *bypass_test = NULL_RTX;
14109 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14111 /* Do fcomi/sahf based test when profitable. */
14112 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14113 && (bypass_code == UNKNOWN || bypass_test)
14114 && (second_code == UNKNOWN || second_test))
14116 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14117 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* No fcomi available: emit compare + fnstsw + sahf, which needs a
   HImode scratch for the status word.  */
14123 gcc_assert (TARGET_SAHF);
14126 scratch = gen_reg_rtx (HImode);
14127 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14129 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14132 /* The FP codes work out to act like unsigned. */
14133 intcmp_mode = fpcmp_mode;
14135 if (bypass_code != UNKNOWN)
14136 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14137 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14139 if (second_code != UNKNOWN)
14140 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14141 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14146 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14147 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14148 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14150 scratch = gen_reg_rtx (HImode)
14151 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14153 /* In the unordered case, we have to check C2 for NaN's, which
14154 doesn't happen to work out to anything nice combination-wise.
14155 So do some bit twiddling on the value we've got in AH to come
14156 up with an appropriate set of condition codes. */
/* Masks below select FP status bits: 0x01=C0 (carry), 0x04=C2,
   0x40=C3 (zero), 0x45=C0|C2|C3.  */
14158 intcmp_mode = CCNOmode;
14163 if (code == GT || !TARGET_IEEE_FP)
14165 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14170 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14171 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14172 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14173 intcmp_mode = CCmode;
14179 if (code == LT && TARGET_IEEE_FP)
14181 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14182 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14183 intcmp_mode = CCmode;
14188 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14194 if (code == GE || !TARGET_IEEE_FP)
14196 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14201 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14202 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14209 if (code == LE && TARGET_IEEE_FP)
14211 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14212 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14213 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14214 intcmp_mode = CCmode;
14219 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14225 if (code == EQ && TARGET_IEEE_FP)
14227 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14228 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14229 intcmp_mode = CCmode;
14234 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14241 if (code == NE && TARGET_IEEE_FP)
14243 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14244 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14250 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14256 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14260 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14265 gcc_unreachable ();
14269 /* Return the test that should be put into the flags user, i.e.
14270 the bcc, scc, or cmov instruction. */
14271 return gen_rtx_fmt_ee (code, VOIDmode,
14272 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of ix86_compare_op0/op1 under CODE, dispatching
   to the CC-mode, floating-point or integer expanders as appropriate,
   and return the flags test rtx for the consumer.  */
14277 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14280 op0 = ix86_compare_op0;
14281 op1 = ix86_compare_op1;
14284 *second_test = NULL_RTX;
14286 *bypass_test = NULL_RTX;
/* A MODE_CC operand means the compare insn was already emitted; just
   build the test.  */
14288 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14289 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14291 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14293 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14294 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14295 second_test, bypass_test);
14298 ret = ix86_expand_int_compare (code, op0, op1);
14303 /* Return true if the CODE will result in nontrivial jump sequence. */
14305 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14307 enum rtx_code bypass_code, first_code, second_code;
/* "Nontrivial" means more than one branch is needed for NaN handling.  */
14310 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14311 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional branch to LABEL under CODE, comparing the global
   ix86_compare_op0/op1.  Dispatches on the operand mode: integer,
   floating point, or double-word (DImode/TImode) split compares.  */
14315 ix86_expand_branch (enum rtx_code code, rtx label)
14319 switch (GET_MODE (ix86_compare_op0))
/* Simple case: one compare, one jump.  */
14325 tmp = ix86_expand_compare (code, NULL, NULL);
14326 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14327 gen_rtx_LABEL_REF (VOIDmode, label),
14329 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14338 enum rtx_code bypass_code, first_code, second_code;
14340 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14341 &ix86_compare_op1);
14343 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14345 /* Check whether we will use the natural sequence with one jump. If
14346 so, we can expand jump early. Otherwise delay expansion by
14347 creating compound insn to not confuse optimizers. */
14348 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14350 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14351 gen_rtx_LABEL_REF (VOIDmode, label),
14352 pc_rtx, NULL_RTX, NULL_RTX);
/* Multi-branch FP case: wrap everything in one PARALLEL so later
   passes see a single jump insn; clobbers cover FPSR, FLAGS and (for
   the sahf path) a HImode scratch.  */
14356 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14357 ix86_compare_op0, ix86_compare_op1);
14358 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14359 gen_rtx_LABEL_REF (VOIDmode, label),
14361 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14363 use_fcomi = ix86_use_fcomi_compare (code);
14364 vec = rtvec_alloc (3 + !use_fcomi);
14365 RTVEC_ELT (vec, 0) = tmp;
14367 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14369 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14372 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14374 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14383 /* Expand DImode branch into multiple compare+branch. */
14385 rtx lo[2], hi[2], label2;
14386 enum rtx_code code1, code2, code3;
14387 enum machine_mode submode;
/* Canonicalize so a constant, if any, is the second operand.  */
14389 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14391 tmp = ix86_compare_op0;
14392 ix86_compare_op0 = ix86_compare_op1;
14393 ix86_compare_op1 = tmp;
14394 code = swap_condition (code);
14396 if (GET_MODE (ix86_compare_op0) == DImode)
14398 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14399 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14404 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14405 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14409 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14410 avoid two branches. This costs one extra insn, so disable when
14411 optimizing for size. */
14413 if ((code == EQ || code == NE)
14414 && (!optimize_insn_for_size_p ()
14415 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14420 if (hi[1] != const0_rtx)
14421 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14422 NULL_RTX, 0, OPTAB_WIDEN);
14425 if (lo[1] != const0_rtx)
14426 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14427 NULL_RTX, 0, OPTAB_WIDEN);
14429 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14430 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: the double-word equality collapsed to a word compare
   against zero.  */
14432 ix86_compare_op0 = tmp;
14433 ix86_compare_op1 = const0_rtx;
14434 ix86_expand_branch (code, label);
14438 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14439 op1 is a constant and the low word is zero, then we can just
14440 examine the high word. Similarly for low word -1 and
14441 less-or-equal-than or greater-than. */
14443 if (CONST_INT_P (hi[1]))
14446 case LT: case LTU: case GE: case GEU:
14447 if (lo[1] == const0_rtx)
14449 ix86_compare_op0 = hi[0];
14450 ix86_compare_op1 = hi[1];
14451 ix86_expand_branch (code, label);
14455 case LE: case LEU: case GT: case GTU:
14456 if (lo[1] == constm1_rtx)
14458 ix86_compare_op0 = hi[0];
14459 ix86_compare_op1 = hi[1];
14460 ix86_expand_branch (code, label);
14468 /* Otherwise, we need two or three jumps. */
14470 label2 = gen_label_rtx ();
/* code1 branches to LABEL on the high word, code2 to the fallthrough
   label, code3 handles the low word (always unsigned).  */
14473 code2 = swap_condition (code);
14474 code3 = unsigned_condition (code);
14478 case LT: case GT: case LTU: case GTU:
14481 case LE: code1 = LT; code2 = GT; break;
14482 case GE: code1 = GT; code2 = LT; break;
14483 case LEU: code1 = LTU; code2 = GTU; break;
14484 case GEU: code1 = GTU; code2 = LTU; break;
14486 case EQ: code1 = UNKNOWN; code2 = NE; break;
14487 case NE: code2 = UNKNOWN; break;
14490 gcc_unreachable ();
14495 * if (hi(a) < hi(b)) goto true;
14496 * if (hi(a) > hi(b)) goto false;
14497 * if (lo(a) < lo(b)) goto true;
14501 ix86_compare_op0 = hi[0];
14502 ix86_compare_op1 = hi[1];
14504 if (code1 != UNKNOWN)
14505 ix86_expand_branch (code1, label);
14506 if (code2 != UNKNOWN)
14507 ix86_expand_branch (code2, label2);
14509 ix86_compare_op0 = lo[0];
14510 ix86_compare_op1 = lo[1];
14511 ix86_expand_branch (code3, label);
14513 if (code2 != UNKNOWN)
14514 emit_label (label2);
14519 /* If we have already emitted a compare insn, go straight to simple.
14520 ix86_expand_compare won't emit anything if ix86_compare_emitted
14522 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
14527 /* Split branch based on floating point condition. */
14529 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14530 rtx target1, rtx target2, rtx tmp, rtx pushed)
14532 rtx second, bypass;
14533 rtx label = NULL_RTX;
14535 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the fallthrough (pc_rtx) is always TARGET2.  */
14538 if (target2 != pc_rtx)
14541 code = reverse_condition_maybe_unordered (code);
14546 condition = ix86_expand_fp_compare (code, op1, op2,
14547 tmp, &second, &bypass);
14549 /* Remove pushed operand from stack. */
14551 ix86_free_from_memory (GET_MODE (pushed));
14553 if (split_branch_probability >= 0)
14555 /* Distribute the probabilities across the jumps.
14556 Assume the BYPASS and SECOND to be always test
14558 probability = split_branch_probability;
14560 /* Value of 1 is low enough to make no need for probability
14561 to be updated. Later we may run some experiments and see
14562 if unordered values are more frequent in practice. */
14564 bypass_probability = 1;
14566 second_probability = 1;
/* BYPASS jumps over the main test to a local label (the unordered
   shortcut); SECOND is an additional test to TARGET1.  Each jump gets
   a REG_BR_PROB note when a probability is known.  */
14568 if (bypass != NULL_RTX)
14570 label = gen_label_rtx ();
14571 i = emit_jump_insn (gen_rtx_SET
14573 gen_rtx_IF_THEN_ELSE (VOIDmode,
14575 gen_rtx_LABEL_REF (VOIDmode,
14578 if (bypass_probability >= 0)
14580 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14581 GEN_INT (bypass_probability),
14584 i = emit_jump_insn (gen_rtx_SET
14586 gen_rtx_IF_THEN_ELSE (VOIDmode,
14587 condition, target1, target2)));
14588 if (probability >= 0)
14590 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14591 GEN_INT (probability),
14593 if (second != NULL_RTX)
14595 i = emit_jump_insn (gen_rtx_SET
14597 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14599 if (second_probability >= 0)
14601 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14602 GEN_INT (second_probability),
14605 if (label != NULL_RTX)
14606 emit_label (label);
/* Expand a setcc of ix86_compare_op0/op1 under CODE into QImode DEST.
   Returns 1 on success, 0 (FAIL) when the mode needs the split path.  */
14610 ix86_expand_setcc (enum rtx_code code, rtx dest)
14612 rtx ret, tmp, tmpreg, equiv;
14613 rtx second_test, bypass_test;
/* Double-word compares are handled via branches, not setcc.  */
14615 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14616 return 0; /* FAIL */
14618 gcc_assert (GET_MODE (dest) == QImode);
14620 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14621 PUT_MODE (ret, QImode);
14626 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* When the FP compare needed an extra test, combine the two setcc
   results: AND for a reversed bypass test, IOR for a second test.  */
14627 if (bypass_test || second_test)
14629 rtx test = second_test;
14631 rtx tmp2 = gen_reg_rtx (QImode);
14634 gcc_assert (!second_test);
14635 test = bypass_test;
14637 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14639 PUT_MODE (test, QImode);
14640 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14643 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14645 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14648 /* Attach a REG_EQUAL note describing the comparison result. */
14649 if (ix86_compare_op0 && ix86_compare_op1)
14651 equiv = simplify_gen_relational (code, QImode,
14652 GET_MODE (ix86_compare_op0),
14653 ix86_compare_op0, ix86_compare_op1);
14654 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14657 return 1; /* DONE */
14660 /* Expand comparison setting or clearing carry flag. Return true when
14661 successful and set pop for the operation. */
14663 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14665 enum machine_mode mode =
14666 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14668 /* Do not handle DImode compares that go through special path. */
14669 if (mode == (TARGET_64BIT ? TImode : DImode))
14672 if (SCALAR_FLOAT_MODE_P (mode))
14674 rtx second_test = NULL, bypass_test = NULL;
14675 rtx compare_op, compare_seq;
14677 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14679 /* Shortcut: following common codes never translate
14680 into carry flag compares. */
14681 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14682 || code == ORDERED || code == UNORDERED)
14685 /* These comparisons require zero flag; swap operands so they won't. */
14686 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14687 && !TARGET_IEEE_FP)
14692 code = swap_condition (code);
14695 /* Try to expand the comparison and verify that we end up with
14696 carry flag based comparison. This fails to be true only when
14697 we decide to expand comparison using arithmetic that is not
14698 too common scenario. */
14700 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14701 &second_test, &bypass_test);
14702 compare_seq = get_insns ();
/* Reject any expansion that still needs an extra test — those cannot
   be expressed as a single carry-flag condition.  */
14705 if (second_test || bypass_test)
14708 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14709 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14710 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14712 code = GET_CODE (compare_op);
14714 if (code != LTU && code != GEU)
14717 emit_insn (compare_seq);
14722 if (!INTEGRAL_MODE_P (mode))
/* Integer path: rewrite CODE into an equivalent LTU/GEU by adjusting
   the constant operand.  */
14731 /* Convert a==0 into (unsigned)a<1. */
14734 if (op1 != const0_rtx)
14737 code = (code == EQ ? LTU : GEU)
14740 /* Convert a>b into b<a or a>=b-1. */
14743 if (CONST_INT_P (op1))
14745 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
14746 /* Bail out on overflow. We still can swap operands but that
14747 would force loading of the constant into register. */
14748 if (op1 == const0_rtx
14749 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14751 code = (code == GTU ? GEU : LTU);
14758 code = (code == GTU ? LTU : GEU);
14762 /* Convert a>=0 into (unsigned)a<0x80000000. */
14765 if (mode == DImode || op1 != const0_rtx)
14767 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14768 code = (code == LT ? GEU : LTU);
14772 if (mode == DImode || op1 != constm1_rtx)
14774 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14775 code = (code == LE ? GEU : LTU);
14781 /* Swapping operands may cause constant to appear as first operand. */
14782 if (!nonimmediate_operand (op0, VOIDmode))
14784 if (!can_create_pseudo_p ())
14786 op0 = force_reg (mode, op0);
14788 ix86_compare_op0 = op0;
14789 ix86_compare_op1 = op1;
14790 *pop = ix86_expand_compare (code, NULL, NULL);
14791 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
14796 ix86_expand_int_movcc (rtx operands[])
14798 enum rtx_code code = GET_CODE (operands[1]), compare_code;
14799 rtx compare_seq, compare_op;
14800 rtx second_test, bypass_test;
14801 enum machine_mode mode = GET_MODE (operands[0]);
14802 bool sign_bit_compare_p = false;;
14805 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14806 compare_seq = get_insns ();
14809 compare_code = GET_CODE (compare_op);
14811 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14812 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14813 sign_bit_compare_p = true;
14815 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14816 HImode insns, we'd be swallowed in word prefix ops. */
14818 if ((mode != HImode || TARGET_FAST_PREFIX)
14819 && (mode != (TARGET_64BIT ? TImode : DImode))
14820 && CONST_INT_P (operands[2])
14821 && CONST_INT_P (operands[3]))
14823 rtx out = operands[0];
14824 HOST_WIDE_INT ct = INTVAL (operands[2]);
14825 HOST_WIDE_INT cf = INTVAL (operands[3]);
14826 HOST_WIDE_INT diff;
14829 /* Sign bit compares are better done using shifts than we do by using
14831 if (sign_bit_compare_p
14832 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14833 ix86_compare_op1, &compare_op))
14835 /* Detect overlap between destination and compare sources. */
14838 if (!sign_bit_compare_p)
14840 bool fpcmp = false;
14842 compare_code = GET_CODE (compare_op);
14844 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14845 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14848 compare_code = ix86_fp_compare_code_to_integer (compare_code);
14851 /* To simplify rest of code, restrict to the GEU case. */
14852 if (compare_code == LTU)
14854 HOST_WIDE_INT tmp = ct;
14857 compare_code = reverse_condition (compare_code);
14858 code = reverse_condition (code);
14863 PUT_CODE (compare_op,
14864 reverse_condition_maybe_unordered
14865 (GET_CODE (compare_op)));
14867 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14871 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14872 || reg_overlap_mentioned_p (out, ix86_compare_op1))
14873 tmp = gen_reg_rtx (mode);
14875 if (mode == DImode)
14876 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14878 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14882 if (code == GT || code == GE)
14883 code = reverse_condition (code);
14886 HOST_WIDE_INT tmp = ct;
14891 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14892 ix86_compare_op1, VOIDmode, 0, -1);
14905 tmp = expand_simple_binop (mode, PLUS,
14907 copy_rtx (tmp), 1, OPTAB_DIRECT);
14918 tmp = expand_simple_binop (mode, IOR,
14920 copy_rtx (tmp), 1, OPTAB_DIRECT);
14922 else if (diff == -1 && ct)
14932 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14934 tmp = expand_simple_binop (mode, PLUS,
14935 copy_rtx (tmp), GEN_INT (cf),
14936 copy_rtx (tmp), 1, OPTAB_DIRECT);
14944 * andl cf - ct, dest
14954 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14957 tmp = expand_simple_binop (mode, AND,
14959 gen_int_mode (cf - ct, mode),
14960 copy_rtx (tmp), 1, OPTAB_DIRECT);
14962 tmp = expand_simple_binop (mode, PLUS,
14963 copy_rtx (tmp), GEN_INT (ct),
14964 copy_rtx (tmp), 1, OPTAB_DIRECT);
14967 if (!rtx_equal_p (tmp, out))
14968 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
14970 return 1; /* DONE */
14975 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14978 tmp = ct, ct = cf, cf = tmp;
14981 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14983 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14985 /* We may be reversing unordered compare to normal compare, that
14986 is not valid in general (we may convert non-trapping condition
14987 to trapping one), however on i386 we currently emit all
14988 comparisons unordered. */
14989 compare_code = reverse_condition_maybe_unordered (compare_code);
14990 code = reverse_condition_maybe_unordered (code);
14994 compare_code = reverse_condition (compare_code);
14995 code = reverse_condition (code);
14999 compare_code = UNKNOWN;
15000 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15001 && CONST_INT_P (ix86_compare_op1))
15003 if (ix86_compare_op1 == const0_rtx
15004 && (code == LT || code == GE))
15005 compare_code = code;
15006 else if (ix86_compare_op1 == constm1_rtx)
15010 else if (code == GT)
15015 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15016 if (compare_code != UNKNOWN
15017 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15018 && (cf == -1 || ct == -1))
15020 /* If lea code below could be used, only optimize
15021 if it results in a 2 insn sequence. */
15023 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15024 || diff == 3 || diff == 5 || diff == 9)
15025 || (compare_code == LT && ct == -1)
15026 || (compare_code == GE && cf == -1))
15029 * notl op1 (if necessary)
15037 code = reverse_condition (code);
15040 out = emit_store_flag (out, code, ix86_compare_op0,
15041 ix86_compare_op1, VOIDmode, 0, -1);
15043 out = expand_simple_binop (mode, IOR,
15045 out, 1, OPTAB_DIRECT);
15046 if (out != operands[0])
15047 emit_move_insn (operands[0], out);
15049 return 1; /* DONE */
15054 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15055 || diff == 3 || diff == 5 || diff == 9)
15056 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15058 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15064 * lea cf(dest*(ct-cf)),dest
15068 * This also catches the degenerate setcc-only case.
15074 out = emit_store_flag (out, code, ix86_compare_op0,
15075 ix86_compare_op1, VOIDmode, 0, 1);
15078 /* On x86_64 the lea instruction operates on Pmode, so we need
15079 to get arithmetics done in proper mode to match. */
15081 tmp = copy_rtx (out);
15085 out1 = copy_rtx (out);
15086 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15090 tmp = gen_rtx_PLUS (mode, tmp, out1);
15096 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15099 if (!rtx_equal_p (tmp, out))
15102 out = force_operand (tmp, copy_rtx (out));
15104 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15106 if (!rtx_equal_p (out, operands[0]))
15107 emit_move_insn (operands[0], copy_rtx (out));
15109 return 1; /* DONE */
15113 * General case: Jumpful:
15114 * xorl dest,dest cmpl op1, op2
15115 * cmpl op1, op2 movl ct, dest
15116 * setcc dest jcc 1f
15117 * decl dest movl cf, dest
15118 * andl (cf-ct),dest 1:
15121 * Size 20. Size 14.
15123 * This is reasonably steep, but branch mispredict costs are
15124 * high on modern cpus, so consider failing only if optimizing
15128 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15129 && BRANCH_COST (optimize_insn_for_speed_p (),
15134 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15139 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15141 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15143 /* We may be reversing unordered compare to normal compare,
15144 that is not valid in general (we may convert non-trapping
15145 condition to trapping one), however on i386 we currently
15146 emit all comparisons unordered. */
15147 code = reverse_condition_maybe_unordered (code);
15151 code = reverse_condition (code);
15152 if (compare_code != UNKNOWN)
15153 compare_code = reverse_condition (compare_code);
15157 if (compare_code != UNKNOWN)
15159 /* notl op1 (if needed)
15164 For x < 0 (resp. x <= -1) there will be no notl,
15165 so if possible swap the constants to get rid of the
15167 True/false will be -1/0 while code below (store flag
15168 followed by decrement) is 0/-1, so the constants need
15169 to be exchanged once more. */
15171 if (compare_code == GE || !cf)
15173 code = reverse_condition (code);
15178 HOST_WIDE_INT tmp = cf;
15183 out = emit_store_flag (out, code, ix86_compare_op0,
15184 ix86_compare_op1, VOIDmode, 0, -1);
15188 out = emit_store_flag (out, code, ix86_compare_op0,
15189 ix86_compare_op1, VOIDmode, 0, 1);
15191 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15192 copy_rtx (out), 1, OPTAB_DIRECT);
15195 out = expand_simple_binop (mode, AND, copy_rtx (out),
15196 gen_int_mode (cf - ct, mode),
15197 copy_rtx (out), 1, OPTAB_DIRECT);
15199 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15200 copy_rtx (out), 1, OPTAB_DIRECT);
15201 if (!rtx_equal_p (out, operands[0]))
15202 emit_move_insn (operands[0], copy_rtx (out));
15204 return 1; /* DONE */
15208 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15210 /* Try a few things more with specific constants and a variable. */
15213 rtx var, orig_out, out, tmp;
15215 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15216 return 0; /* FAIL */
15218 /* If one of the two operands is an interesting constant, load a
15219 constant with the above and mask it in with a logical operation. */
15221 if (CONST_INT_P (operands[2]))
15224 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15225 operands[3] = constm1_rtx, op = and_optab;
15226 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15227 operands[3] = const0_rtx, op = ior_optab;
15229 return 0; /* FAIL */
15231 else if (CONST_INT_P (operands[3]))
15234 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15235 operands[2] = constm1_rtx, op = and_optab;
15236 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15237 operands[2] = const0_rtx, op = ior_optab;
15239 return 0; /* FAIL */
15242 return 0; /* FAIL */
15244 orig_out = operands[0];
15245 tmp = gen_reg_rtx (mode);
15248 /* Recurse to get the constant loaded. */
15249 if (ix86_expand_int_movcc (operands) == 0)
15250 return 0; /* FAIL */
15252 /* Mask in the interesting variable. */
15253 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15255 if (!rtx_equal_p (out, orig_out))
15256 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15258 return 1; /* DONE */
15262 * For comparison with above,
15272 if (! nonimmediate_operand (operands[2], mode))
15273 operands[2] = force_reg (mode, operands[2]);
15274 if (! nonimmediate_operand (operands[3], mode))
15275 operands[3] = force_reg (mode, operands[3]);
15277 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15279 rtx tmp = gen_reg_rtx (mode);
15280 emit_move_insn (tmp, operands[3]);
15283 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15285 rtx tmp = gen_reg_rtx (mode);
15286 emit_move_insn (tmp, operands[2]);
15290 if (! register_operand (operands[2], VOIDmode)
15292 || ! register_operand (operands[3], VOIDmode)))
15293 operands[2] = force_reg (mode, operands[2]);
15296 && ! register_operand (operands[3], VOIDmode))
15297 operands[3] = force_reg (mode, operands[3]);
15299 emit_insn (compare_seq);
15300 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15301 gen_rtx_IF_THEN_ELSE (mode,
15302 compare_op, operands[2],
15305 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15306 gen_rtx_IF_THEN_ELSE (mode,
15308 copy_rtx (operands[3]),
15309 copy_rtx (operands[0]))));
15311 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15312 gen_rtx_IF_THEN_ELSE (mode,
15314 copy_rtx (operands[2]),
15315 copy_rtx (operands[0]))));
15317 return 1; /* DONE */
/* NOTE(review): fragment of a decimated listing — the embedded source line
   numbers below jump (15330 -> 15338 etc.), so interior statements, braces
   and switch labels are elided.  Not compilable as-is; do not hand-edit
   logic here without the full source.  */
15320 /* Swap, force into registers, or otherwise massage the two operands
15321 to an sse comparison with a mask result. Thus we differ a bit from
15322 ix86_prepare_fp_compare_args which expects to produce a flags result.
15324 The DEST operand exists to help determine whether to commute commutative
15325 operators. The POP0/POP1 operands are updated in place. The new
15326 comparison code is returned, or UNKNOWN if not implementable. */
15328 static enum rtx_code
15329 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15330 rtx *pop0, rtx *pop1)
15338 /* We have no LTGT as an operator. We could implement it with
15339 NE & ORDERED, but this requires an extra temporary. It's
15340 not clear that it's worth it. */
15347 /* These are supported directly. */
15354 /* For commutative operators, try to canonicalize the destination
15355 operand to be first in the comparison - this helps reload to
15356 avoid extra moves. */
15357 if (!dest || !rtx_equal_p (dest, *pop1))
15365 /* These are not supported directly. Swap the comparison operands
15366 to transform into something that is supported. */
15370 code = swap_condition (code);
15374 gcc_unreachable ();
/* NOTE(review): decimated-listing fragment; interior lines are elided
   (embedded numbering jumps).  Visible logic: matches cmp/if_true/if_false
   operand patterns, then emits either an IEEE-safe UNSPEC min/max or a
   plain SMIN/SMAX depending on -ffinite-math-only/-funsafe-math flags.  */
15380 /* Detect conditional moves that exactly match min/max operational
15381 semantics. Note that this is IEEE safe, as long as we don't
15382 interchange the operands.
15384 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15385 and TRUE if the operation is successful and instructions are emitted. */
15388 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15389 rtx cmp_op1, rtx if_true, rtx if_false)
15391 enum machine_mode mode;
15397 else if (code == UNGE)
15400 if_true = if_false;
15406 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15408 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15413 mode = GET_MODE (dest);
15415 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15416 but MODE may be a vector mode and thus not appropriate. */
15417 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15419 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15422 if_true = force_reg (mode, if_true);
15423 v = gen_rtvec (2, if_true, if_false);
15424 tmp = gen_rtx_UNSPEC (mode, v, u);
15428 code = is_min ? SMIN : SMAX;
15429 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15432 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
/* NOTE(review): decimated-listing fragment (interior lines elided).
   Visible contract: forces cmp operands into valid operand forms, picks a
   fresh DEST register when DEST overlaps any operand, and emits
   DEST = (cmp_op0 <code> cmp_op1) as a mask-producing SET.  */
15436 /* Expand an sse vector comparison. Return the register with the result. */
15439 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15440 rtx op_true, rtx op_false)
15442 enum machine_mode mode = GET_MODE (dest);
15445 cmp_op0 = force_reg (mode, cmp_op0);
15446 if (!nonimmediate_operand (cmp_op1, mode))
15447 cmp_op1 = force_reg (mode, cmp_op1);
15450 || reg_overlap_mentioned_p (dest, op_true)
15451 || reg_overlap_mentioned_p (dest, op_false))
15452 dest = gen_reg_rtx (mode);
15454 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15455 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* NOTE(review): decimated-listing fragment (interior lines elided).
   Visible strategy, in order: (1) one-sided AND when either arm is the
   zero vector, (2) a single SSE5 pcmov IF_THEN_ELSE, (3) generic
   (cmp & true) | (~cmp & false) mask arithmetic via temporaries t2/t3.  */
15460 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15461 operations. This is used for both scalar and vector conditional moves. */
15464 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15466 enum machine_mode mode = GET_MODE (dest);
15469 if (op_false == CONST0_RTX (mode))
15471 op_true = force_reg (mode, op_true);
15472 x = gen_rtx_AND (mode, cmp, op_true);
15473 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15475 else if (op_true == CONST0_RTX (mode))
15477 op_false = force_reg (mode, op_false);
15478 x = gen_rtx_NOT (mode, cmp);
15479 x = gen_rtx_AND (mode, x, op_false);
15480 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15482 else if (TARGET_SSE5)
15484 rtx pcmov = gen_rtx_SET (mode, dest,
15485 gen_rtx_IF_THEN_ELSE (mode, cmp,
15492 op_true = force_reg (mode, op_true);
15493 op_false = force_reg (mode, op_false);
15495 t2 = gen_reg_rtx (mode);
15497 t3 = gen_reg_rtx (mode);
15501 x = gen_rtx_AND (mode, op_true, cmp);
15502 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15504 x = gen_rtx_NOT (mode, cmp);
15505 x = gen_rtx_AND (mode, x, op_false);
15506 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15508 x = gen_rtx_IOR (mode, t3, t2);
15509 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* NOTE(review): decimated-listing fragment (interior lines elided).
   Visible flow: SSE-math path first (prepare args, try min/max, else
   cmp+movcc via masks); otherwise fall back to x87 fcmov, rewriting
   non-fcmov-compatible comparisons through a setcc into a QImode temp,
   with extra IF_THEN_ELSE emissions for second_test/bypass_test.  */
15513 /* Expand a floating-point conditional move. Return true if successful. */
15516 ix86_expand_fp_movcc (rtx operands[])
15518 enum machine_mode mode = GET_MODE (operands[0]);
15519 enum rtx_code code = GET_CODE (operands[1]);
15520 rtx tmp, compare_op, second_test, bypass_test;
15522 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15524 enum machine_mode cmode;
15526 /* Since we've no cmove for sse registers, don't force bad register
15527 allocation just to gain access to it. Deny movcc when the
15528 comparison mode doesn't match the move mode. */
15529 cmode = GET_MODE (ix86_compare_op0);
15530 if (cmode == VOIDmode)
15531 cmode = GET_MODE (ix86_compare_op1);
15535 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15537 &ix86_compare_op1);
15538 if (code == UNKNOWN)
15541 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15542 ix86_compare_op1, operands[2],
15546 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15547 ix86_compare_op1, operands[2], operands[3]);
15548 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15552 /* The floating point conditional move instructions don't directly
15553 support conditions resulting from a signed integer comparison. */
15555 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15557 /* The floating point conditional move instructions don't directly
15558 support signed integer comparisons. */
15560 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15562 gcc_assert (!second_test && !bypass_test);
15563 tmp = gen_reg_rtx (QImode);
15564 ix86_expand_setcc (code, tmp);
15566 ix86_compare_op0 = tmp;
15567 ix86_compare_op1 = const0_rtx;
15568 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15570 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15572 tmp = gen_reg_rtx (mode);
15573 emit_move_insn (tmp, operands[3]);
15576 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15578 tmp = gen_reg_rtx (mode);
15579 emit_move_insn (tmp, operands[2]);
15583 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15584 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15585 operands[2], operands[3])));
15587 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15588 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15589 operands[3], operands[0])));
15591 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15592 gen_rtx_IF_THEN_ELSE (mode, second_test,
15593 operands[2], operands[0])));
/* NOTE(review): decimated-listing fragment (interior lines elided).
   Visible flow mirrors the SSE path of ix86_expand_fp_movcc but for
   vcond operands: prepare args, try min/max, else sse_cmp + sse_movcc.  */
15598 /* Expand a floating-point vector conditional move; a vcond operation
15599 rather than a movcc operation. */
15602 ix86_expand_fp_vcond (rtx operands[])
15604 enum rtx_code code = GET_CODE (operands[3]);
15607 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15608 &operands[4], &operands[5]);
15609 if (code == UNKNOWN)
15612 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15613 operands[5], operands[1], operands[2]))
15616 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15617 operands[1], operands[2]);
15618 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* NOTE(review): decimated-listing fragment (interior lines elided).
   Visible flow: canonicalize the comparison to EQ/GT/GTU (reversing or
   swapping as needed, with `negate` tracking arm exchange), gate V2DImode
   on SSE4.1/4.2, then lower unsigned compares to signed-vs-zero tricks
   (modulo subtract + sign-bit XOR, or unsigned saturating subtract)
   before emitting sse_cmp + sse_movcc on operands[1+negate]/[2-negate].  */
15622 /* Expand a signed/unsigned integral vector conditional move. */
15625 ix86_expand_int_vcond (rtx operands[])
15627 enum machine_mode mode = GET_MODE (operands[0]);
15628 enum rtx_code code = GET_CODE (operands[3]);
15629 bool negate = false;
15632 cop0 = operands[4];
15633 cop1 = operands[5];
15635 /* SSE5 supports all of the comparisons on all vector int types. */
15638 /* Canonicalize the comparison to EQ, GT, GTU. */
15649 code = reverse_condition (code);
15655 code = reverse_condition (code);
15661 code = swap_condition (code);
15662 x = cop0, cop0 = cop1, cop1 = x;
15666 gcc_unreachable ();
15669 /* Only SSE4.1/SSE4.2 supports V2DImode. */
15670 if (mode == V2DImode)
15675 /* SSE4.1 supports EQ. */
15676 if (!TARGET_SSE4_1)
15682 /* SSE4.2 supports GT/GTU. */
15683 if (!TARGET_SSE4_2)
15688 gcc_unreachable ();
15692 /* Unsigned parallel compare is not supported by the hardware. Play some
15693 tricks to turn this into a signed comparison against 0. */
15696 cop0 = force_reg (mode, cop0);
15705 /* Perform a parallel modulo subtraction. */
15706 t1 = gen_reg_rtx (mode);
15707 emit_insn ((mode == V4SImode
15709 : gen_subv2di3) (t1, cop0, cop1));
15711 /* Extract the original sign bit of op0. */
15712 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15714 t2 = gen_reg_rtx (mode);
15715 emit_insn ((mode == V4SImode
15717 : gen_andv2di3) (t2, cop0, mask));
15719 /* XOR it back into the result of the subtraction. This results
15720 in the sign bit set iff we saw unsigned underflow. */
15721 x = gen_reg_rtx (mode);
15722 emit_insn ((mode == V4SImode
15724 : gen_xorv2di3) (x, t1, t2));
15732 /* Perform a parallel unsigned saturating subtraction. */
15733 x = gen_reg_rtx (mode);
15734 emit_insn (gen_rtx_SET (VOIDmode, x,
15735 gen_rtx_US_MINUS (mode, cop0, cop1)));
15742 gcc_unreachable ();
15746 cop1 = CONST0_RTX (mode);
15750 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15751 operands[1+negate], operands[2-negate]);
15753 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15754 operands[2-negate]);
/* NOTE(review): decimated-listing fragment (interior lines elided).
   Visible mechanics: select an interleave-high/low insn per element
   width, build the "other" operand SE as either a zero vector (zero
   extension) or a GT-against-zero sign mask, then emit the interleave.  */
15758 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
15759 true if we should do zero extension, else sign extension. HIGH_P is
15760 true if we want the N/2 high elements, else the low elements. */
15763 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15765 enum machine_mode imode = GET_MODE (operands[1]);
15766 rtx (*unpack)(rtx, rtx, rtx);
15773 unpack = gen_vec_interleave_highv16qi;
15775 unpack = gen_vec_interleave_lowv16qi;
15779 unpack = gen_vec_interleave_highv8hi;
15781 unpack = gen_vec_interleave_lowv8hi;
15785 unpack = gen_vec_interleave_highv4si;
15787 unpack = gen_vec_interleave_lowv4si;
15790 gcc_unreachable ();
15793 dest = gen_lowpart (imode, operands[0]);
15796 se = force_reg (imode, CONST0_RTX (imode));
15798 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15799 operands[1], pc_rtx, pc_rtx);
15801 emit_insn (unpack (dest, operands[1], se));
/* NOTE(review): decimated-listing fragment (interior lines elided).
   SSE4.1 variant: picks a pmovzx/pmovsx-style extend insn per width; for
   the high half it first shifts the upper 8 bytes down via a TImode
   logical right shift, then extends.  */
15804 /* This function performs the same task as ix86_expand_sse_unpack,
15805 but with SSE4.1 instructions. */
15808 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15810 enum machine_mode imode = GET_MODE (operands[1]);
15811 rtx (*unpack)(rtx, rtx);
15818 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15820 unpack = gen_sse4_1_extendv8qiv8hi2;
15824 unpack = gen_sse4_1_zero_extendv4hiv4si2;
15826 unpack = gen_sse4_1_extendv4hiv4si2;
15830 unpack = gen_sse4_1_zero_extendv2siv2di2;
15832 unpack = gen_sse4_1_extendv2siv2di2;
15835 gcc_unreachable ();
15838 dest = operands[0];
15841 /* Shift higher 8 bytes to lower 8 bytes. */
15842 src = gen_reg_rtx (imode);
15843 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15844 gen_lowpart (TImode, operands[1]),
15850 emit_insn (unpack (dest, src));
/* NOTE(review): decimated-listing fragment (interior lines elided).
   SSE5 variant: for each element width it builds a 16-entry PPERM byte
   selector (pperm_bytes), interleaving source bytes with either zero or
   sign-fill selectors, plus a PARALLEL `vs` of lane indices, then emits
   the matching sse5_pperm_{zero,sign} pattern.  */
15853 /* This function performs the same task as ix86_expand_sse_unpack,
15854 but with sse5 instructions. */
15857 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15859 enum machine_mode imode = GET_MODE (operands[1]);
15860 int pperm_bytes[16];
15862 int h = (high_p) ? 8 : 0;
15865 rtvec v = rtvec_alloc (16);
15868 rtx op0 = operands[0], op1 = operands[1];
15873 vs = rtvec_alloc (8);
15874 h2 = (high_p) ? 8 : 0;
15875 for (i = 0; i < 8; i++)
15877 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15878 pperm_bytes[2*i+1] = ((unsigned_p)
15880 : PPERM_SIGN | PPERM_SRC2 | i | h);
15883 for (i = 0; i < 16; i++)
15884 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15886 for (i = 0; i < 8; i++)
15887 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15889 p = gen_rtx_PARALLEL (VOIDmode, vs);
15890 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15892 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15894 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
15898 vs = rtvec_alloc (4);
15899 h2 = (high_p) ? 4 : 0;
15900 for (i = 0; i < 4; i++)
15902 sign_extend = ((unsigned_p)
15904 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15905 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15906 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15907 pperm_bytes[4*i+2] = sign_extend;
15908 pperm_bytes[4*i+3] = sign_extend;
15911 for (i = 0; i < 16; i++)
15912 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15914 for (i = 0; i < 4; i++)
15915 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15917 p = gen_rtx_PARALLEL (VOIDmode, vs);
15918 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15920 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15922 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
15926 vs = rtvec_alloc (2);
15927 h2 = (high_p) ? 2 : 0;
15928 for (i = 0; i < 2; i++)
15930 sign_extend = ((unsigned_p)
15932 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
15933 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
15934 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
15935 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
15936 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
15937 pperm_bytes[8*i+4] = sign_extend;
15938 pperm_bytes[8*i+5] = sign_extend;
15939 pperm_bytes[8*i+6] = sign_extend;
15940 pperm_bytes[8*i+7] = sign_extend;
15943 for (i = 0; i < 16; i++)
15944 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15946 for (i = 0; i < 2; i++)
15947 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15949 p = gen_rtx_PARALLEL (VOIDmode, vs);
15950 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15952 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
15954 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
15958 gcc_unreachable ();
/* NOTE(review): decimated-listing fragment (interior lines elided).
   Builds PPERM byte selectors that take even bytes/words/dwords from
   SRC1 (low half of result) and SRC2 (high half), then emits the
   matching sse5_pperm_pack pattern per destination element width.  */
15964 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
15965 next narrower integer vector type */
15967 ix86_expand_sse5_pack (rtx operands[3])
15969 enum machine_mode imode = GET_MODE (operands[0]);
15970 int pperm_bytes[16];
15972 rtvec v = rtvec_alloc (16);
15974 rtx op0 = operands[0];
15975 rtx op1 = operands[1];
15976 rtx op2 = operands[2];
15981 for (i = 0; i < 8; i++)
15983 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
15984 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
15987 for (i = 0; i < 16; i++)
15988 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15990 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15991 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
15995 for (i = 0; i < 4; i++)
15997 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
15998 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
15999 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16000 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16003 for (i = 0; i < 16; i++)
16004 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16006 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16007 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
16011 for (i = 0; i < 2; i++)
16013 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16014 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16015 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16016 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16017 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16018 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16019 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16020 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16023 for (i = 0; i < 16; i++)
16024 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16026 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16027 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16031 gcc_unreachable ();
/* NOTE(review): decimated-listing fragment (interior lines elided).
   Visible logic: bail unless operands[3] is +/-1; lower the compare to a
   carry-flag test, reverse unordered FP conditions when needed, then emit
   an adc- or sbb-style insn chosen by mode and by whether the condition
   code is LTU combined with the +/-1 constant.  */
16037 /* Expand conditional increment or decrement using adb/sbb instructions.
16038 The default case using setcc followed by the conditional move can be
16039 done by generic code. */
16041 ix86_expand_int_addcc (rtx operands[])
16043 enum rtx_code code = GET_CODE (operands[1]);
16045 rtx val = const0_rtx;
16046 bool fpcmp = false;
16047 enum machine_mode mode = GET_MODE (operands[0]);
16049 if (operands[3] != const1_rtx
16050 && operands[3] != constm1_rtx)
16052 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16053 ix86_compare_op1, &compare_op))
16055 code = GET_CODE (compare_op);
16057 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16058 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16061 code = ix86_fp_compare_code_to_integer (code);
16068 PUT_CODE (compare_op,
16069 reverse_condition_maybe_unordered
16070 (GET_CODE (compare_op)));
16072 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16074 PUT_MODE (compare_op, mode);
16076 /* Construct either adc or sbb insn. */
16077 if ((code == LTU) == (operands[3] == constm1_rtx))
16079 switch (GET_MODE (operands[0]))
16082 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16085 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16088 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16091 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16094 gcc_unreachable ();
16099 switch (GET_MODE (operands[0]))
16102 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16105 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16108 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16111 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16114 gcc_unreachable ();
16117 return 1; /* DONE */
/* NOTE(review): decimated-listing fragment (interior lines elided).
   Splits an operand into 2-4 SImode (32-bit) or DImode (64-bit) parts,
   handling push operands, constant-pool references, CONST_VECTOR,
   hard registers, offsettable memory, and CONST_DOUBLE (via
   real_to_target / immed_double_const) per mode.  */
16121 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16122 works for floating pointer parameters and nonoffsetable memories.
16123 For pushes, it returns just stack offsets; the values will be saved
16124 in the right order. Maximally three parts are generated. */
16127 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16132 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16134 size = (GET_MODE_SIZE (mode) + 4) / 8;
16136 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16137 gcc_assert (size >= 2 && size <= 4);
16139 /* Optimize constant pool reference to immediates. This is used by fp
16140 moves, that force all constants to memory to allow combining. */
16141 if (MEM_P (operand) && MEM_READONLY_P (operand))
16143 rtx tmp = maybe_get_pool_constant (operand);
16148 if (MEM_P (operand) && !offsettable_memref_p (operand))
16150 /* The only non-offsetable memories we handle are pushes. */
16151 int ok = push_operand (operand, VOIDmode);
16155 operand = copy_rtx (operand);
16156 PUT_MODE (operand, Pmode);
16157 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16161 if (GET_CODE (operand) == CONST_VECTOR)
16163 enum machine_mode imode = int_mode_for_mode (mode);
16164 /* Caution: if we looked through a constant pool memory above,
16165 the operand may actually have a different mode now. That's
16166 ok, since we want to pun this all the way back to an integer. */
16167 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16168 gcc_assert (operand != NULL);
16174 if (mode == DImode)
16175 split_di (&operand, 1, &parts[0], &parts[1]);
16180 if (REG_P (operand))
16182 gcc_assert (reload_completed);
16183 for (i = 0; i < size; i++)
16184 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16186 else if (offsettable_memref_p (operand))
16188 operand = adjust_address (operand, SImode, 0);
16189 parts[0] = operand;
16190 for (i = 1; i < size; i++)
16191 parts[i] = adjust_address (operand, SImode, 4 * i);
16193 else if (GET_CODE (operand) == CONST_DOUBLE)
16198 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16202 real_to_target (l, &r, mode);
16203 parts[3] = gen_int_mode (l[3], SImode);
16204 parts[2] = gen_int_mode (l[2], SImode);
16207 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16208 parts[2] = gen_int_mode (l[2], SImode);
16211 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16214 gcc_unreachable ();
16216 parts[1] = gen_int_mode (l[1], SImode);
16217 parts[0] = gen_int_mode (l[0], SImode);
16220 gcc_unreachable ();
16225 if (mode == TImode)
16226 split_ti (&operand, 1, &parts[0], &parts[1]);
16227 if (mode == XFmode || mode == TFmode)
16229 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16230 if (REG_P (operand))
16232 gcc_assert (reload_completed);
16233 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16234 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16236 else if (offsettable_memref_p (operand))
16238 operand = adjust_address (operand, DImode, 0);
16239 parts[0] = operand;
16240 parts[1] = adjust_address (operand, upper_mode, 8);
16242 else if (GET_CODE (operand) == CONST_DOUBLE)
16247 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16248 real_to_target (l, &r, mode);
16250 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16251 if (HOST_BITS_PER_WIDE_INT >= 64)
16254 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16255 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16258 parts[0] = immed_double_const (l[0], l[1], DImode);
16260 if (upper_mode == SImode)
16261 parts[1] = gen_int_mode (l[2], SImode);
16262 else if (HOST_BITS_PER_WIDE_INT >= 64)
16265 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16266 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16269 parts[1] = immed_double_const (l[2], l[3], DImode);
16272 gcc_unreachable ();
/* NOTE(review): decimated-listing fragment (interior lines elided).
   Splits a multi-word move into per-part moves: handles 64-bit single
   moves, pushes, source/destination register-vs-memory collisions
   (reordering parts or emitting an lea of the address), 64-bit SImode
   retyping, correct move ordering to avoid clobbering the source, and a
   size-optimization pass that un-CSEs repeated nonzero constants.  */
16279 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16280 Return false when normal moves are needed; true when all required
16281 insns have been emitted. Operands 2-4 contain the input values
16282 int the correct order; operands 5-7 contain the output values. */
16285 ix86_split_long_move (rtx operands[])
16290 int collisions = 0;
16291 enum machine_mode mode = GET_MODE (operands[0]);
16292 bool collisionparts[4];
16294 /* The DFmode expanders may ask us to move double.
16295 For 64bit target this is single move. By hiding the fact
16296 here we simplify i386.md splitters. */
16297 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16299 /* Optimize constant pool reference to immediates. This is used by
16300 fp moves, that force all constants to memory to allow combining. */
16302 if (MEM_P (operands[1])
16303 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16304 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16305 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16306 if (push_operand (operands[0], VOIDmode))
16308 operands[0] = copy_rtx (operands[0]);
16309 PUT_MODE (operands[0], Pmode);
16312 operands[0] = gen_lowpart (DImode, operands[0]);
16313 operands[1] = gen_lowpart (DImode, operands[1]);
16314 emit_move_insn (operands[0], operands[1]);
16318 /* The only non-offsettable memory we handle is push. */
16319 if (push_operand (operands[0], VOIDmode))
16322 gcc_assert (!MEM_P (operands[0])
16323 || offsettable_memref_p (operands[0]));
16325 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16326 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16328 /* When emitting push, take care for source operands on the stack. */
16329 if (push && MEM_P (operands[1])
16330 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16331 for (i = 0; i < nparts - 1; i++)
16332 part[1][i] = change_address (part[1][i],
16333 GET_MODE (part[1][i]),
16334 XEXP (part[1][i + 1], 0));
16336 /* We need to do copy in the right order in case an address register
16337 of the source overlaps the destination. */
16338 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16342 for (i = 0; i < nparts; i++)
16345 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16346 if (collisionparts[i])
16350 /* Collision in the middle part can be handled by reordering. */
16351 if (collisions == 1 && nparts == 3 && collisionparts [1])
16353 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16354 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16356 else if (collisions == 1
16358 && (collisionparts [1] || collisionparts [2]))
16360 if (collisionparts [1])
16362 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16363 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16367 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16368 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16372 /* If there are more collisions, we can't handle it by reordering.
16373 Do an lea to the last part and use only one colliding move. */
16374 else if (collisions > 1)
16380 base = part[0][nparts - 1];
16382 /* Handle the case when the last part isn't valid for lea.
16383 Happens in 64-bit mode storing the 12-byte XFmode. */
16384 if (GET_MODE (base) != Pmode)
16385 base = gen_rtx_REG (Pmode, REGNO (base));
16387 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16388 part[1][0] = replace_equiv_address (part[1][0], base);
16389 for (i = 1; i < nparts; i++)
16391 tmp = plus_constant (base, UNITS_PER_WORD * i);
16392 part[1][i] = replace_equiv_address (part[1][i], tmp);
16403 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16404 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16405 emit_move_insn (part[0][2], part[1][2]);
16407 else if (nparts == 4)
16409 emit_move_insn (part[0][3], part[1][3]);
16410 emit_move_insn (part[0][2], part[1][2]);
16415 /* In 64bit mode we don't have 32bit push available. In case this is
16416 register, it is OK - we will just use larger counterpart. We also
16417 retype memory - these comes from attempt to avoid REX prefix on
16418 moving of second half of TFmode value. */
16419 if (GET_MODE (part[1][1]) == SImode)
16421 switch (GET_CODE (part[1][1]))
16424 part[1][1] = adjust_address (part[1][1], DImode, 0);
16428 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16432 gcc_unreachable ();
16435 if (GET_MODE (part[1][0]) == SImode)
16436 part[1][0] = part[1][1];
16439 emit_move_insn (part[0][1], part[1][1]);
16440 emit_move_insn (part[0][0], part[1][0]);
16444 /* Choose correct order to not overwrite the source before it is copied. */
16445 if ((REG_P (part[0][0])
16446 && REG_P (part[1][1])
16447 && (REGNO (part[0][0]) == REGNO (part[1][1])
16449 && REGNO (part[0][0]) == REGNO (part[1][2]))
16451 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16453 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16455 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16457 operands[2 + i] = part[0][j];
16458 operands[6 + i] = part[1][j];
16463 for (i = 0; i < nparts; i++)
16465 operands[2 + i] = part[0][i];
16466 operands[6 + i] = part[1][i];
16470 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16471 if (optimize_insn_for_size_p ())
16473 for (j = 0; j < nparts - 1; j++)
16474 if (CONST_INT_P (operands[6 + j])
16475 && operands[6 + j] != const0_rtx
16476 && REG_P (operands[2 + j]))
16477 for (i = j; i < nparts - 1; i++)
16478 if (CONST_INT_P (operands[7 + i])
16479 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16480 operands[7 + i] = operands[2 + j];
16483 for (i = 0; i < nparts; i++)
16484 emit_move_insn (operands[2 + i], operands[6 + i]);
16489 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16490 left shift by a constant, either using a single shift or
16491 a sequence of add instructions.  */
16494 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* COUNT == 1: a single self-add doubles the value (x << 1 == x + x).
   NOTE(review): the selector appears inverted, but this routine shifts a
   half of the double word, so for MODE == DImode the piece is SImode — the
   elided true arm presumably is the SImode generator; confirm in full
   source.  */
16498 emit_insn ((mode == DImode
16500 : gen_adddi3) (operand, operand, operand));
/* When optimizing for speed and COUNT additions cost no more than one
   constant shift, emit COUNT self-adds instead.  */
16502 else if (!optimize_insn_for_size_p ()
16503 && count * ix86_cost->add <= ix86_cost->shift_const)
16506 for (i=0; i<count; i++)
16508 emit_insn ((mode == DImode
16510 : gen_adddi3) (operand, operand, operand));
/* Otherwise a plain shift-by-COUNT.  */
16514 emit_insn ((mode == DImode
16516 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit,
   per the split_di/split_ti calls below) into operations on the two
   half-words.  OPERANDS[0] = destination, OPERANDS[1] = source,
   OPERANDS[2] = shift count.  SCRATCH, when non-NULL and CMOV is
   available, lets the variable-count case avoid a branch.  */
16520 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16522 rtx low[2], high[2];
16524 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: emit a fully unrolled straight-line sequence.  */
16526 if (CONST_INT_P (operands[2]))
16528 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16529 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= half-word width: low half becomes zero, the old low half
   (further shifted as needed) lands in the high half.  */
16531 if (count >= single_width)
16533 emit_move_insn (high[0], low[1]);
16534 emit_move_insn (low[0], const0_rtx);
16536 if (count > single_width)
16537 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Count < half-word width: shld moves the spilled-over bits from low
   into high, then the low half is shifted in place.  */
16541 if (!rtx_equal_p (operands[0], operands[1]))
16542 emit_move_insn (operands[0], operands[1]);
16543 emit_insn ((mode == DImode
16545 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16546 ix86_expand_ashl_const (low[0], count, mode);
/* Variable shift count from here on.  */
16551 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
/* Special-case 1 << N.  */
16553 if (operands[1] == const1_rtx)
16555 /* Assuming we've chosen a QImode capable registers, then 1 << N
16556 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16557 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16559 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16561 ix86_expand_clear (low[0]);
16562 ix86_expand_clear (high[0]);
16563 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
/* setcc into the low byte of each half: low = (N < single_width),
   high = (N >= single_width); the shifts below then move the 1 into
   place.  */
16565 d = gen_lowpart (QImode, low[0]);
16566 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16567 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16568 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16570 d = gen_lowpart (QImode, high[0]);
16571 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16572 s = gen_rtx_NE (QImode, flags, const0_rtx);
16573 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16576 /* Otherwise, we can get the same results by manually performing
16577 a bit extract operation on bit 5/6, and then performing the two
16578 shifts. The two methods of getting 0/1 into low/high are exactly
16579 the same size. Avoiding the shift in the bit extract case helps
16580 pentium4 a bit; no one else seems to care much either way. */
16585 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16586 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16588 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16589 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) or 6 (TImode) of the count: that bit says
   whether the shift crosses into the high half.  */
16591 emit_insn ((mode == DImode
16593 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16594 emit_insn ((mode == DImode
16596 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16597 emit_move_insn (low[0], high[0]);
16598 emit_insn ((mode == DImode
16600 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
/* Shift the 0/1 pair into final position by the (masked) count.  */
16603 emit_insn ((mode == DImode
16605 : gen_ashldi3) (low[0], low[0], operands[2]));
16606 emit_insn ((mode == DImode
16608 : gen_ashldi3) (high[0], high[0], operands[2]));
16612 if (operands[1] == constm1_rtx)
16614 /* For -1 << N, we can avoid the shld instruction, because we
16615 know that we're shifting 0...31/63 ones into a -1. */
16616 emit_move_insn (low[0], constm1_rtx);
16617 if (optimize_insn_for_size_p ())
16618 emit_move_insn (high[0], low[0]);
16620 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld then shift the low half.  */
16624 if (!rtx_equal_p (operands[0], operands[1]))
16625 emit_move_insn (operands[0], operands[1]);
16627 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16628 emit_insn ((mode == DImode
16630 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16633 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* Fix up the case count >= single_width: with cmov and a scratch
   register this is branch-free, otherwise a conditional-jump pattern
   is used.  */
16635 if (TARGET_CMOVE && scratch)
16637 ix86_expand_clear (scratch);
16638 emit_insn ((mode == DImode
16639 ? gen_x86_shift_adj_1
16640 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16644 emit_insn ((mode == DImode
16645 ? gen_x86_shift_adj_2
16646 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into half-word operations.
   Same operand conventions as ix86_split_ashl.  */
16650 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16652 rtx low[2], high[2];
16654 const int single_width = mode == DImode ? 32 : 64;
16656 if (CONST_INT_P (operands[2]))
16658 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16659 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift by (width - 1): both halves become the sign bit broadcast.  */
16661 if (count == single_width * 2 - 1)
16663 emit_move_insn (high[0], high[1]);
16664 emit_insn ((mode == DImode
16666 : gen_ashrdi3) (high[0], high[0],
16667 GEN_INT (single_width - 1)));
16668 emit_move_insn (low[0], high[0]);
/* Count >= half width: old high half (shifted further as needed)
   becomes the low half; high half is filled with the sign bit.  */
16671 else if (count >= single_width)
16673 emit_move_insn (low[0], high[1]);
16674 emit_move_insn (high[0], low[0]);
16675 emit_insn ((mode == DImode
16677 : gen_ashrdi3) (high[0], high[0],
16678 GEN_INT (single_width - 1)));
16679 if (count > single_width)
16680 emit_insn ((mode == DImode
16682 : gen_ashrdi3) (low[0], low[0],
16683 GEN_INT (count - single_width)));
/* Count < half width: shrd feeds high bits into low, then arithmetic
   shift of the high half.  */
16687 if (!rtx_equal_p (operands[0], operands[1]))
16688 emit_move_insn (operands[0], operands[1]);
16689 emit_insn ((mode == DImode
16691 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16692 emit_insn ((mode == DImode
16694 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable shift count.  */
16699 if (!rtx_equal_p (operands[0], operands[1]))
16700 emit_move_insn (operands[0], operands[1]);
16702 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16704 emit_insn ((mode == DImode
16706 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16707 emit_insn ((mode == DImode
16709 : gen_ashrdi3) (high[0], high[0], operands[2]));
/* Fix up counts >= single_width; SCRATCH holds the sign-fill value for
   the branch-free cmov variant.  */
16711 if (TARGET_CMOVE && scratch)
16713 emit_move_insn (scratch, high[0]);
16714 emit_insn ((mode == DImode
16716 : gen_ashrdi3) (scratch, scratch,
16717 GEN_INT (single_width - 1)));
16718 emit_insn ((mode == DImode
16719 ? gen_x86_shift_adj_1
16720 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16724 emit_insn ((mode == DImode
16725 ? gen_x86_shift_adj_3
16726 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into half-word operations.
   Same operand conventions as ix86_split_ashl; differs from
   ix86_split_ashr by zero-filling instead of sign-filling.  */
16731 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16733 rtx low[2], high[2];
16735 const int single_width = mode == DImode ? 32 : 64;
16737 if (CONST_INT_P (operands[2]))
16739 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16740 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= half width: old high half becomes the low half (shifted
   further as needed); high half becomes zero.  */
16742 if (count >= single_width)
16744 emit_move_insn (low[0], high[1]);
16745 ix86_expand_clear (high[0]);
16747 if (count > single_width)
16748 emit_insn ((mode == DImode
16750 : gen_lshrdi3) (low[0], low[0],
16751 GEN_INT (count - single_width)));
/* Count < half width: shrd then logical shift of the high half.  */
16755 if (!rtx_equal_p (operands[0], operands[1]))
16756 emit_move_insn (operands[0], operands[1]);
16757 emit_insn ((mode == DImode
16759 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16760 emit_insn ((mode == DImode
16762 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable shift count.  */
16767 if (!rtx_equal_p (operands[0], operands[1]))
16768 emit_move_insn (operands[0], operands[1]);
16770 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16772 emit_insn ((mode == DImode
16774 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16775 emit_insn ((mode == DImode
16777 : gen_lshrdi3) (high[0], high[0], operands[2]));
16779 /* Heh. By reversing the arguments, we can reuse this pattern. */
16780 if (TARGET_CMOVE && scratch)
16782 ix86_expand_clear (scratch);
16783 emit_insn ((mode == DImode
16784 ? gen_x86_shift_adj_1
16785 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16789 emit_insn ((mode == DImode
16790 ? gen_x86_shift_adj_2
16791 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16795 /* Predict just emitted jump instruction to be taken with probability PROB. */
16797 predict_jump (int prob)
16799 rtx insn = get_last_insn ();
/* The last emitted insn must really be a jump for the note to make
   sense.  */
16800 gcc_assert (JUMP_P (insn));
/* Attach a REG_BR_PROB note carrying PROB to that jump.  */
16802 = gen_rtx_EXPR_LIST (REG_BR_PROB,
16807 /* Helper function for the string operations below. Test VARIABLE whether
16808 it is aligned to VALUE bytes. If true, jump to the label. */
16810 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16812 rtx label = gen_label_rtx ();
16813 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* tmpcount = variable & value; zero means the tested bit is clear and
   the jump to LABEL is taken.  */
16814 if (GET_MODE (variable) == DImode)
16815 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16817 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16818 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
/* Epilogue tests are coin flips (each size bit is 50/50); prologue
   alignment tests usually succeed, hence the 90% prediction.  */
16821 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16823 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16827 /* Adjust COUNTER by the VALUE. */
/* Decrements COUNTREG by VALUE in place (note the negated constant).  */
16829 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16831 if (GET_MODE (countreg) == DImode)
16832 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16834 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16837 /* Zero extend possibly SImode EXP to Pmode register. */
16839 ix86_zero_extend_to_Pmode (rtx exp)
/* VOIDmode means EXP is a constant; just force it into a Pmode reg.  */
16842 if (GET_MODE (exp) == VOIDmode)
16843 return force_reg (Pmode, exp);
/* Already the right width — copy into a fresh register.  */
16844 if (GET_MODE (exp) == Pmode)
16845 return copy_to_mode_reg (Pmode, exp);
/* Otherwise zero-extend SImode EXP into a new DImode (Pmode) reg.  */
16846 r = gen_reg_rtx (Pmode);
16847 emit_insn (gen_zero_extendsidi2 (r, exp));
16851 /* Divide COUNTREG by SCALE. */
16853 scale_counter (rtx countreg, int scale)
16856 rtx piece_size_mask;
/* Constant count: fold the division at expand time.  */
16860 if (CONST_INT_P (countreg))
16861 return GEN_INT (INTVAL (countreg) / scale);
16862 gcc_assert (REG_P (countreg));
/* SCALE is a power of two here (see exact_log2 below), so the divide
   is a logical right shift.  */
16864 piece_size_mask = GEN_INT (scale - 1);
16865 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16866 GEN_INT (exact_log2 (scale)),
16867 NULL, 1, OPTAB_DIRECT);
16871 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
16872 DImode for constant loop counts. */
16874 static enum machine_mode
16875 counter_mode (rtx count_exp)
/* Non-constant counts already carry their mode.  */
16877 if (GET_MODE (count_exp) != VOIDmode)
16878 return GET_MODE (count_exp);
16879 if (GET_CODE (count_exp) != CONST_INT)
/* Constants that don't fit in 32 bits need DImode on 64-bit targets.  */
16881 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16886 /* When SRCPTR is non-NULL, output simple loop to move memory
16887 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
16888 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
16889 equivalent loop to set memory by VALUE (supposed to be in MODE).
16891 The size is rounded down to whole number of chunk size moved at once.
16892 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
16896 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16897 rtx destptr, rtx srcptr, rtx value,
16898 rtx count, enum machine_mode mode, int unroll,
16901 rtx out_label, top_label, iter, tmp;
16902 enum machine_mode iter_mode = counter_mode (count);
/* Bytes moved per loop iteration, and the mask that rounds COUNT down
   to a multiple of it.  */
16903 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16904 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16910 top_label = gen_label_rtx ();
16911 out_label = gen_label_rtx ();
16912 iter = gen_reg_rtx (iter_mode);
16914 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16915 NULL, 1, OPTAB_DIRECT);
16916 /* Those two should combine. */
16917 if (piece_size == const1_rtx)
/* Skip the loop entirely when the rounded size is zero.  */
16919 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
16921 predict_jump (REG_BR_PROB_BASE * 10 / 100);
16923 emit_move_insn (iter, const0_rtx);
16925 emit_label (top_label);
/* Address each chunk as *(destptr + iter) / *(srcptr + iter).  */
16927 tmp = convert_modes (Pmode, iter_mode, iter, true);
16928 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
16929 destmem = change_address (destmem, mode, x_addr);
16933 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
16934 srcmem = change_address (srcmem, mode, y_addr);
16936 /* When unrolling for chips that reorder memory reads and writes,
16937 we can save registers by using single temporary.
16938 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" deliberately disables this single-temporary
   variant; the all-loads-then-all-stores path below is always used.  */
16939 if (!TARGET_64BIT && 0)
16941 for (i = 0; i < unroll; i++)
16946 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16948 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16950 emit_move_insn (destmem, srcmem);
/* Copy path: load all UNROLL chunks into temporaries first, then store
   them — friendlier to chips that reorder reads and writes.  */
16956 gcc_assert (unroll <= 4);
16957 for (i = 0; i < unroll; i++)
16959 tmpreg[i] = gen_reg_rtx (mode);
16963 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16965 emit_move_insn (tmpreg[i], srcmem);
16967 for (i = 0; i < unroll; i++)
16972 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16974 emit_move_insn (destmem, tmpreg[i]);
/* Set path (SRCPTR == NULL): store VALUE into each chunk.  */
16979 for (i = 0; i < unroll; i++)
16983 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16984 emit_move_insn (destmem, value);
/* iter += piece_size; loop while iter < size.  */
16987 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
16988 true, OPTAB_LIB_WIDEN);
16990 emit_move_insn (iter, tmp);
16992 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the back-edge probability from the expected trip count when
   the caller supplied one; otherwise assume an 80% taken branch.  */
16994 if (expected_size != -1)
16996 expected_size /= GET_MODE_SIZE (mode) * unroll;
16997 if (expected_size == 0)
16999 else if (expected_size > REG_BR_PROB_BASE)
17000 predict_jump (REG_BR_PROB_BASE - 1);
17002 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17005 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the live pointers past the copied/set region so callers can
   emit an epilogue for the remainder.  */
17006 iter = ix86_zero_extend_to_Pmode (iter);
17007 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17008 true, OPTAB_LIB_WIDEN);
17009 if (tmp != destptr)
17010 emit_move_insn (destptr, tmp);
17013 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17014 true, OPTAB_LIB_WIDEN);
17016 emit_move_insn (srcptr, tmp);
17018 emit_label (out_label);
17021 /* Output "rep; mov" instruction.
17022 Arguments have same meaning as for previous function */
17024 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17025 rtx destptr, rtx srcptr,
17027 enum machine_mode mode)
17033 /* If the size is known, it is shorter to use rep movs. */
17034 if (mode == QImode && CONST_INT_P (count)
17035 && !(INTVAL (count) & 3)
/* Make sure the MEMs are BLKmode and anchored at the live pointer regs
   so the rep pattern's address uses match the operands.  */
17038 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17039 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17040 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17041 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
/* The count register holds the number of MODE-sized elements.  */
17042 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DESTEXP/SRCEXP describe the final pointer values the rep insn leaves
   behind: ptr + countreg * element-size.  */
17043 if (mode != QImode)
17045 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17046 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17047 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17048 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17049 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17050 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17054 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17055 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* Record the exact byte count on the MEMs (rounded down to whole
   elements) for alias/size analysis; otherwise drop any stale size.  */
17057 if (CONST_INT_P (count))
17059 count = GEN_INT (INTVAL (count)
17060 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17061 destmem = shallow_copy_rtx (destmem);
17062 srcmem = shallow_copy_rtx (srcmem);
17063 set_mem_size (destmem, count);
17064 set_mem_size (srcmem, count);
17068 if (MEM_SIZE (destmem))
17069 set_mem_size (destmem, NULL_RTX);
17070 if (MEM_SIZE (srcmem))
17071 set_mem_size (srcmem, NULL_RTX);
17073 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17077 /* Output "rep; stos" instruction.
17078 Arguments have same meaning as for previous function */
17080 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17081 rtx count, enum machine_mode mode,
/* Anchor the MEM at DESTPTR in BLKmode so it matches the rep pattern.  */
17087 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17088 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* The stored value must live in a register of the element mode.  */
17089 value = force_reg (mode, gen_lowpart (mode, value));
17090 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DESTEXP is the pointer value left after the rep stos completes.  */
17091 if (mode != QImode)
17093 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17094 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17095 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17098 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Only a zeroing store with constant count keeps a useful MEM size;
   otherwise clear any stale size attribute.  */
17099 if (orig_value == const0_rtx && CONST_INT_P (count))
17101 count = GEN_INT (INTVAL (count)
17102 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17103 destmem = shallow_copy_rtx (destmem);
17104 set_mem_size (destmem, count);
17106 else if (MEM_SIZE (destmem))
17107 set_mem_size (destmem, NULL_RTX);
17108 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single MODE-sized element copy from SRCMEM+OFFSET to
   DESTMEM+OFFSET via the strmov pattern, which also advances DESTPTR
   and SRCPTR.  */
17112 emit_strmov (rtx destmem, rtx srcmem,
17113 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17115 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17116 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17117 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17120 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
17122 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17123 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: emit a straight-line copy of the residual bytes, one
   power-of-two chunk per set bit of the count.  */
17126 if (CONST_INT_P (count))
17128 HOST_WIDE_INT countval = INTVAL (count);
17131 if ((countval & 0x10) && max_size > 16)
17135 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17136 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
/* 16-byte residuals only arise on 64-bit paths here.  */
17139 gcc_unreachable ();
17142 if ((countval & 0x08) && max_size > 8)
17145 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17148 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17149 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17153 if ((countval & 0x04) && max_size > 4)
17155 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17158 if ((countval & 0x02) && max_size > 2)
17160 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17163 if ((countval & 0x01) && max_size > 1)
17165 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable residual: fall back to a byte loop over
   count & (max_size - 1).  */
17172 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17173 count, 1, OPTAB_DIRECT);
17174 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17175 count, QImode, 1, 4);
17179 /* When there are stringops, we can cheaply increase dest and src pointers.
17180 Otherwise we save code size by maintaining offset (zero is readily
17181 available from preceding rep operation) and using x86 addressing modes.
/* Variable residual < max_size: test each size bit of COUNT and copy
   conditionally — strmov variant advances the pointers itself.  */
17183 if (TARGET_SINGLE_STRINGOP)
17187 rtx label = ix86_expand_aligntest (count, 4, true);
17188 src = change_address (srcmem, SImode, srcptr);
17189 dest = change_address (destmem, SImode, destptr);
17190 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17191 emit_label (label);
17192 LABEL_NUSES (label) = 1;
17196 rtx label = ix86_expand_aligntest (count, 2, true);
17197 src = change_address (srcmem, HImode, srcptr);
17198 dest = change_address (destmem, HImode, destptr);
17199 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17200 emit_label (label);
17201 LABEL_NUSES (label) = 1;
17205 rtx label = ix86_expand_aligntest (count, 1, true);
17206 src = change_address (srcmem, QImode, srcptr);
17207 dest = change_address (destmem, QImode, destptr);
17208 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17209 emit_label (label);
17210 LABEL_NUSES (label) = 1;
/* Non-stringop variant: keep a running OFFSET register and use plain
   moves with reg+reg addressing.  */
17215 rtx offset = force_reg (Pmode, const0_rtx);
17220 rtx label = ix86_expand_aligntest (count, 4, true);
17221 src = change_address (srcmem, SImode, srcptr);
17222 dest = change_address (destmem, SImode, destptr);
17223 emit_move_insn (dest, src);
17224 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17225 true, OPTAB_LIB_WIDEN);
17227 emit_move_insn (offset, tmp);
17228 emit_label (label);
17229 LABEL_NUSES (label) = 1;
17233 rtx label = ix86_expand_aligntest (count, 2, true);
17234 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17235 src = change_address (srcmem, HImode, tmp);
17236 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17237 dest = change_address (destmem, HImode, tmp);
17238 emit_move_insn (dest, src);
17239 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17240 true, OPTAB_LIB_WIDEN);
17242 emit_move_insn (offset, tmp);
17243 emit_label (label);
17244 LABEL_NUSES (label) = 1;
17248 rtx label = ix86_expand_aligntest (count, 1, true);
17249 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17250 src = change_address (srcmem, QImode, tmp);
17251 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17252 dest = change_address (destmem, QImode, tmp);
17253 emit_move_insn (dest, src);
17254 emit_label (label);
17255 LABEL_NUSES (label) = 1;
17260 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
17262 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17263 rtx count, int max_size)
/* Mask COUNT down to the residual byte count, then store it one byte at
   a time using the generic set/move loop (srcmem/srcptr NULL selects
   the "set" flavor).  */
17266 expand_simple_binop (counter_mode (count), AND, count,
17267 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17268 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17269 gen_lowpart (QImode, value), count, QImode,
17273 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
17275 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Constant count: straight-line stores, one power-of-two chunk per set
   bit of the residual count.  VALUE is assumed pre-broadcast to the
   widest mode used here.  */
17279 if (CONST_INT_P (count))
17281 HOST_WIDE_INT countval = INTVAL (count);
17284 if ((countval & 0x10) && max_size > 16)
17288 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17289 emit_insn (gen_strset (destptr, dest, value));
17290 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17291 emit_insn (gen_strset (destptr, dest, value));
/* 16-byte residuals only arise on 64-bit paths here.  */
17294 gcc_unreachable ();
17297 if ((countval & 0x08) && max_size > 8)
17301 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17302 emit_insn (gen_strset (destptr, dest, value));
17306 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17307 emit_insn (gen_strset (destptr, dest, value));
17308 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17309 emit_insn (gen_strset (destptr, dest, value));
17313 if ((countval & 0x04) && max_size > 4)
17315 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17316 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17319 if ((countval & 0x02) && max_size > 2)
17321 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17322 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17325 if ((countval & 0x01) && max_size > 1)
17327 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17328 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large variable residual: byte loop fallback.  */
17335 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable residual: test each size bit of COUNT and store
   conditionally; strset advances DESTPTR itself.  */
17340 rtx label = ix86_expand_aligntest (count, 16, true);
17343 dest = change_address (destmem, DImode, destptr);
17344 emit_insn (gen_strset (destptr, dest, value));
17345 emit_insn (gen_strset (destptr, dest, value));
17349 dest = change_address (destmem, SImode, destptr);
17350 emit_insn (gen_strset (destptr, dest, value));
17351 emit_insn (gen_strset (destptr, dest, value));
17352 emit_insn (gen_strset (destptr, dest, value));
17353 emit_insn (gen_strset (destptr, dest, value));
17355 emit_label (label);
17356 LABEL_NUSES (label) = 1;
17360 rtx label = ix86_expand_aligntest (count, 8, true);
17363 dest = change_address (destmem, DImode, destptr);
17364 emit_insn (gen_strset (destptr, dest, value));
17368 dest = change_address (destmem, SImode, destptr);
17369 emit_insn (gen_strset (destptr, dest, value));
17370 emit_insn (gen_strset (destptr, dest, value));
17372 emit_label (label);
17373 LABEL_NUSES (label) = 1;
17377 rtx label = ix86_expand_aligntest (count, 4, true);
17378 dest = change_address (destmem, SImode, destptr);
17379 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17380 emit_label (label);
17381 LABEL_NUSES (label) = 1;
17385 rtx label = ix86_expand_aligntest (count, 2, true);
17386 dest = change_address (destmem, HImode, destptr);
17387 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17388 emit_label (label);
17389 LABEL_NUSES (label) = 1;
17393 rtx label = ix86_expand_aligntest (count, 1, true);
17394 dest = change_address (destmem, QImode, destptr);
17395 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17396 emit_label (label);
17397 LABEL_NUSES (label) = 1;
17401 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
17402 DESIRED_ALIGNMENT. */
17404 expand_movmem_prologue (rtx destmem, rtx srcmem,
17405 rtx destptr, rtx srcptr, rtx count,
17406 int align, int desired_alignment)
/* For each alignment bit between ALIGN and DESIRED_ALIGNMENT, copy one
   small chunk when DESTPTR has that bit set, and shrink COUNT to match.
   Each aligntest jumps past its copy when already aligned.  */
17408 if (align <= 1 && desired_alignment > 1)
17410 rtx label = ix86_expand_aligntest (destptr, 1, false);
17411 srcmem = change_address (srcmem, QImode, srcptr);
17412 destmem = change_address (destmem, QImode, destptr);
17413 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17414 ix86_adjust_counter (count, 1);
17415 emit_label (label);
17416 LABEL_NUSES (label) = 1;
17418 if (align <= 2 && desired_alignment > 2)
17420 rtx label = ix86_expand_aligntest (destptr, 2, false);
17421 srcmem = change_address (srcmem, HImode, srcptr);
17422 destmem = change_address (destmem, HImode, destptr);
17423 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17424 ix86_adjust_counter (count, 2);
17425 emit_label (label);
17426 LABEL_NUSES (label) = 1;
17428 if (align <= 4 && desired_alignment > 4)
17430 rtx label = ix86_expand_aligntest (destptr, 4, false);
17431 srcmem = change_address (srcmem, SImode, srcptr);
17432 destmem = change_address (destmem, SImode, destptr);
17433 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17434 ix86_adjust_counter (count, 4);
17435 emit_label (label);
17436 LABEL_NUSES (label) = 1;
/* Only alignments up to 8 are handled by the chunks above.  */
17438 gcc_assert (desired_alignment <= 8);
17441 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
17442 ALIGN_BYTES is how many bytes need to be copied. */
17444 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17445 int desired_align, int align_bytes)
17448 rtx src_size, dst_size;
/* How far SRC is from DESIRED_ALIGN alignment; negative when unknown.  */
17450 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17451 if (src_align_bytes >= 0)
17452 src_align_bytes = desired_align - src_align_bytes;
/* Remember the original sizes so they can be reduced by ALIGN_BYTES at
   the end.  */
17453 src_size = MEM_SIZE (src);
17454 dst_size = MEM_SIZE (dst);
/* Copy 1/2/4-byte chunks per set bit of ALIGN_BYTES, upgrading the
   recorded MEM alignment as each boundary is reached.  SRC's alignment
   is only upgraded when its misalignment provably matches DST's.  */
17455 if (align_bytes & 1)
17457 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17458 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17460 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17462 if (align_bytes & 2)
17464 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17465 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17466 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17467 set_mem_align (dst, 2 * BITS_PER_UNIT);
17468 if (src_align_bytes >= 0
17469 && (src_align_bytes & 1) == (align_bytes & 1)
17470 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17471 set_mem_align (src, 2 * BITS_PER_UNIT);
17473 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17475 if (align_bytes & 4)
17477 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17478 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17479 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17480 set_mem_align (dst, 4 * BITS_PER_UNIT);
17481 if (src_align_bytes >= 0)
17483 unsigned int src_align = 0;
17484 if ((src_align_bytes & 3) == (align_bytes & 3))
17486 else if ((src_align_bytes & 1) == (align_bytes & 1))
17488 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17489 set_mem_align (src, src_align * BITS_PER_UNIT);
17492 emit_insn (gen_strmov (destreg, dst, srcreg, src));
/* Switch back to BLKmode views past the copied prologue and record the
   alignment now guaranteed for the remaining bulk copy.  */
17494 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17495 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17496 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17497 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17498 if (src_align_bytes >= 0)
17500 unsigned int src_align = 0;
17501 if ((src_align_bytes & 7) == (align_bytes & 7))
17503 else if ((src_align_bytes & 3) == (align_bytes & 3))
17505 else if ((src_align_bytes & 1) == (align_bytes & 1))
17507 if (src_align > (unsigned int) desired_align)
17508 src_align = desired_align;
17509 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17510 set_mem_align (src, src_align * BITS_PER_UNIT);
/* Shrink the recorded sizes of both MEMs by the bytes already copied.
   Fixed: the second call previously targeted DST with SRC's size,
   clobbering DST's just-set size and leaving SRC's MEM_SIZE stale.  */
17513 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17515 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17520 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
17521 DESIRED_ALIGNMENT. */
17523 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17524 int align, int desired_alignment)
/* Mirror of expand_movmem_prologue for memset: store one small chunk
   per misaligned bit of DESTPTR and shrink COUNT accordingly.  */
17526 if (align <= 1 && desired_alignment > 1)
17528 rtx label = ix86_expand_aligntest (destptr, 1, false);
17529 destmem = change_address (destmem, QImode, destptr);
17530 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17531 ix86_adjust_counter (count, 1);
17532 emit_label (label);
17533 LABEL_NUSES (label) = 1;
17535 if (align <= 2 && desired_alignment > 2)
17537 rtx label = ix86_expand_aligntest (destptr, 2, false);
17538 destmem = change_address (destmem, HImode, destptr);
17539 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17540 ix86_adjust_counter (count, 2);
17541 emit_label (label);
17542 LABEL_NUSES (label) = 1;
17544 if (align <= 4 && desired_alignment > 4)
17546 rtx label = ix86_expand_aligntest (destptr, 4, false);
17547 destmem = change_address (destmem, SImode, destptr);
17548 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17549 ix86_adjust_counter (count, 4);
17550 emit_label (label);
17551 LABEL_NUSES (label) = 1;
/* Only alignments up to 8 are handled by the chunks above.  */
17553 gcc_assert (desired_alignment <= 8);
17556 /* Set enough from DST to align DST known to by aligned by ALIGN to
17557 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
17559 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17560 int desired_align, int align_bytes)
/* Remember the original size so it can be reduced by ALIGN_BYTES.  */
17563 rtx dst_size = MEM_SIZE (dst);
/* Store 1/2/4-byte chunks per set bit of ALIGN_BYTES, upgrading the
   recorded MEM alignment as each boundary is reached.  */
17564 if (align_bytes & 1)
17566 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17568 emit_insn (gen_strset (destreg, dst,
17569 gen_lowpart (QImode, value)));
17571 if (align_bytes & 2)
17573 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17574 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17575 set_mem_align (dst, 2 * BITS_PER_UNIT);
17577 emit_insn (gen_strset (destreg, dst,
17578 gen_lowpart (HImode, value)));
17580 if (align_bytes & 4)
17582 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17583 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17584 set_mem_align (dst, 4 * BITS_PER_UNIT);
17586 emit_insn (gen_strset (destreg, dst,
17587 gen_lowpart (SImode, value)));
/* BLKmode view of the remainder, now known DESIRED_ALIGN-aligned, with
   the recorded size shrunk by the bytes already stored.  */
17589 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17590 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17591 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17593 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17597 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
/* Returns the stringop algorithm to use; *DYNAMIC_CHECK is set to -1
   unless a runtime size check against a libcall is wanted (set below
   under TARGET_INLINE_STRINGOPS_DYNAMICALLY).  */
17598 static enum stringop_alg
17599 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17600 int *dynamic_check)
17602 const struct stringop_algs * algs;
17603 bool optimize_for_speed;
17604 /* Algorithms using the rep prefix want at least edi and ecx;
17605 additionally, memset wants eax and memcpy wants esi. Don't
17606 consider such algorithms if the user has appropriated those
17607 registers for their own purposes. */
17608 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17610 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
/* An algorithm is usable when it is not a rep-prefix variant, or when
   the rep-prefix registers are all available.  */
17612 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17613 || (alg != rep_prefix_1_byte \
17614 && alg != rep_prefix_4_byte \
17615 && alg != rep_prefix_8_byte))
17616 const struct processor_costs *cost;
17618 /* Even if the string operation call is cold, we still might spend a lot
17619 of time processing large blocks. */
17620 if (optimize_function_for_size_p (cfun)
17621 || (optimize_insn_for_size_p ()
17622 && expected_size != -1 && expected_size < 256))
17623 optimize_for_speed = false;
17625 optimize_for_speed = true;
17627 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17629 *dynamic_check = -1;
/* Pick the per-processor size table for memset vs. memcpy.  */
17631 algs = &cost->memset[TARGET_64BIT != 0];
17633 algs = &cost->memcpy[TARGET_64BIT != 0];
/* An explicit -mstringop-strategy override wins when usable.  */
17634 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17635 return stringop_alg;
17636 /* rep; movq or rep; movl is the smallest variant. */
17637 else if (!optimize_for_speed)
17639 if (!count || (count & 3))
17640 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17642 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17644 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17646 else if (expected_size != -1 && expected_size < 4)
17647 return loop_1_byte;
17648 else if (expected_size != -1)
17651 enum stringop_alg alg = libcall;
/* NOTE(review): "NAX_STRINGOP_ALGS" looks like a typo for
   MAX_STRINGOP_ALGS -- confirm against the header defining the table.  */
17652 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17654 /* We get here if the algorithms that were not libcall-based
17655 were rep-prefix based and we are unable to use rep prefixes
17656 based on global register usage. Break out of the loop and
17657 use the heuristic below. */
17658 if (algs->size[i].max == 0)
17660 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17662 enum stringop_alg candidate = algs->size[i].alg;
17664 if (candidate != libcall && ALG_USABLE_P (candidate))
17666 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17667 last non-libcall inline algorithm. */
17668 if (TARGET_INLINE_ALL_STRINGOPS)
17670 /* When the current size is best to be copied by a libcall,
17671 but we are still forced to inline, run the heuristic below
17672 that will pick code for medium sized blocks. */
17673 if (alg != libcall)
17677 else if (ALG_USABLE_P (candidate))
17681 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17683 /* When asked to inline the call anyway, try to pick meaningful choice.
17684 We look for maximal size of block that is faster to copy by hand and
17685 take blocks of at most of that size guessing that average size will
17686 be roughly half of the block.
17688 If this turns out to be bad, we might simply specify the preferred
17689 choice in ix86_costs. */
17690 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17691 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17694 enum stringop_alg alg;
17696 bool any_alg_usable_p = true;
/* NOTE(review): same suspected NAX_/MAX_ typo as above.  */
17698 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17700 enum stringop_alg candidate = algs->size[i].alg;
17701 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17703 if (candidate != libcall && candidate
17704 && ALG_USABLE_P (candidate))
17705 max = algs->size[i].max;
17707 /* If there aren't any usable algorithms, then recursing on
17708 smaller sizes isn't going to find anything. Just return the
17709 simple byte-at-a-time copy loop. */
17710 if (!any_alg_usable_p)
17712 /* Pick something reasonable. */
17713 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17714 *dynamic_check = 128;
17715 return loop_1_byte;
/* Recurse with half of MAX as the expected size; the recursion must
   settle on a non-libcall algorithm (asserted below).  */
17719 alg = decide_alg (count, max / 2, memset, dynamic_check);
17720 gcc_assert (*dynamic_check == -1);
17721 gcc_assert (alg != libcall);
17722 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17723 *dynamic_check = max;
17726 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17727 #undef ALG_USABLE_P
17730 /* Decide on alignment. We know that the operand is already aligned to ALIGN
17731 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* NOTE(review): several switch/case lines are missing from this extract;
   the visible text is preserved verbatim.  */
17733 decide_alignment (int align,
17734 enum stringop_alg alg,
17737 int desired_align = 0;
/* An unhandled algorithm enumerator is a bug.  */
17741 gcc_unreachable ();
17743 case unrolled_loop:
17744 desired_align = GET_MODE_SIZE (Pmode);
17746 case rep_prefix_8_byte:
17749 case rep_prefix_4_byte:
17750 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17751 copying whole cacheline at once. */
17752 if (TARGET_PENTIUMPRO)
17757 case rep_prefix_1_byte:
17758 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17759 copying whole cacheline at once. */
17760 if (TARGET_PENTIUMPRO)
/* Never request less alignment than already known, and don't bother
   aligning blocks expected to be tiny.  */
17774 if (desired_align < align)
17775 desired_align = align;
17776 if (expected_size != -1 && expected_size < 4)
17777 desired_align = align;
17778 return desired_align;
17781 /* Return the smallest power of 2 greater than VAL. */
/* NOTE(review): the body of this helper is not visible in this extract.  */
17783 smallest_pow2_greater_than (int val)
17791 /* Expand string move (memcpy) operation. Use i386 string operations when
17792 profitable. expand_setmem contains similar code. The code depends upon
17793 architecture, block size and alignment, but always has the same
17796 1) Prologue guard: Conditional that jumps up to epilogues for small
17797 blocks that can be handled by epilogue alone. This is faster but
17798 also needed for correctness, since prologue assume the block is larger
17799 than the desired alignment.
17801 Optional dynamic check for size and libcall for large
17802 blocks is emitted here too, with -minline-stringops-dynamically.
17804 2) Prologue: copy first few bytes in order to get destination aligned
17805 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
17806 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
17807 We emit either a jump tree on power of two sized blocks, or a byte loop.
17809 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17810 with specified algorithm.
17812 4) Epilogue: code copying tail of the block that is too small to be
17813 handled by main body (or up to size guarded by prologue guard). */
/* NOTE(review): interleaved physical lines are missing from this extract;
   code text is preserved verbatim.  */
17816 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17817 rtx expected_align_exp, rtx expected_size_exp)
17823 rtx jump_around_label = NULL;
17824 HOST_WIDE_INT align = 1;
17825 unsigned HOST_WIDE_INT count = 0;
17826 HOST_WIDE_INT expected_size = -1;
17827 int size_needed = 0, epilogue_size_needed;
17828 int desired_align = 0, align_bytes = 0;
17829 enum stringop_alg alg;
17831 bool need_zero_guard = false;
17833 if (CONST_INT_P (align_exp))
17834 align = INTVAL (align_exp);
17835 /* i386 can do misaligned access on reasonably increased cost. */
17836 if (CONST_INT_P (expected_align_exp)
17837 && INTVAL (expected_align_exp) > align)
17838 align = INTVAL (expected_align_exp);
17839 /* ALIGN is the minimum of destination and source alignment, but we care here
17840 just about destination alignment. */
17841 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
17842 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
17844 if (CONST_INT_P (count_exp))
17845 count = expected_size = INTVAL (count_exp);
17846 if (CONST_INT_P (expected_size_exp) && count == 0)
17847 expected_size = INTVAL (expected_size_exp);
17849 /* Make sure we don't need to care about overflow later on. */
17850 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17853 /* Step 0: Decide on preferred algorithm, desired alignment and
17854 size of chunks to be copied by main loop. */
17856 alg = decide_alg (count, expected_size, false, &dynamic_check);
17857 desired_align = decide_alignment (align, alg, expected_size);
17859 if (!TARGET_ALIGN_STRINGOPS)
17860 align = desired_align;
17862 if (alg == libcall)
17864 gcc_assert (alg != no_stringop);
/* Force the count and both addresses into registers.  */
17866 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17867 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17868 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm chunk size; loop-based variants need a zero-iteration
   guard because the main loop copies at least one chunk.  */
17873 gcc_unreachable ();
17875 need_zero_guard = true;
17876 size_needed = GET_MODE_SIZE (Pmode);
17878 case unrolled_loop:
17879 need_zero_guard = true;
17880 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17882 case rep_prefix_8_byte:
17885 case rep_prefix_4_byte:
17888 case rep_prefix_1_byte:
17892 need_zero_guard = true;
17897 epilogue_size_needed = size_needed;
17899 /* Step 1: Prologue guard. */
17901 /* Alignment code needs count to be in register. */
17902 if (CONST_INT_P (count_exp) && desired_align > align)
17904 if (INTVAL (count_exp) > desired_align
17905 && INTVAL (count_exp) > size_needed)
17908 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
17909 if (align_bytes <= 0)
17912 align_bytes = desired_align - align_bytes;
17914 if (align_bytes == 0)
17915 count_exp = force_reg (counter_mode (count_exp), count_exp);
17917 gcc_assert (desired_align >= 1 && align >= 1);
17919 /* Ensure that alignment prologue won't copy past end of block. */
17920 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17922 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17923 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17924 Make sure it is power of 2. */
17925 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17929 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
17931 /* If main algorithm works on QImode, no epilogue is needed.
17932 For small sizes just don't align anything. */
17933 if (size_needed == 1)
17934 desired_align = align;
/* Small-block guard: jump to the epilogue when the count is below
   what the prologue/main body assume.  */
17941 label = gen_label_rtx ();
17942 emit_cmp_and_jump_insns (count_exp,
17943 GEN_INT (epilogue_size_needed),
17944 LTU, 0, counter_mode (count_exp), 1, label);
17945 if (expected_size == -1 || expected_size < epilogue_size_needed)
17946 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17948 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17952 /* Emit code to decide on runtime whether library call or inline should be
17954 if (dynamic_check != -1)
17956 if (CONST_INT_P (count_exp))
17958 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
17960 emit_block_move_via_libcall (dst, src, count_exp, false);
17961 count_exp = const0_rtx;
17967 rtx hot_label = gen_label_rtx ();
17968 jump_around_label = gen_label_rtx ();
17969 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17970 LEU, 0, GET_MODE (count_exp), 1, hot_label);
17971 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17972 emit_block_move_via_libcall (dst, src, count_exp, false);
17973 emit_jump (jump_around_label);
17974 emit_label (hot_label);
17978 /* Step 2: Alignment prologue. */
17980 if (desired_align > align)
17982 if (align_bytes == 0)
17984 /* Except for the first move in epilogue, we no longer know
17985 constant offset in aliasing info. It doesn't seem worth
17986 the pain to maintain it for the first move, so throw away
17988 src = change_address (src, BLKmode, srcreg);
17989 dst = change_address (dst, BLKmode, destreg);
17990 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
17995 /* If we know how many bytes need to be stored before dst is
17996 sufficiently aligned, maintain aliasing info accurately. */
17997 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
17998 desired_align, align_bytes);
17999 count_exp = plus_constant (count_exp, -align_bytes);
18000 count -= align_bytes;
18002 if (need_zero_guard
18003 && (count < (unsigned HOST_WIDE_INT) size_needed
18004 || (align_bytes == 0
18005 && count < ((unsigned HOST_WIDE_INT) size_needed
18006 + desired_align - align))))
18008 /* It is possible that we copied enough so the main loop will not
18010 gcc_assert (size_needed > 1);
18011 if (label == NULL_RTX)
18012 label = gen_label_rtx ();
18013 emit_cmp_and_jump_insns (count_exp,
18014 GEN_INT (size_needed),
18015 LTU, 0, counter_mode (count_exp), 1, label);
18016 if (expected_size == -1
18017 || expected_size < (desired_align - align) / 2 + size_needed)
18018 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18020 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18023 if (label && size_needed == 1)
18025 emit_label (label);
18026 LABEL_NUSES (label) = 1;
18028 epilogue_size_needed = 1;
18030 else if (label == NULL_RTX)
18031 epilogue_size_needed = size_needed;
18033 /* Step 3: Main loop. */
18039 gcc_unreachable ();
18041 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18042 count_exp, QImode, 1, expected_size);
18045 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18046 count_exp, Pmode, 1, expected_size);
18048 case unrolled_loop:
18049 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18050 registers for 4 temporaries anyway. */
18051 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18052 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18055 case rep_prefix_8_byte:
18056 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18059 case rep_prefix_4_byte:
18060 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18063 case rep_prefix_1_byte:
18064 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18068 /* Adjust properly the offset of src and dest memory for aliasing. */
18069 if (CONST_INT_P (count_exp))
18071 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18072 (count / size_needed) * size_needed);
18073 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18074 (count / size_needed) * size_needed);
18078 src = change_address (src, BLKmode, srcreg);
18079 dst = change_address (dst, BLKmode, destreg);
18082 /* Step 4: Epilogue to copy the remaining bytes. */
18086 /* When the main loop is done, COUNT_EXP might hold original count,
18087 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18088 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18089 bytes. Compensate if needed. */
18091 if (size_needed < epilogue_size_needed)
18094 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18095 GEN_INT (size_needed - 1), count_exp, 1,
18097 if (tmp != count_exp)
18098 emit_move_insn (count_exp, tmp);
18100 emit_label (label);
18101 LABEL_NUSES (label) = 1;
18104 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18105 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18106 epilogue_size_needed);
18107 if (jump_around_label)
18108 emit_label (jump_around_label);
18112 /* Helper function for memcpy. For QImode value 0xXY produce
18113 0xXYXYXYXY of wide specified by MODE. This is essentially
18114 a * 0x10101010, but we can do slightly better than
18115 synth_mult by unwinding the sequence by hand on CPUs with
/* NOTE(review): in this file the visible caller is
   promote_duplicated_reg_to_size on the setmem (memset) path -- the
   "memcpy" in the comment above may be stale; confirm.  */
18118 promote_duplicated_reg (enum machine_mode mode, rtx val)
18120 enum machine_mode valmode = GET_MODE (val);
18122 int nops = mode == DImode ? 3 : 2;
18124 gcc_assert (mode == SImode || mode == DImode);
18125 if (val == const0_rtx)
18126 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: replicate it at compile time.  */
18127 if (CONST_INT_P (val))
18129 HOST_WIDE_INT v = INTVAL (val) & 255;
18133 if (mode == DImode)
18134 v |= (v << 16) << 16;
18135 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18138 if (valmode == VOIDmode)
18140 if (valmode != QImode)
18141 val = gen_lowpart (QImode, val);
18142 if (mode == QImode)
18144 if (!TARGET_PARTIAL_REG_STALL)
/* Compare the cost of a multiply by 0x0101... against the cost of the
   shift/or expansion sequence below.  */
18146 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18147 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18148 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18149 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18151 rtx reg = convert_modes (mode, QImode, val, true);
18152 tmp = promote_duplicated_reg (mode, const1_rtx);
18153 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18158 rtx reg = convert_modes (mode, QImode, val, true);
/* Duplicate the low byte into the second byte via insv when partial
   register stalls are not a concern.  */
18160 if (!TARGET_PARTIAL_REG_STALL)
18161 if (mode == SImode)
18162 emit_insn (gen_movsi_insv_1 (reg, reg));
18164 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18167 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18168 NULL, 1, OPTAB_DIRECT);
18170 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18172 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18173 NULL, 1, OPTAB_DIRECT);
18174 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* NOTE(review): a 32-bit shift only makes sense for DImode; the guard
   below reads "mode == SImode", which looks inverted ("mode != SImode"
   expected) -- verify against the unexcerpted source.  */
18175 if (mode == SImode)
18177 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18178 NULL, 1, OPTAB_DIRECT);
18179 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18184 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18185 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18186 alignment from ALIGN to DESIRED_ALIGN. */
18188 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
/* Widest first: promote to DImode, else SImode, else HImode, else use
   VAL unchanged when only byte stores will be emitted.  */
18193 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18194 promoted_val = promote_duplicated_reg (DImode, val);
18195 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18196 promoted_val = promote_duplicated_reg (SImode, val);
18197 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18198 promoted_val = promote_duplicated_reg (HImode, val);
18200 promoted_val = val;
18202 return promoted_val;
18205 /* Expand string clear operation (bzero). Use i386 string operations when
18206 profitable. See expand_movmem comment for explanation of individual
18207 steps performed. */
/* NOTE(review): interleaved physical lines are missing from this extract;
   code text is preserved verbatim.  */
18209 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18210 rtx expected_align_exp, rtx expected_size_exp)
18215 rtx jump_around_label = NULL;
18216 HOST_WIDE_INT align = 1;
18217 unsigned HOST_WIDE_INT count = 0;
18218 HOST_WIDE_INT expected_size = -1;
18219 int size_needed = 0, epilogue_size_needed;
18220 int desired_align = 0, align_bytes = 0;
18221 enum stringop_alg alg;
18222 rtx promoted_val = NULL;
18223 bool force_loopy_epilogue = false;
18225 bool need_zero_guard = false;
18227 if (CONST_INT_P (align_exp))
18228 align = INTVAL (align_exp);
18229 /* i386 can do misaligned access on reasonably increased cost. */
18230 if (CONST_INT_P (expected_align_exp)
18231 && INTVAL (expected_align_exp) > align)
18232 align = INTVAL (expected_align_exp);
18233 if (CONST_INT_P (count_exp))
18234 count = expected_size = INTVAL (count_exp);
18235 if (CONST_INT_P (expected_size_exp) && count == 0)
18236 expected_size = INTVAL (expected_size_exp);
18238 /* Make sure we don't need to care about overflow later on. */
18239 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18242 /* Step 0: Decide on preferred algorithm, desired alignment and
18243 size of chunks to be copied by main loop. */
18245 alg = decide_alg (count, expected_size, true, &dynamic_check);
18246 desired_align = decide_alignment (align, alg, expected_size);
18248 if (!TARGET_ALIGN_STRINGOPS)
18249 align = desired_align;
18251 if (alg == libcall)
18253 gcc_assert (alg != no_stringop);
18255 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18256 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* Per-algorithm chunk size; loop-based variants need a zero-iteration
   guard because the main loop stores at least one chunk.  */
18261 gcc_unreachable ();
18263 need_zero_guard = true;
18264 size_needed = GET_MODE_SIZE (Pmode);
18266 case unrolled_loop:
18267 need_zero_guard = true;
18268 size_needed = GET_MODE_SIZE (Pmode) * 4;
18270 case rep_prefix_8_byte:
18273 case rep_prefix_4_byte:
18276 case rep_prefix_1_byte:
18280 need_zero_guard = true;
18284 epilogue_size_needed = size_needed;
18286 /* Step 1: Prologue guard. */
18288 /* Alignment code needs count to be in register. */
18289 if (CONST_INT_P (count_exp) && desired_align > align)
18291 if (INTVAL (count_exp) > desired_align
18292 && INTVAL (count_exp) > size_needed)
18295 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18296 if (align_bytes <= 0)
18299 align_bytes = desired_align - align_bytes;
18301 if (align_bytes == 0)
/* Use a counter mode wide enough for the known count.  */
18303 enum machine_mode mode = SImode;
18304 if (TARGET_64BIT && (count & ~0xffffffff))
18306 count_exp = force_reg (mode, count_exp);
18309 /* Do the cheap promotion to allow better CSE across the
18310 main loop and epilogue (ie one load of the big constant in the
18311 front of all code. */
18312 if (CONST_INT_P (val_exp))
18313 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18314 desired_align, align);
18315 /* Ensure that alignment prologue won't copy past end of block. */
18316 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18318 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18319 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18320 Make sure it is power of 2. */
18321 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18323 /* To improve performance of small blocks, we jump around the VAL
18324 promoting mode. This mean that if the promoted VAL is not constant,
18325 we might not use it in the epilogue and have to use byte
18327 if (epilogue_size_needed > 2 && !promoted_val)
18328 force_loopy_epilogue = true;
18331 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18333 /* If main algorithm works on QImode, no epilogue is needed.
18334 For small sizes just don't align anything. */
18335 if (size_needed == 1)
18336 desired_align = align;
/* Small-block guard: jump to the epilogue when count is below what
   the prologue/main body assume.  */
18343 label = gen_label_rtx ();
18344 emit_cmp_and_jump_insns (count_exp,
18345 GEN_INT (epilogue_size_needed),
18346 LTU, 0, counter_mode (count_exp), 1, label);
18347 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18348 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18350 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Runtime choice between libcall and inline expansion.  */
18353 if (dynamic_check != -1)
18355 rtx hot_label = gen_label_rtx ();
18356 jump_around_label = gen_label_rtx ();
18357 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18358 LEU, 0, counter_mode (count_exp), 1, hot_label);
18359 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18360 set_storage_via_libcall (dst, count_exp, val_exp, false);
18361 emit_jump (jump_around_label);
18362 emit_label (hot_label);
18365 /* Step 2: Alignment prologue. */
18367 /* Do the expensive promotion once we branched off the small blocks. */
18369 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18370 desired_align, align);
18371 gcc_assert (desired_align >= 1 && align >= 1);
18373 if (desired_align > align)
18375 if (align_bytes == 0)
18377 /* Except for the first move in epilogue, we no longer know
18378 constant offset in aliasing info. It doesn't seem worth
18379 the pain to maintain it for the first move, so throw away
18381 dst = change_address (dst, BLKmode, destreg);
18382 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18387 /* If we know how many bytes need to be stored before dst is
18388 sufficiently aligned, maintain aliasing info accurately. */
18389 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18390 desired_align, align_bytes);
18391 count_exp = plus_constant (count_exp, -align_bytes);
18392 count -= align_bytes;
18394 if (need_zero_guard
18395 && (count < (unsigned HOST_WIDE_INT) size_needed
18396 || (align_bytes == 0
18397 && count < ((unsigned HOST_WIDE_INT) size_needed
18398 + desired_align - align))))
18400 /* It is possible that we copied enough so the main loop will not
18402 gcc_assert (size_needed > 1);
18403 if (label == NULL_RTX)
18404 label = gen_label_rtx ();
18405 emit_cmp_and_jump_insns (count_exp,
18406 GEN_INT (size_needed),
18407 LTU, 0, counter_mode (count_exp), 1, label);
18408 if (expected_size == -1
18409 || expected_size < (desired_align - align) / 2 + size_needed)
18410 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18412 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18415 if (label && size_needed == 1)
18417 emit_label (label);
18418 LABEL_NUSES (label) = 1;
18420 promoted_val = val_exp;
18421 epilogue_size_needed = 1;
18423 else if (label == NULL_RTX)
18424 epilogue_size_needed = size_needed;
18426 /* Step 3: Main loop. */
18432 gcc_unreachable ();
18434 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18435 count_exp, QImode, 1, expected_size);
18438 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18439 count_exp, Pmode, 1, expected_size);
18441 case unrolled_loop:
18442 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18443 count_exp, Pmode, 4, expected_size);
18445 case rep_prefix_8_byte:
18446 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18449 case rep_prefix_4_byte:
18450 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18453 case rep_prefix_1_byte:
18454 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18458 /* Adjust properly the offset of src and dest memory for aliasing. */
18459 if (CONST_INT_P (count_exp))
18460 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18461 (count / size_needed) * size_needed);
18463 dst = change_address (dst, BLKmode, destreg);
18465 /* Step 4: Epilogue to copy the remaining bytes. */
18469 /* When the main loop is done, COUNT_EXP might hold original count,
18470 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18471 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18472 bytes. Compensate if needed. */
18474 if (size_needed < epilogue_size_needed)
18477 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18478 GEN_INT (size_needed - 1), count_exp, 1,
18480 if (tmp != count_exp)
18481 emit_move_insn (count_exp, tmp);
18483 emit_label (label);
18484 LABEL_NUSES (label) = 1;
18487 if (count_exp != const0_rtx && epilogue_size_needed > 1)
/* When VAL was not promoted, the epilogue must store byte-by-byte.  */
18489 if (force_loopy_epilogue)
18490 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18491 epilogue_size_needed);
18493 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18494 epilogue_size_needed);
18496 if (jump_around_label)
18497 emit_label (jump_around_label);
18501 /* Expand the appropriate insns for doing strlen if not just doing
18504 out = result, initialized with the start address
18505 align_rtx = alignment of the address.
18506 scratch = scratch register, initialized with the startaddress when
18507 not aligned, otherwise undefined
18509 This is just the body. It needs the initializations mentioned above and
18510 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): interleaved physical lines are missing from this extract;
   code text is preserved verbatim.  */
18513 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18517 rtx align_2_label = NULL_RTX;
18518 rtx align_3_label = NULL_RTX;
18519 rtx align_4_label = gen_label_rtx ();
18520 rtx end_0_label = gen_label_rtx ();
18522 rtx tmpreg = gen_reg_rtx (SImode);
18523 rtx scratch = gen_reg_rtx (SImode);
18527 if (CONST_INT_P (align_rtx))
18528 align = INTVAL (align_rtx);
18530 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18532 /* Is there a known alignment and is it less than 4? */
18535 rtx scratch1 = gen_reg_rtx (Pmode);
18536 emit_move_insn (scratch1, out);
18537 /* Is there a known alignment and is it not 2? */
18540 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18541 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18543 /* Leave just the 3 lower bits. */
18544 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18545 NULL_RTX, 0, OPTAB_WIDEN);
18547 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18548 Pmode, 1, align_4_label);
18549 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18550 Pmode, 1, align_2_label);
18551 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18552 Pmode, 1, align_3_label);
18556 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18557 check if is aligned to 4 - byte. */
18559 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18560 NULL_RTX, 0, OPTAB_WIDEN);
18562 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18563 Pmode, 1, align_4_label);
18566 mem = change_address (src, QImode, out);
18568 /* Now compare the bytes. */
18570 /* Compare the first n unaligned byte on a byte per byte basis. */
18571 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18572 QImode, 1, end_0_label);
18574 /* Increment the address. */
18575 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18577 /* Not needed with an alignment of 2 */
18580 emit_label (align_2_label);
18582 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18585 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18587 emit_label (align_3_label);
18590 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18593 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18596 /* Generate loop to check 4 bytes at a time. It is not a good idea to
18597 align this loop. It gives only huge programs, but does not help to
18599 emit_label (align_4_label);
18601 mem = change_address (src, SImode, out);
18602 emit_move_insn (scratch, mem);
18603 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18605 /* This formula yields a nonzero result iff one of the bytes is zero.
18606 This saves three branches inside loop and many cycles. */
18608 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18609 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18610 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18611 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18612 gen_int_mode (0x80808080, SImode)));
18613 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found: locate which of the four bytes it is.  */
18618 rtx reg = gen_reg_rtx (SImode);
18619 rtx reg2 = gen_reg_rtx (Pmode);
18620 emit_move_insn (reg, tmpreg);
18621 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18623 /* If zero is not in the first two bytes, move two bytes forward. */
18624 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18625 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18626 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18627 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18628 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18631 /* Emit lea manually to avoid clobbering of flags. */
18632 emit_insn (gen_rtx_SET (SImode, reg2,
18633 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18635 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18636 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18637 emit_insn (gen_rtx_SET (VOIDmode, out,
18638 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
18645 rtx end_2_label = gen_label_rtx ();
18646 /* Is zero in the first two bytes? */
18648 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18649 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18650 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18651 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18652 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18654 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18655 JUMP_LABEL (tmp) = end_2_label;
18657 /* Not in the first two. Move two bytes forward. */
18658 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18659 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18661 emit_label (end_2_label);
18665 /* Avoid branch in fixing the byte. */
18666 tmpreg = gen_lowpart (QImode, tmpreg);
18667 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18668 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18669 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18671 emit_label (end_0_label);
18674 /* Expand strlen. */
/* Returns nonzero on success (FAILs the pattern otherwise); picks either
   the unrolled SImode scan or a repnz scasb sequence.  */
18677 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18679 rtx addr, scratch1, scratch2, scratch3, scratch4;
18681 /* The generic case of strlen expander is long. Avoid its
18682 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
18684 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18685 && !TARGET_INLINE_ALL_STRINGOPS
18686 && !optimize_insn_for_size_p ()
18687 && (!CONST_INT_P (align) || INTVAL (align) < 4))
18690 addr = force_reg (Pmode, XEXP (src, 0));
18691 scratch1 = gen_reg_rtx (Pmode);
18693 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18694 && !optimize_insn_for_size_p ())
18696 /* Well it seems that some optimizer does not combine a call like
18697 foo(strlen(bar), strlen(bar));
18698 when the move and the subtraction is done here. It does calculate
18699 the length just once when these instructions are done inside of
18700 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
18701 often used and I use one fewer register for the lifetime of
18702 output_strlen_unroll() this is better. */
18704 emit_move_insn (out, addr);
18706 ix86_expand_strlensi_unroll_1 (out, src, align);
18708 /* strlensi_unroll_1 returns the address of the zero at the end of
18709 the string, like memchr(), so compute the length by subtracting
18710 the start address. */
18711 emit_insn ((*ix86_gen_sub3) (out, out, addr));
18717 /* Can't use this if the user has appropriated eax, ecx, or edi. */
18718 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
18721 scratch2 = gen_reg_rtx (Pmode);
18722 scratch3 = gen_reg_rtx (Pmode);
18723 scratch4 = force_reg (Pmode, constm1_rtx);
18725 emit_move_insn (scratch3, addr);
18726 eoschar = force_reg (QImode, eoschar);
18728 src = replace_equiv_address_nv (src, scratch3);
18730 /* If .md starts supporting :P, this can be done in .md. */
18731 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
18732 scratch4), UNSPEC_SCAS);
18733 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scas leaves a negated count; complement and add -1 to get length.  */
18734 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
18735 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
18740 /* For given symbol (function) construct code to compute address of its PLT
18741 entry in large x86-64 PIC model. */
/* Only valid for SYMBOL_REFs under -mcmodel=large with PIC (asserted
   below).  Emits tmp = const(@PLTOFF(symbol)); tmp += PIC register;
   the result (returned in the full source) is the PLT entry address.  */
18743 construct_plt_address (rtx symbol)
18745 rtx tmp = gen_reg_rtx (Pmode);
18746 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
18748 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
18749 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
18751 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
18752 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx))
/* Emit a call insn.  RETVAL is the value register (or NULL for a call
   with no result), FNADDR a MEM wrapping the callee address, CALLARG1
   the argument-bytes count, POP the number of bytes the callee pops
   (const0_rtx meaning none), SIBCALL nonzero for tail calls.
   NOTE(review): CALLARG2's exact encoding (vector-register count in
   AL, and -2 as a sentinel below) is only partially visible here --
   verify against the full source.  */
18757 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
18759 rtx pop, int sibcall)
18761 rtx use = NULL, call;
18763 if (pop == const0_rtx)
/* 64-bit ABIs never use callee-pop.  */
18765 gcc_assert (!TARGET_64BIT || !pop);
18767 if (TARGET_MACHO && !TARGET_64BIT)
18770 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
18771 fnaddr = machopic_indirect_call_target (fnaddr);
18776 /* Static functions and indirect calls don't need the pic register. */
18777 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
18778 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18779 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18780 use_reg (&use, pic_offset_table_rtx)
/* x86-64 varargs: AL carries the number of SSE registers used.  */
18783 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18785 rtx al = gen_rtx_REG (QImode, AX_REG);
18786 emit_move_insn (al, callarg2);
18787 use_reg (&use, al)
/* Large PIC model: route non-local calls through a computed PLT
   address; otherwise force non-operand addresses into a register.  */
18790 if (ix86_cmodel == CM_LARGE_PIC
18791 && GET_CODE (fnaddr) == MEM
18792 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18793 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18794 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18795 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
18797 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18798 fnaddr = gen_rtx_MEM (QImode, fnaddr)
/* 64-bit sibcalls must use a register not clobbered by the epilogue;
   r11 is the only call-clobbered register never used for arguments.  */
18800 if (sibcall && TARGET_64BIT
18801 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
18804 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18805 fnaddr = gen_rtx_REG (Pmode, R11_REG);
18806 emit_move_insn (fnaddr, addr);
18807 fnaddr = gen_rtx_MEM (QImode, fnaddr)
18810 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18812 call = gen_rtx_SET (VOIDmode, retval, call)
/* Represent callee-pop as a parallel with a stack-pointer adjust.  */
18815 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18816 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18817 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop))
/* MS ABI caller calling a SysV callee: the SysV callee may clobber
   registers the MS ABI treats as call-saved, so list them.  */
18820 && ix86_cfun_abi () == MS_ABI
18821 && (!callarg2 || INTVAL (callarg2) != -2)
18823 /* We need to represent that SI and DI registers are clobbered
18825 static int clobbered_registers[] = {
18826 XMM6_REG, XMM7_REG, XMM8_REG,
18827 XMM9_REG, XMM10_REG, XMM11_REG,
18828 XMM12_REG, XMM13_REG, XMM14_REG,
18829 XMM15_REG, SI_REG, DI_REG
18832 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
18833 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
18834 UNSPEC_MS_TO_SYSV_CALL);
18838 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
18839 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
18842 (SSE_REGNO_P (clobbered_registers[i])
18844 clobbered_registers[i]));
18846 call = gen_rtx_PARALLEL (VOIDmode,
18847 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
18851 call = emit_call_insn (call);
/* Attach the USE list (PIC register, AL) built above.  */
18853 CALL_INSN_FUNCTION_USAGE (call) = use
18857 /* Clear stack slot assignments remembered from previous functions.
18858 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates and returns (in the full source) a fresh, GC-allocated,
   zero-initialized machine_function for the function being compiled.  */
18861 static struct machine_function *
18862 ix86_init_machine_status (void)
18864 struct machine_function *f;
/* GGC_CNEW zero-fills, so unlisted fields start at 0/NULL.  */
18866 f = GGC_CNEW (struct machine_function);
/* -1 means "not yet computed" for the prologue/epilogue heuristic.  */
18867 f->use_fast_prologue_epilogue_nregs = -1;
18868 f->tls_descriptor_call_expanded_p = 0;
18869 f->call_abi = ix86_abi
18874 /* Return a MEM corresponding to a stack slot with mode MODE.
18875 Allocate a new slot if necessary.
18877 The RTL for a function can have several slots available: N is
18878 which slot to use. */
18881 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18883 struct stack_local_entry *s;
18885 gcc_assert (n < MAX_386_STACK_LOCALS);
18887 /* Virtual slot is valid only before vregs are instantiated. */
18888 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse a previously allocated (mode, n) slot; return a copy so
   callers may modify the rtx freely.  */
18890 for (s = ix86_stack_locals; s; s = s->next)
18891 if (s->mode == mode && s->n == n)
18892 return copy_rtx (s->rtl);
/* Not found: allocate a new slot and link it onto the per-function
   list so later requests hit the cache above.  */
18894 s = (struct stack_local_entry *)
18895 ggc_alloc (sizeof (struct stack_local_entry));
18898 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18900 s->next = ix86_stack_locals;
18901 ix86_stack_locals = s
18905 /* Construct the SYMBOL_REF for the tls_get_addr function. */
18907 static GTY(()) rtx ix86_tls_get_addr;
/* Lazily creates and caches the SYMBOL_REF; the GNU TLS variant uses
   the triple-underscore name.  */
18909 ix86_tls_get_addr (void)
18912 if (!ix86_tls_symbol)
18914 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18915 (TARGET_ANY_GNU_TLS
18917 ? "___tls_get_addr"
18918 : "__tls_get_addr");
18921 return ix86_tls_symbol
18924 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
18926 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily creates and caches the symbol, marking it global-dynamic TLS
   so the rest of the compiler treats references appropriately.  */
18928 ix86_tls_module_base (void)
18931 if (!ix86_tls_module_base_symbol)
18933 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
18934 "_TLS_MODULE_BASE_");
18935 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
18936 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
18939 return ix86_tls_module_base_symbol
18942 /* Calculate the length of the memory address in the instruction
18943 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Returns the extra bytes (SIB + displacement) contributed by ADDR;
   the concrete return values are on lines elided from this view.  */
18946 memory_address_length (rtx addr)
18948 struct ix86_address parts;
18949 rtx base, index, disp;
/* Autoinc/autodec forms encode no extra address bytes.  */
18953 if (GET_CODE (addr) == PRE_DEC
18954 || GET_CODE (addr) == POST_INC
18955 || GET_CODE (addr) == PRE_MODIFY
18956 || GET_CODE (addr) == POST_MODIFY)
18959 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the hard-register identity checks below work.  */
18962 if (parts.base && GET_CODE (parts.base) == SUBREG)
18963 parts.base = SUBREG_REG (parts.base);
18964 if (parts.index && GET_CODE (parts.index) == SUBREG)
18965 parts.index = SUBREG_REG (parts.index);
18968 index = parts.index;
18973 - esp as the base always wants an index,
18974 - ebp as the base always wants a displacement. */
18976 /* Register Indirect. */
18977 if (base && !index && !disp)
18979 /* esp (for its index) and ebp (for its displacement) need
18980 the two-byte modrm form. */
18981 if (addr == stack_pointer_rtx
18982 || addr == arg_pointer_rtx
18983 || addr == frame_pointer_rtx
18984 || addr == hard_frame_pointer_rtx)
18988 /* Direct Addressing. */
18989 else if (disp && !base && !index)
18994 /* Find the length of the displacement constant. */
/* constraint K = signed 8-bit immediate, hence a 1-byte disp8.  */
18997 if (base && satisfies_constraint_K (disp))
19002 /* ebp always wants a displacement. */
19003 else if (base == hard_frame_pointer_rtx)
19006 /* An index requires the two-byte modrm form.... */
19008 /* ...like esp, which always wants an index. */
19009 || base == stack_pointer_rtx
19010 || base == arg_pointer_rtx
19011 || base == frame_pointer_rtx
19018 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19019 is set, expect that insn have 8bit immediate alternative. */
/* Scans the insn's operands for the (single) constant operand and
   returns its encoded size; the per-mode return values are on lines
   elided from this view.  */
19021 ix86_attr_length_immediate_default (rtx insn, int shortform)
19025 extract_insn_cached (insn);
19026 for (i = recog_data.n_operands - 1; i >= 0; --i)
19027 if (CONSTANT_P (recog_data.operand[i]))
/* constraint K = signed 8-bit value, encodable as a 1-byte imm8.  */
19030 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
19034 switch (get_attr_mode (insn))
19045 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
19050 fatal_insn ("unknown insn mode", insn)
19056 /* Compute default value for "length_address" attribute. */
/* LEA encodes its address in SET_SRC; all other insns are scanned for
   their (single) MEM operand.  Returns 0 (elided) when no memory
   operand exists.  */
19058 ix86_attr_length_address_default (rtx insn)
19062 if (get_attr_type (insn) == TYPE_LEA)
19064 rtx set = PATTERN (insn);
/* LEA may be wrapped in a PARALLEL (e.g. with a clobber).  */
19066 if (GET_CODE (set) == PARALLEL)
19067 set = XVECEXP (set, 0, 0);
19069 gcc_assert (GET_CODE (set) == SET);
19071 return memory_address_length (SET_SRC (set));
19074 extract_insn_cached (insn);
19075 for (i = recog_data.n_operands - 1; i >= 0; --i)
19076 if (MEM_P (recog_data.operand[i]))
19078 return memory_address_length (XEXP (recog_data.operand[i], 0))
19084 /* Compute default value for "length_vex" attribute. It includes
19085 2 or 3 byte VEX prefix and 1 opcode byte. */
/* Returns 3 or 4 (values on elided lines): 2-byte VEX prefix + opcode
   when possible, otherwise 3-byte VEX prefix + opcode.  */
19088 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19093 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19094 byte VEX prefix. */
19095 if (!has_0f_opcode || has_vex_w)
19098 /* We can always use 2 byte VEX prefix in 32bit. */
19102 extract_insn_cached (insn);
19104 for (i = recog_data.n_operands - 1; i >= 0; --i)
19105 if (REG_P (recog_data.operand[i]))
19107 /* REX.W bit uses 3 byte VEX prefix. */
19108 if (GET_MODE (recog_data.operand[i]) == DImode)
19113 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19114 if (MEM_P (recog_data.operand[i])
19115 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19122 /* Return the maximum number of instructions a cpu can issue. */
/* Used by the scheduler; the per-group return values are on lines
   elided from this view (cases sharing a group fall through to one
   return).  */
19125 ix86_issue_rate (void)
19129 case PROCESSOR_PENTIUM:
19130 case PROCESSOR_ATOM:
19134 case PROCESSOR_PENTIUMPRO:
19135 case PROCESSOR_PENTIUM4:
19136 case PROCESSOR_ATHLON:
19138 case PROCESSOR_AMDFAM10:
19139 case PROCESSOR_NOCONA:
19140 case PROCESSOR_GENERIC32:
19141 case PROCESSOR_GENERIC64:
19144 case PROCESSOR_CORE2:
19152 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19153 by DEP_INSN and nothing set by DEP_INSN. */
19156 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19160 /* Simplify the test for uninteresting insns. */
19161 if (insn_type != TYPE_SETCC
19162 && insn_type != TYPE_ICMOV
19163 && insn_type != TYPE_FCMOV
19164 && insn_type != TYPE_IBR)
19167 if ((set = single_set (dep_insn)) != 0)
19169 set = SET_DEST (set);
19172 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19173 && XVECLEN (PATTERN (dep_insn), 0) == 2
19174 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19175 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19177 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19178 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19183 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19186 /* This test is true if the dependent insn reads the flags but
19187 not any other potentially set register. */
19188 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19191 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19197 /* Return true iff USE_INSN has a memory address with operands set by
/* ... SET_INSN (an address-generation-interlock dependency).  Checks
   only the first MEM operand found; returns false (elided line) when
   USE_INSN has no memory operand.  */
19201 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19204 extract_insn_cached (use_insn);
19205 for (i = recog_data.n_operands - 1; i >= 0; --i)
19206 if (MEM_P (recog_data.operand[i]))
19208 rtx addr = XEXP (recog_data.operand[i], 0);
19209 return modified_in_p (addr, set_insn) != 0
/* Scheduler hook: adjust COST of the dependency LINK between INSN and
   DEP_INSN for the current -mtune target.  Returns the adjusted cost
   (return statements are on lines elided from this view).  */
19215 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19217 enum attr_type insn_type, dep_insn_type;
19218 enum attr_memory memory;
19220 int dep_insn_code_number;
19222 /* Anti and output dependencies have zero cost on all CPUs. */
19223 if (REG_NOTE_KIND (link) != 0)
19226 dep_insn_code_number = recog_memoized (dep_insn);
19228 /* If we can't recognize the insns, we can't really do anything. */
19229 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19232 insn_type = get_attr_type (insn);
19233 dep_insn_type = get_attr_type (dep_insn);
19237 case PROCESSOR_PENTIUM:
19238 /* Address Generation Interlock adds a cycle of latency. */
19239 if (insn_type == TYPE_LEA)
19241 rtx addr = PATTERN (insn);
19243 if (GET_CODE (addr) == PARALLEL)
19244 addr = XVECEXP (addr, 0, 0);
19246 gcc_assert (GET_CODE (addr) == SET);
19248 addr = SET_SRC (addr);
19249 if (modified_in_p (addr, dep_insn))
19252 else if (ix86_agi_dependent (dep_insn, insn))
19255 /* ??? Compares pair with jump/setcc. */
19256 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19259 /* Floating point stores require value to be ready one cycle earlier. */
19260 if (insn_type == TYPE_FMOV
19261 && get_attr_memory (insn) == MEMORY_STORE
19262 && !ix86_agi_dependent (dep_insn, insn))
19266 case PROCESSOR_PENTIUMPRO:
19267 memory = get_attr_memory (insn);
19269 /* INT->FP conversion is expensive. */
19270 if (get_attr_fp_int_src (dep_insn))
19273 /* There is one cycle extra latency between an FP op and a store. */
19274 if (insn_type == TYPE_FMOV
19275 && (set = single_set (dep_insn)) != NULL_RTX
19276 && (set2 = single_set (insn)) != NULL_RTX
19277 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19278 && MEM_P (SET_DEST (set2)))
19281 /* Show ability of reorder buffer to hide latency of load by executing
19282 in parallel with previous instruction in case
19283 previous instruction is not needed to compute the address. */
19284 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19285 && !ix86_agi_dependent (dep_insn, insn))
19287 /* Claim moves to take one cycle, as core can issue one load
19288 at time and the next load can start cycle later. */
19289 if (dep_insn_type == TYPE_IMOV
19290 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): the case labels between here and the next visible
   line (presumably K6/K8-era processors) are elided.  */
19298 memory = get_attr_memory (insn);
19300 /* The esp dependency is resolved before the instruction is really
19302 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19303 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19306 /* INT->FP conversion is expensive. */
19307 if (get_attr_fp_int_src (dep_insn))
19310 /* Show ability of reorder buffer to hide latency of load by executing
19311 in parallel with previous instruction in case
19312 previous instruction is not needed to compute the address. */
19313 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19314 && !ix86_agi_dependent (dep_insn, insn))
19316 /* Claim moves to take one cycle, as core can issue one load
19317 at time and the next load can start cycle later. */
19318 if (dep_insn_type == TYPE_IMOV
19319 || dep_insn_type == TYPE_FMOV)
19328 case PROCESSOR_ATHLON:
19330 case PROCESSOR_AMDFAM10:
19331 case PROCESSOR_ATOM:
19332 case PROCESSOR_GENERIC32:
19333 case PROCESSOR_GENERIC64:
19334 memory = get_attr_memory (insn);
19336 /* Show ability of reorder buffer to hide latency of load by executing
19337 in parallel with previous instruction in case
19338 previous instruction is not needed to compute the address. */
19339 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19340 && !ix86_agi_dependent (dep_insn, insn))
19342 enum attr_unit unit = get_attr_unit (insn);
19345 /* Because of the difference between the length of integer and
19346 floating unit pipeline preparation stages, the memory operands
19347 for floating point are cheaper.
19349 ??? For Athlon it the difference is most probably 2. */
19350 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19353 loadcost = TARGET_ATHLON ? 2 : 0;
/* Subtract the load latency the OoO core can hide, floored at 0
   (the else arm is elided).  */
19355 if (cost >= loadcost)
19368 /* How many alternative schedules to try. This should be as wide as the
19369 scheduling freedom in the DFA, but no wider. Making this value too
19370 large results extra work for the scheduler. */
/* Per-processor return values are on lines elided from this view.  */
19373 ia32_multipass_dfa_lookahead (void)
19377 case PROCESSOR_PENTIUM:
19380 case PROCESSOR_PENTIUMPRO:
19390 /* Compute the alignment given to a constant that is being placed in memory.
19391 EXP is the constant and ALIGN is the alignment that the object would
19393 The value of this function is used instead of that alignment to align
/* Returns the (possibly raised) alignment in bits; unchanged-ALIGN
   return is on an elided line.  */
19397 ix86_constant_alignment (tree exp, int align)
19399 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19400 || TREE_CODE (exp) == INTEGER_CST)
19402 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19404 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants get word alignment so block moves are fast.  */
19407 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19408 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19409 return BITS_PER_WORD
19414 /* Compute the alignment for a static variable.
19415 TYPE is the data type, and ALIGN is the alignment that
19416 the object would ordinarily have. The value of this function is used
19417 instead of that alignment to align the object. */
19420 ix86_data_alignment (tree type, int align)
/* Cap the boost at 256 bits (or a word when optimizing for size).  */
19422 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19424 if (AGGREGATE_TYPE_P (type)
19425 && TYPE_SIZE (type)
19426 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19427 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19428 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19429 && align < max_align)
19432 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19433 to 16byte boundary. */
19436 if (AGGREGATE_TYPE_P (type)
19437 && TYPE_SIZE (type)
19438 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19439 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19440 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-class boosts: DFmode data to 64 bits, 128-bit modes to
   128 bits (return statements are on elided lines).  */
19444 if (TREE_CODE (type) == ARRAY_TYPE)
19446 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19448 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19451 else if (TREE_CODE (type) == COMPLEX_TYPE)
19454 if (TYPE_MODE (type) == DCmode && align < 64)
19456 if ((TYPE_MODE (type) == XCmode
19457 || TYPE_MODE (type) == TCmode) && align < 128)
/* For records/unions, key off the mode of the first field.  */
19460 else if ((TREE_CODE (type) == RECORD_TYPE
19461 || TREE_CODE (type) == UNION_TYPE
19462 || TREE_CODE (type) == QUAL_UNION_TYPE)
19463 && TYPE_FIELDS (type))
19465 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19467 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19470 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19471 || TREE_CODE (type) == INTEGER_TYPE)
19473 if (TYPE_MODE (type) == DFmode && align < 64)
19475 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19482 /* Compute the alignment for a local variable or a stack slot. EXP is
19483 the data type or decl itself, MODE is the widest mode available and
19484 ALIGN is the alignment that the object would ordinarily have. The
19485 value of this macro is used instead of that alignment to align the
19489 ix86_local_alignment (tree exp, enum machine_mode mode,
19490 unsigned int align)
/* EXP may be a decl or a type; normalize to the type (the else arm
   assigning a bare type is on an elided line).  */
19494 if (exp && DECL_P (exp))
19496 type = TREE_TYPE (exp);
19505 /* Don't do dynamic stack realignment for long long objects with
19506 -mpreferred-stack-boundary=2. */
19509 && ix86_preferred_stack_boundary < 64
19510 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19511 && (!type || !TYPE_USER_ALIGN (type))
19512 && (!decl || !DECL_USER_ALIGN (decl)))
19515 /* If TYPE is NULL, we are allocating a stack slot for caller-save
19516 register in MODE. We will return the largest alignment of XF
19520 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19521 align = GET_MODE_ALIGNMENT (DFmode);
19525 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19526 to 16byte boundary. */
19529 if (AGGREGATE_TYPE_P (type)
19530 && TYPE_SIZE (type)
19531 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19532 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19533 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-class boosts mirroring ix86_data_alignment; return
   statements are on elided lines.  */
19536 if (TREE_CODE (type) == ARRAY_TYPE)
19538 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19540 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19543 else if (TREE_CODE (type) == COMPLEX_TYPE)
19545 if (TYPE_MODE (type) == DCmode && align < 64)
19547 if ((TYPE_MODE (type) == XCmode
19548 || TYPE_MODE (type) == TCmode) && align < 128)
19551 else if ((TREE_CODE (type) == RECORD_TYPE
19552 || TREE_CODE (type) == UNION_TYPE
19553 || TREE_CODE (type) == QUAL_UNION_TYPE)
19554 && TYPE_FIELDS (type))
19556 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19558 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19561 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19562 || TREE_CODE (type) == INTEGER_TYPE)
19565 if (TYPE_MODE (type) == DFmode && align < 64)
19567 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19573 /* Emit RTL insns to initialize the variable parts of a trampoline.
19574 FNADDR is an RTX for the address of the function's pure code.
19575 CXT is an RTX for the static chain value for the function. */
19577 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* 32-bit layout: "movl $cxt,%ecx" (b9 imm32) at offset 0, then
   "jmp rel32" (e9 disp32) at offset 5; the branch selecting 32- vs
   64-bit is on an elided line.  */
19581 /* Compute offset from the end of the jmp to the target function. */
19582 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19583 plus_constant (tramp, 10),
19584 NULL_RTX, 1, OPTAB_DIRECT);
19585 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19586 gen_int_mode (0xb9, QImode));
19587 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19588 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19589 gen_int_mode (0xe9, QImode));
19590 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* 64-bit path: load FNADDR into r11, CXT into r10, then jmp *%r11.  */
19595 /* Try to load address using shorter movl instead of movabs.
19596 We may want to support movq for kernel mode, but kernel does not use
19597 trampolines at the moment. */
19598 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
/* NOTE(review): in this elided view the copy_to_mode_reg appears
   inside the zext branch; the branch bodies between 19598 and 19609
   are incomplete -- confirm placement against the full source.  */
19600 fnaddr = copy_to_mode_reg (DImode, fnaddr);
19601 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19602 gen_int_mode (0xbb41, HImode));
19603 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19604 gen_lowpart (SImode, fnaddr));
19609 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19610 gen_int_mode (0xbb49, HImode));
19611 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19615 /* Load static chain using movabs to r10. */
19616 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19617 gen_int_mode (0xba49, HImode));
19618 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19621 /* Jump to the r11 */
19622 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19623 gen_int_mode (0xff49, HImode));
19624 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19625 gen_int_mode (0xe3, QImode));
19627 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* On targets that need it, mark the trampoline's stack executable.  */
19630 #ifdef ENABLE_EXECUTE_STACK
19631 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19632 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19636 /* Codes for all the SSE/MMX builtins. */
19639 IX86_BUILTIN_ADDPS,
19640 IX86_BUILTIN_ADDSS,
19641 IX86_BUILTIN_DIVPS,
19642 IX86_BUILTIN_DIVSS,
19643 IX86_BUILTIN_MULPS,
19644 IX86_BUILTIN_MULSS,
19645 IX86_BUILTIN_SUBPS,
19646 IX86_BUILTIN_SUBSS,
19648 IX86_BUILTIN_CMPEQPS,
19649 IX86_BUILTIN_CMPLTPS,
19650 IX86_BUILTIN_CMPLEPS,
19651 IX86_BUILTIN_CMPGTPS,
19652 IX86_BUILTIN_CMPGEPS,
19653 IX86_BUILTIN_CMPNEQPS,
19654 IX86_BUILTIN_CMPNLTPS,
19655 IX86_BUILTIN_CMPNLEPS,
19656 IX86_BUILTIN_CMPNGTPS,
19657 IX86_BUILTIN_CMPNGEPS,
19658 IX86_BUILTIN_CMPORDPS,
19659 IX86_BUILTIN_CMPUNORDPS,
19660 IX86_BUILTIN_CMPEQSS,
19661 IX86_BUILTIN_CMPLTSS,
19662 IX86_BUILTIN_CMPLESS,
19663 IX86_BUILTIN_CMPNEQSS,
19664 IX86_BUILTIN_CMPNLTSS,
19665 IX86_BUILTIN_CMPNLESS,
19666 IX86_BUILTIN_CMPNGTSS,
19667 IX86_BUILTIN_CMPNGESS,
19668 IX86_BUILTIN_CMPORDSS,
19669 IX86_BUILTIN_CMPUNORDSS,
19671 IX86_BUILTIN_COMIEQSS,
19672 IX86_BUILTIN_COMILTSS,
19673 IX86_BUILTIN_COMILESS,
19674 IX86_BUILTIN_COMIGTSS,
19675 IX86_BUILTIN_COMIGESS,
19676 IX86_BUILTIN_COMINEQSS,
19677 IX86_BUILTIN_UCOMIEQSS,
19678 IX86_BUILTIN_UCOMILTSS,
19679 IX86_BUILTIN_UCOMILESS,
19680 IX86_BUILTIN_UCOMIGTSS,
19681 IX86_BUILTIN_UCOMIGESS,
19682 IX86_BUILTIN_UCOMINEQSS,
19684 IX86_BUILTIN_CVTPI2PS,
19685 IX86_BUILTIN_CVTPS2PI,
19686 IX86_BUILTIN_CVTSI2SS,
19687 IX86_BUILTIN_CVTSI642SS,
19688 IX86_BUILTIN_CVTSS2SI,
19689 IX86_BUILTIN_CVTSS2SI64,
19690 IX86_BUILTIN_CVTTPS2PI,
19691 IX86_BUILTIN_CVTTSS2SI,
19692 IX86_BUILTIN_CVTTSS2SI64,
19694 IX86_BUILTIN_MAXPS,
19695 IX86_BUILTIN_MAXSS,
19696 IX86_BUILTIN_MINPS,
19697 IX86_BUILTIN_MINSS,
19699 IX86_BUILTIN_LOADUPS,
19700 IX86_BUILTIN_STOREUPS,
19701 IX86_BUILTIN_MOVSS,
19703 IX86_BUILTIN_MOVHLPS,
19704 IX86_BUILTIN_MOVLHPS,
19705 IX86_BUILTIN_LOADHPS,
19706 IX86_BUILTIN_LOADLPS,
19707 IX86_BUILTIN_STOREHPS,
19708 IX86_BUILTIN_STORELPS,
19710 IX86_BUILTIN_MASKMOVQ,
19711 IX86_BUILTIN_MOVMSKPS,
19712 IX86_BUILTIN_PMOVMSKB,
19714 IX86_BUILTIN_MOVNTPS,
19715 IX86_BUILTIN_MOVNTQ,
19717 IX86_BUILTIN_LOADDQU,
19718 IX86_BUILTIN_STOREDQU,
19720 IX86_BUILTIN_PACKSSWB,
19721 IX86_BUILTIN_PACKSSDW,
19722 IX86_BUILTIN_PACKUSWB,
19724 IX86_BUILTIN_PADDB,
19725 IX86_BUILTIN_PADDW,
19726 IX86_BUILTIN_PADDD,
19727 IX86_BUILTIN_PADDQ,
19728 IX86_BUILTIN_PADDSB,
19729 IX86_BUILTIN_PADDSW,
19730 IX86_BUILTIN_PADDUSB,
19731 IX86_BUILTIN_PADDUSW,
19732 IX86_BUILTIN_PSUBB,
19733 IX86_BUILTIN_PSUBW,
19734 IX86_BUILTIN_PSUBD,
19735 IX86_BUILTIN_PSUBQ,
19736 IX86_BUILTIN_PSUBSB,
19737 IX86_BUILTIN_PSUBSW,
19738 IX86_BUILTIN_PSUBUSB,
19739 IX86_BUILTIN_PSUBUSW,
19742 IX86_BUILTIN_PANDN,
19746 IX86_BUILTIN_PAVGB,
19747 IX86_BUILTIN_PAVGW,
19749 IX86_BUILTIN_PCMPEQB,
19750 IX86_BUILTIN_PCMPEQW,
19751 IX86_BUILTIN_PCMPEQD,
19752 IX86_BUILTIN_PCMPGTB,
19753 IX86_BUILTIN_PCMPGTW,
19754 IX86_BUILTIN_PCMPGTD,
19756 IX86_BUILTIN_PMADDWD,
19758 IX86_BUILTIN_PMAXSW,
19759 IX86_BUILTIN_PMAXUB,
19760 IX86_BUILTIN_PMINSW,
19761 IX86_BUILTIN_PMINUB,
19763 IX86_BUILTIN_PMULHUW,
19764 IX86_BUILTIN_PMULHW,
19765 IX86_BUILTIN_PMULLW,
19767 IX86_BUILTIN_PSADBW,
19768 IX86_BUILTIN_PSHUFW,
19770 IX86_BUILTIN_PSLLW,
19771 IX86_BUILTIN_PSLLD,
19772 IX86_BUILTIN_PSLLQ,
19773 IX86_BUILTIN_PSRAW,
19774 IX86_BUILTIN_PSRAD,
19775 IX86_BUILTIN_PSRLW,
19776 IX86_BUILTIN_PSRLD,
19777 IX86_BUILTIN_PSRLQ,
19778 IX86_BUILTIN_PSLLWI,
19779 IX86_BUILTIN_PSLLDI,
19780 IX86_BUILTIN_PSLLQI,
19781 IX86_BUILTIN_PSRAWI,
19782 IX86_BUILTIN_PSRADI,
19783 IX86_BUILTIN_PSRLWI,
19784 IX86_BUILTIN_PSRLDI,
19785 IX86_BUILTIN_PSRLQI,
19787 IX86_BUILTIN_PUNPCKHBW,
19788 IX86_BUILTIN_PUNPCKHWD,
19789 IX86_BUILTIN_PUNPCKHDQ,
19790 IX86_BUILTIN_PUNPCKLBW,
19791 IX86_BUILTIN_PUNPCKLWD,
19792 IX86_BUILTIN_PUNPCKLDQ,
19794 IX86_BUILTIN_SHUFPS,
19796 IX86_BUILTIN_RCPPS,
19797 IX86_BUILTIN_RCPSS,
19798 IX86_BUILTIN_RSQRTPS,
19799 IX86_BUILTIN_RSQRTPS_NR,
19800 IX86_BUILTIN_RSQRTSS,
19801 IX86_BUILTIN_RSQRTF,
19802 IX86_BUILTIN_SQRTPS,
19803 IX86_BUILTIN_SQRTPS_NR,
19804 IX86_BUILTIN_SQRTSS,
19806 IX86_BUILTIN_UNPCKHPS,
19807 IX86_BUILTIN_UNPCKLPS,
19809 IX86_BUILTIN_ANDPS,
19810 IX86_BUILTIN_ANDNPS,
19812 IX86_BUILTIN_XORPS,
19815 IX86_BUILTIN_LDMXCSR,
19816 IX86_BUILTIN_STMXCSR,
19817 IX86_BUILTIN_SFENCE,
19819 /* 3DNow! Original */
19820 IX86_BUILTIN_FEMMS,
19821 IX86_BUILTIN_PAVGUSB,
19822 IX86_BUILTIN_PF2ID,
19823 IX86_BUILTIN_PFACC,
19824 IX86_BUILTIN_PFADD,
19825 IX86_BUILTIN_PFCMPEQ,
19826 IX86_BUILTIN_PFCMPGE,
19827 IX86_BUILTIN_PFCMPGT,
19828 IX86_BUILTIN_PFMAX,
19829 IX86_BUILTIN_PFMIN,
19830 IX86_BUILTIN_PFMUL,
19831 IX86_BUILTIN_PFRCP,
19832 IX86_BUILTIN_PFRCPIT1,
19833 IX86_BUILTIN_PFRCPIT2,
19834 IX86_BUILTIN_PFRSQIT1,
19835 IX86_BUILTIN_PFRSQRT,
19836 IX86_BUILTIN_PFSUB,
19837 IX86_BUILTIN_PFSUBR,
19838 IX86_BUILTIN_PI2FD,
19839 IX86_BUILTIN_PMULHRW,
19841 /* 3DNow! Athlon Extensions */
19842 IX86_BUILTIN_PF2IW,
19843 IX86_BUILTIN_PFNACC,
19844 IX86_BUILTIN_PFPNACC,
19845 IX86_BUILTIN_PI2FW,
19846 IX86_BUILTIN_PSWAPDSI,
19847 IX86_BUILTIN_PSWAPDSF,
19850 IX86_BUILTIN_ADDPD,
19851 IX86_BUILTIN_ADDSD,
19852 IX86_BUILTIN_DIVPD,
19853 IX86_BUILTIN_DIVSD,
19854 IX86_BUILTIN_MULPD,
19855 IX86_BUILTIN_MULSD,
19856 IX86_BUILTIN_SUBPD,
19857 IX86_BUILTIN_SUBSD,
19859 IX86_BUILTIN_CMPEQPD,
19860 IX86_BUILTIN_CMPLTPD,
19861 IX86_BUILTIN_CMPLEPD,
19862 IX86_BUILTIN_CMPGTPD,
19863 IX86_BUILTIN_CMPGEPD,
19864 IX86_BUILTIN_CMPNEQPD,
19865 IX86_BUILTIN_CMPNLTPD,
19866 IX86_BUILTIN_CMPNLEPD,
19867 IX86_BUILTIN_CMPNGTPD,
19868 IX86_BUILTIN_CMPNGEPD,
19869 IX86_BUILTIN_CMPORDPD,
19870 IX86_BUILTIN_CMPUNORDPD,
19871 IX86_BUILTIN_CMPEQSD,
19872 IX86_BUILTIN_CMPLTSD,
19873 IX86_BUILTIN_CMPLESD,
19874 IX86_BUILTIN_CMPNEQSD,
19875 IX86_BUILTIN_CMPNLTSD,
19876 IX86_BUILTIN_CMPNLESD,
19877 IX86_BUILTIN_CMPORDSD,
19878 IX86_BUILTIN_CMPUNORDSD,
19880 IX86_BUILTIN_COMIEQSD,
19881 IX86_BUILTIN_COMILTSD,
19882 IX86_BUILTIN_COMILESD,
19883 IX86_BUILTIN_COMIGTSD,
19884 IX86_BUILTIN_COMIGESD,
19885 IX86_BUILTIN_COMINEQSD,
19886 IX86_BUILTIN_UCOMIEQSD,
19887 IX86_BUILTIN_UCOMILTSD,
19888 IX86_BUILTIN_UCOMILESD,
19889 IX86_BUILTIN_UCOMIGTSD,
19890 IX86_BUILTIN_UCOMIGESD,
19891 IX86_BUILTIN_UCOMINEQSD,
19893 IX86_BUILTIN_MAXPD,
19894 IX86_BUILTIN_MAXSD,
19895 IX86_BUILTIN_MINPD,
19896 IX86_BUILTIN_MINSD,
19898 IX86_BUILTIN_ANDPD,
19899 IX86_BUILTIN_ANDNPD,
19901 IX86_BUILTIN_XORPD,
19903 IX86_BUILTIN_SQRTPD,
19904 IX86_BUILTIN_SQRTSD,
19906 IX86_BUILTIN_UNPCKHPD,
19907 IX86_BUILTIN_UNPCKLPD,
19909 IX86_BUILTIN_SHUFPD,
19911 IX86_BUILTIN_LOADUPD,
19912 IX86_BUILTIN_STOREUPD,
19913 IX86_BUILTIN_MOVSD,
19915 IX86_BUILTIN_LOADHPD,
19916 IX86_BUILTIN_LOADLPD,
19918 IX86_BUILTIN_CVTDQ2PD,
19919 IX86_BUILTIN_CVTDQ2PS,
19921 IX86_BUILTIN_CVTPD2DQ,
19922 IX86_BUILTIN_CVTPD2PI,
19923 IX86_BUILTIN_CVTPD2PS,
19924 IX86_BUILTIN_CVTTPD2DQ,
19925 IX86_BUILTIN_CVTTPD2PI,
19927 IX86_BUILTIN_CVTPI2PD,
19928 IX86_BUILTIN_CVTSI2SD,
19929 IX86_BUILTIN_CVTSI642SD,
19931 IX86_BUILTIN_CVTSD2SI,
19932 IX86_BUILTIN_CVTSD2SI64,
19933 IX86_BUILTIN_CVTSD2SS,
19934 IX86_BUILTIN_CVTSS2SD,
19935 IX86_BUILTIN_CVTTSD2SI,
19936 IX86_BUILTIN_CVTTSD2SI64,
19938 IX86_BUILTIN_CVTPS2DQ,
19939 IX86_BUILTIN_CVTPS2PD,
19940 IX86_BUILTIN_CVTTPS2DQ,
19942 IX86_BUILTIN_MOVNTI,
19943 IX86_BUILTIN_MOVNTPD,
19944 IX86_BUILTIN_MOVNTDQ,
19946 IX86_BUILTIN_MOVQ128,
19949 IX86_BUILTIN_MASKMOVDQU,
19950 IX86_BUILTIN_MOVMSKPD,
19951 IX86_BUILTIN_PMOVMSKB128,
19953 IX86_BUILTIN_PACKSSWB128,
19954 IX86_BUILTIN_PACKSSDW128,
19955 IX86_BUILTIN_PACKUSWB128,
19957 IX86_BUILTIN_PADDB128,
19958 IX86_BUILTIN_PADDW128,
19959 IX86_BUILTIN_PADDD128,
19960 IX86_BUILTIN_PADDQ128,
19961 IX86_BUILTIN_PADDSB128,
19962 IX86_BUILTIN_PADDSW128,
19963 IX86_BUILTIN_PADDUSB128,
19964 IX86_BUILTIN_PADDUSW128,
19965 IX86_BUILTIN_PSUBB128,
19966 IX86_BUILTIN_PSUBW128,
19967 IX86_BUILTIN_PSUBD128,
19968 IX86_BUILTIN_PSUBQ128,
19969 IX86_BUILTIN_PSUBSB128,
19970 IX86_BUILTIN_PSUBSW128,
19971 IX86_BUILTIN_PSUBUSB128,
19972 IX86_BUILTIN_PSUBUSW128,
19974 IX86_BUILTIN_PAND128,
19975 IX86_BUILTIN_PANDN128,
19976 IX86_BUILTIN_POR128,
19977 IX86_BUILTIN_PXOR128,
19979 IX86_BUILTIN_PAVGB128,
19980 IX86_BUILTIN_PAVGW128,
19982 IX86_BUILTIN_PCMPEQB128,
19983 IX86_BUILTIN_PCMPEQW128,
19984 IX86_BUILTIN_PCMPEQD128,
19985 IX86_BUILTIN_PCMPGTB128,
19986 IX86_BUILTIN_PCMPGTW128,
19987 IX86_BUILTIN_PCMPGTD128,
19989 IX86_BUILTIN_PMADDWD128,
19991 IX86_BUILTIN_PMAXSW128,
19992 IX86_BUILTIN_PMAXUB128,
19993 IX86_BUILTIN_PMINSW128,
19994 IX86_BUILTIN_PMINUB128,
19996 IX86_BUILTIN_PMULUDQ,
19997 IX86_BUILTIN_PMULUDQ128,
19998 IX86_BUILTIN_PMULHUW128,
19999 IX86_BUILTIN_PMULHW128,
20000 IX86_BUILTIN_PMULLW128,
20002 IX86_BUILTIN_PSADBW128,
20003 IX86_BUILTIN_PSHUFHW,
20004 IX86_BUILTIN_PSHUFLW,
20005 IX86_BUILTIN_PSHUFD,
20007 IX86_BUILTIN_PSLLDQI128,
20008 IX86_BUILTIN_PSLLWI128,
20009 IX86_BUILTIN_PSLLDI128,
20010 IX86_BUILTIN_PSLLQI128,
20011 IX86_BUILTIN_PSRAWI128,
20012 IX86_BUILTIN_PSRADI128,
20013 IX86_BUILTIN_PSRLDQI128,
20014 IX86_BUILTIN_PSRLWI128,
20015 IX86_BUILTIN_PSRLDI128,
20016 IX86_BUILTIN_PSRLQI128,
20018 IX86_BUILTIN_PSLLDQ128,
20019 IX86_BUILTIN_PSLLW128,
20020 IX86_BUILTIN_PSLLD128,
20021 IX86_BUILTIN_PSLLQ128,
20022 IX86_BUILTIN_PSRAW128,
20023 IX86_BUILTIN_PSRAD128,
20024 IX86_BUILTIN_PSRLW128,
20025 IX86_BUILTIN_PSRLD128,
20026 IX86_BUILTIN_PSRLQ128,
20028 IX86_BUILTIN_PUNPCKHBW128,
20029 IX86_BUILTIN_PUNPCKHWD128,
20030 IX86_BUILTIN_PUNPCKHDQ128,
20031 IX86_BUILTIN_PUNPCKHQDQ128,
20032 IX86_BUILTIN_PUNPCKLBW128,
20033 IX86_BUILTIN_PUNPCKLWD128,
20034 IX86_BUILTIN_PUNPCKLDQ128,
20035 IX86_BUILTIN_PUNPCKLQDQ128,
20037 IX86_BUILTIN_CLFLUSH,
20038 IX86_BUILTIN_MFENCE,
20039 IX86_BUILTIN_LFENCE,
20042 IX86_BUILTIN_ADDSUBPS,
20043 IX86_BUILTIN_HADDPS,
20044 IX86_BUILTIN_HSUBPS,
20045 IX86_BUILTIN_MOVSHDUP,
20046 IX86_BUILTIN_MOVSLDUP,
20047 IX86_BUILTIN_ADDSUBPD,
20048 IX86_BUILTIN_HADDPD,
20049 IX86_BUILTIN_HSUBPD,
20050 IX86_BUILTIN_LDDQU,
20052 IX86_BUILTIN_MONITOR,
20053 IX86_BUILTIN_MWAIT,
20056 IX86_BUILTIN_PHADDW,
20057 IX86_BUILTIN_PHADDD,
20058 IX86_BUILTIN_PHADDSW,
20059 IX86_BUILTIN_PHSUBW,
20060 IX86_BUILTIN_PHSUBD,
20061 IX86_BUILTIN_PHSUBSW,
20062 IX86_BUILTIN_PMADDUBSW,
20063 IX86_BUILTIN_PMULHRSW,
20064 IX86_BUILTIN_PSHUFB,
20065 IX86_BUILTIN_PSIGNB,
20066 IX86_BUILTIN_PSIGNW,
20067 IX86_BUILTIN_PSIGND,
20068 IX86_BUILTIN_PALIGNR,
20069 IX86_BUILTIN_PABSB,
20070 IX86_BUILTIN_PABSW,
20071 IX86_BUILTIN_PABSD,
20073 IX86_BUILTIN_PHADDW128,
20074 IX86_BUILTIN_PHADDD128,
20075 IX86_BUILTIN_PHADDSW128,
20076 IX86_BUILTIN_PHSUBW128,
20077 IX86_BUILTIN_PHSUBD128,
20078 IX86_BUILTIN_PHSUBSW128,
20079 IX86_BUILTIN_PMADDUBSW128,
20080 IX86_BUILTIN_PMULHRSW128,
20081 IX86_BUILTIN_PSHUFB128,
20082 IX86_BUILTIN_PSIGNB128,
20083 IX86_BUILTIN_PSIGNW128,
20084 IX86_BUILTIN_PSIGND128,
20085 IX86_BUILTIN_PALIGNR128,
20086 IX86_BUILTIN_PABSB128,
20087 IX86_BUILTIN_PABSW128,
20088 IX86_BUILTIN_PABSD128,
20090 /* AMDFAM10 - SSE4A New Instructions. */
20091 IX86_BUILTIN_MOVNTSD,
20092 IX86_BUILTIN_MOVNTSS,
20093 IX86_BUILTIN_EXTRQI,
20094 IX86_BUILTIN_EXTRQ,
20095 IX86_BUILTIN_INSERTQI,
20096 IX86_BUILTIN_INSERTQ,
20099 IX86_BUILTIN_BLENDPD,
20100 IX86_BUILTIN_BLENDPS,
20101 IX86_BUILTIN_BLENDVPD,
20102 IX86_BUILTIN_BLENDVPS,
20103 IX86_BUILTIN_PBLENDVB128,
20104 IX86_BUILTIN_PBLENDW128,
20109 IX86_BUILTIN_INSERTPS128,
20111 IX86_BUILTIN_MOVNTDQA,
20112 IX86_BUILTIN_MPSADBW128,
20113 IX86_BUILTIN_PACKUSDW128,
20114 IX86_BUILTIN_PCMPEQQ,
20115 IX86_BUILTIN_PHMINPOSUW128,
20117 IX86_BUILTIN_PMAXSB128,
20118 IX86_BUILTIN_PMAXSD128,
20119 IX86_BUILTIN_PMAXUD128,
20120 IX86_BUILTIN_PMAXUW128,
20122 IX86_BUILTIN_PMINSB128,
20123 IX86_BUILTIN_PMINSD128,
20124 IX86_BUILTIN_PMINUD128,
20125 IX86_BUILTIN_PMINUW128,
20127 IX86_BUILTIN_PMOVSXBW128,
20128 IX86_BUILTIN_PMOVSXBD128,
20129 IX86_BUILTIN_PMOVSXBQ128,
20130 IX86_BUILTIN_PMOVSXWD128,
20131 IX86_BUILTIN_PMOVSXWQ128,
20132 IX86_BUILTIN_PMOVSXDQ128,
20134 IX86_BUILTIN_PMOVZXBW128,
20135 IX86_BUILTIN_PMOVZXBD128,
20136 IX86_BUILTIN_PMOVZXBQ128,
20137 IX86_BUILTIN_PMOVZXWD128,
20138 IX86_BUILTIN_PMOVZXWQ128,
20139 IX86_BUILTIN_PMOVZXDQ128,
20141 IX86_BUILTIN_PMULDQ128,
20142 IX86_BUILTIN_PMULLD128,
20144 IX86_BUILTIN_ROUNDPD,
20145 IX86_BUILTIN_ROUNDPS,
20146 IX86_BUILTIN_ROUNDSD,
20147 IX86_BUILTIN_ROUNDSS,
20149 IX86_BUILTIN_PTESTZ,
20150 IX86_BUILTIN_PTESTC,
20151 IX86_BUILTIN_PTESTNZC,
20153 IX86_BUILTIN_VEC_INIT_V2SI,
20154 IX86_BUILTIN_VEC_INIT_V4HI,
20155 IX86_BUILTIN_VEC_INIT_V8QI,
20156 IX86_BUILTIN_VEC_EXT_V2DF,
20157 IX86_BUILTIN_VEC_EXT_V2DI,
20158 IX86_BUILTIN_VEC_EXT_V4SF,
20159 IX86_BUILTIN_VEC_EXT_V4SI,
20160 IX86_BUILTIN_VEC_EXT_V8HI,
20161 IX86_BUILTIN_VEC_EXT_V2SI,
20162 IX86_BUILTIN_VEC_EXT_V4HI,
20163 IX86_BUILTIN_VEC_EXT_V16QI,
20164 IX86_BUILTIN_VEC_SET_V2DI,
20165 IX86_BUILTIN_VEC_SET_V4SF,
20166 IX86_BUILTIN_VEC_SET_V4SI,
20167 IX86_BUILTIN_VEC_SET_V8HI,
20168 IX86_BUILTIN_VEC_SET_V4HI,
20169 IX86_BUILTIN_VEC_SET_V16QI,
20171 IX86_BUILTIN_VEC_PACK_SFIX,
20174 IX86_BUILTIN_CRC32QI,
20175 IX86_BUILTIN_CRC32HI,
20176 IX86_BUILTIN_CRC32SI,
20177 IX86_BUILTIN_CRC32DI,
20179 IX86_BUILTIN_PCMPESTRI128,
20180 IX86_BUILTIN_PCMPESTRM128,
20181 IX86_BUILTIN_PCMPESTRA128,
20182 IX86_BUILTIN_PCMPESTRC128,
20183 IX86_BUILTIN_PCMPESTRO128,
20184 IX86_BUILTIN_PCMPESTRS128,
20185 IX86_BUILTIN_PCMPESTRZ128,
20186 IX86_BUILTIN_PCMPISTRI128,
20187 IX86_BUILTIN_PCMPISTRM128,
20188 IX86_BUILTIN_PCMPISTRA128,
20189 IX86_BUILTIN_PCMPISTRC128,
20190 IX86_BUILTIN_PCMPISTRO128,
20191 IX86_BUILTIN_PCMPISTRS128,
20192 IX86_BUILTIN_PCMPISTRZ128,
20194 IX86_BUILTIN_PCMPGTQ,
20196 /* AES instructions */
20197 IX86_BUILTIN_AESENC128,
20198 IX86_BUILTIN_AESENCLAST128,
20199 IX86_BUILTIN_AESDEC128,
20200 IX86_BUILTIN_AESDECLAST128,
20201 IX86_BUILTIN_AESIMC128,
20202 IX86_BUILTIN_AESKEYGENASSIST128,
20204 /* PCLMUL instruction */
20205 IX86_BUILTIN_PCLMULQDQ128,
20208 IX86_BUILTIN_ADDPD256,
20209 IX86_BUILTIN_ADDPS256,
20210 IX86_BUILTIN_ADDSUBPD256,
20211 IX86_BUILTIN_ADDSUBPS256,
20212 IX86_BUILTIN_ANDPD256,
20213 IX86_BUILTIN_ANDPS256,
20214 IX86_BUILTIN_ANDNPD256,
20215 IX86_BUILTIN_ANDNPS256,
20216 IX86_BUILTIN_BLENDPD256,
20217 IX86_BUILTIN_BLENDPS256,
20218 IX86_BUILTIN_BLENDVPD256,
20219 IX86_BUILTIN_BLENDVPS256,
20220 IX86_BUILTIN_DIVPD256,
20221 IX86_BUILTIN_DIVPS256,
20222 IX86_BUILTIN_DPPS256,
20223 IX86_BUILTIN_HADDPD256,
20224 IX86_BUILTIN_HADDPS256,
20225 IX86_BUILTIN_HSUBPD256,
20226 IX86_BUILTIN_HSUBPS256,
20227 IX86_BUILTIN_MAXPD256,
20228 IX86_BUILTIN_MAXPS256,
20229 IX86_BUILTIN_MINPD256,
20230 IX86_BUILTIN_MINPS256,
20231 IX86_BUILTIN_MULPD256,
20232 IX86_BUILTIN_MULPS256,
20233 IX86_BUILTIN_ORPD256,
20234 IX86_BUILTIN_ORPS256,
20235 IX86_BUILTIN_SHUFPD256,
20236 IX86_BUILTIN_SHUFPS256,
20237 IX86_BUILTIN_SUBPD256,
20238 IX86_BUILTIN_SUBPS256,
20239 IX86_BUILTIN_XORPD256,
20240 IX86_BUILTIN_XORPS256,
20241 IX86_BUILTIN_CMPSD,
20242 IX86_BUILTIN_CMPSS,
20243 IX86_BUILTIN_CMPPD,
20244 IX86_BUILTIN_CMPPS,
20245 IX86_BUILTIN_CMPPD256,
20246 IX86_BUILTIN_CMPPS256,
20247 IX86_BUILTIN_CVTDQ2PD256,
20248 IX86_BUILTIN_CVTDQ2PS256,
20249 IX86_BUILTIN_CVTPD2PS256,
20250 IX86_BUILTIN_CVTPS2DQ256,
20251 IX86_BUILTIN_CVTPS2PD256,
20252 IX86_BUILTIN_CVTTPD2DQ256,
20253 IX86_BUILTIN_CVTPD2DQ256,
20254 IX86_BUILTIN_CVTTPS2DQ256,
20255 IX86_BUILTIN_EXTRACTF128PD256,
20256 IX86_BUILTIN_EXTRACTF128PS256,
20257 IX86_BUILTIN_EXTRACTF128SI256,
20258 IX86_BUILTIN_VZEROALL,
20259 IX86_BUILTIN_VZEROUPPER,
20260 IX86_BUILTIN_VZEROUPPER_REX64,
20261 IX86_BUILTIN_VPERMILVARPD,
20262 IX86_BUILTIN_VPERMILVARPS,
20263 IX86_BUILTIN_VPERMILVARPD256,
20264 IX86_BUILTIN_VPERMILVARPS256,
20265 IX86_BUILTIN_VPERMILPD,
20266 IX86_BUILTIN_VPERMILPS,
20267 IX86_BUILTIN_VPERMILPD256,
20268 IX86_BUILTIN_VPERMILPS256,
20269 IX86_BUILTIN_VPERM2F128PD256,
20270 IX86_BUILTIN_VPERM2F128PS256,
20271 IX86_BUILTIN_VPERM2F128SI256,
20272 IX86_BUILTIN_VBROADCASTSS,
20273 IX86_BUILTIN_VBROADCASTSD256,
20274 IX86_BUILTIN_VBROADCASTSS256,
20275 IX86_BUILTIN_VBROADCASTPD256,
20276 IX86_BUILTIN_VBROADCASTPS256,
20277 IX86_BUILTIN_VINSERTF128PD256,
20278 IX86_BUILTIN_VINSERTF128PS256,
20279 IX86_BUILTIN_VINSERTF128SI256,
20280 IX86_BUILTIN_LOADUPD256,
20281 IX86_BUILTIN_LOADUPS256,
20282 IX86_BUILTIN_STOREUPD256,
20283 IX86_BUILTIN_STOREUPS256,
20284 IX86_BUILTIN_LDDQU256,
20285 IX86_BUILTIN_MOVNTDQ256,
20286 IX86_BUILTIN_MOVNTPD256,
20287 IX86_BUILTIN_MOVNTPS256,
20288 IX86_BUILTIN_LOADDQU256,
20289 IX86_BUILTIN_STOREDQU256,
20290 IX86_BUILTIN_MASKLOADPD,
20291 IX86_BUILTIN_MASKLOADPS,
20292 IX86_BUILTIN_MASKSTOREPD,
20293 IX86_BUILTIN_MASKSTOREPS,
20294 IX86_BUILTIN_MASKLOADPD256,
20295 IX86_BUILTIN_MASKLOADPS256,
20296 IX86_BUILTIN_MASKSTOREPD256,
20297 IX86_BUILTIN_MASKSTOREPS256,
20298 IX86_BUILTIN_MOVSHDUP256,
20299 IX86_BUILTIN_MOVSLDUP256,
20300 IX86_BUILTIN_MOVDDUP256,
20302 IX86_BUILTIN_SQRTPD256,
20303 IX86_BUILTIN_SQRTPS256,
20304 IX86_BUILTIN_SQRTPS_NR256,
20305 IX86_BUILTIN_RSQRTPS256,
20306 IX86_BUILTIN_RSQRTPS_NR256,
20308 IX86_BUILTIN_RCPPS256,
20310 IX86_BUILTIN_ROUNDPD256,
20311 IX86_BUILTIN_ROUNDPS256,
20313 IX86_BUILTIN_UNPCKHPD256,
20314 IX86_BUILTIN_UNPCKLPD256,
20315 IX86_BUILTIN_UNPCKHPS256,
20316 IX86_BUILTIN_UNPCKLPS256,
20318 IX86_BUILTIN_SI256_SI,
20319 IX86_BUILTIN_PS256_PS,
20320 IX86_BUILTIN_PD256_PD,
20321 IX86_BUILTIN_SI_SI256,
20322 IX86_BUILTIN_PS_PS256,
20323 IX86_BUILTIN_PD_PD256,
20325 IX86_BUILTIN_VTESTZPD,
20326 IX86_BUILTIN_VTESTCPD,
20327 IX86_BUILTIN_VTESTNZCPD,
20328 IX86_BUILTIN_VTESTZPS,
20329 IX86_BUILTIN_VTESTCPS,
20330 IX86_BUILTIN_VTESTNZCPS,
20331 IX86_BUILTIN_VTESTZPD256,
20332 IX86_BUILTIN_VTESTCPD256,
20333 IX86_BUILTIN_VTESTNZCPD256,
20334 IX86_BUILTIN_VTESTZPS256,
20335 IX86_BUILTIN_VTESTCPS256,
20336 IX86_BUILTIN_VTESTNZCPS256,
20337 IX86_BUILTIN_PTESTZ256,
20338 IX86_BUILTIN_PTESTC256,
20339 IX86_BUILTIN_PTESTNZC256,
20341 IX86_BUILTIN_MOVMSKPD256,
20342 IX86_BUILTIN_MOVMSKPS256,
20344 /* TFmode support builtins. */
20346 IX86_BUILTIN_HUGE_VALQ,
20347 IX86_BUILTIN_FABSQ,
20348 IX86_BUILTIN_COPYSIGNQ,
20350 /* SSE5 instructions */
20351 IX86_BUILTIN_FMADDSS,
20352 IX86_BUILTIN_FMADDSD,
20353 IX86_BUILTIN_FMADDPS,
20354 IX86_BUILTIN_FMADDPD,
20355 IX86_BUILTIN_FMSUBSS,
20356 IX86_BUILTIN_FMSUBSD,
20357 IX86_BUILTIN_FMSUBPS,
20358 IX86_BUILTIN_FMSUBPD,
20359 IX86_BUILTIN_FNMADDSS,
20360 IX86_BUILTIN_FNMADDSD,
20361 IX86_BUILTIN_FNMADDPS,
20362 IX86_BUILTIN_FNMADDPD,
20363 IX86_BUILTIN_FNMSUBSS,
20364 IX86_BUILTIN_FNMSUBSD,
20365 IX86_BUILTIN_FNMSUBPS,
20366 IX86_BUILTIN_FNMSUBPD,
20367 IX86_BUILTIN_PCMOV,
20368 IX86_BUILTIN_PCMOV_V2DI,
20369 IX86_BUILTIN_PCMOV_V4SI,
20370 IX86_BUILTIN_PCMOV_V8HI,
20371 IX86_BUILTIN_PCMOV_V16QI,
20372 IX86_BUILTIN_PCMOV_V4SF,
20373 IX86_BUILTIN_PCMOV_V2DF,
20374 IX86_BUILTIN_PPERM,
20375 IX86_BUILTIN_PERMPS,
20376 IX86_BUILTIN_PERMPD,
20377 IX86_BUILTIN_PMACSSWW,
20378 IX86_BUILTIN_PMACSWW,
20379 IX86_BUILTIN_PMACSSWD,
20380 IX86_BUILTIN_PMACSWD,
20381 IX86_BUILTIN_PMACSSDD,
20382 IX86_BUILTIN_PMACSDD,
20383 IX86_BUILTIN_PMACSSDQL,
20384 IX86_BUILTIN_PMACSSDQH,
20385 IX86_BUILTIN_PMACSDQL,
20386 IX86_BUILTIN_PMACSDQH,
20387 IX86_BUILTIN_PMADCSSWD,
20388 IX86_BUILTIN_PMADCSWD,
20389 IX86_BUILTIN_PHADDBW,
20390 IX86_BUILTIN_PHADDBD,
20391 IX86_BUILTIN_PHADDBQ,
20392 IX86_BUILTIN_PHADDWD,
20393 IX86_BUILTIN_PHADDWQ,
20394 IX86_BUILTIN_PHADDDQ,
20395 IX86_BUILTIN_PHADDUBW,
20396 IX86_BUILTIN_PHADDUBD,
20397 IX86_BUILTIN_PHADDUBQ,
20398 IX86_BUILTIN_PHADDUWD,
20399 IX86_BUILTIN_PHADDUWQ,
20400 IX86_BUILTIN_PHADDUDQ,
20401 IX86_BUILTIN_PHSUBBW,
20402 IX86_BUILTIN_PHSUBWD,
20403 IX86_BUILTIN_PHSUBDQ,
20404 IX86_BUILTIN_PROTB,
20405 IX86_BUILTIN_PROTW,
20406 IX86_BUILTIN_PROTD,
20407 IX86_BUILTIN_PROTQ,
20408 IX86_BUILTIN_PROTB_IMM,
20409 IX86_BUILTIN_PROTW_IMM,
20410 IX86_BUILTIN_PROTD_IMM,
20411 IX86_BUILTIN_PROTQ_IMM,
20412 IX86_BUILTIN_PSHLB,
20413 IX86_BUILTIN_PSHLW,
20414 IX86_BUILTIN_PSHLD,
20415 IX86_BUILTIN_PSHLQ,
20416 IX86_BUILTIN_PSHAB,
20417 IX86_BUILTIN_PSHAW,
20418 IX86_BUILTIN_PSHAD,
20419 IX86_BUILTIN_PSHAQ,
20420 IX86_BUILTIN_FRCZSS,
20421 IX86_BUILTIN_FRCZSD,
20422 IX86_BUILTIN_FRCZPS,
20423 IX86_BUILTIN_FRCZPD,
20424 IX86_BUILTIN_CVTPH2PS,
20425 IX86_BUILTIN_CVTPS2PH,
20427 IX86_BUILTIN_COMEQSS,
20428 IX86_BUILTIN_COMNESS,
20429 IX86_BUILTIN_COMLTSS,
20430 IX86_BUILTIN_COMLESS,
20431 IX86_BUILTIN_COMGTSS,
20432 IX86_BUILTIN_COMGESS,
20433 IX86_BUILTIN_COMUEQSS,
20434 IX86_BUILTIN_COMUNESS,
20435 IX86_BUILTIN_COMULTSS,
20436 IX86_BUILTIN_COMULESS,
20437 IX86_BUILTIN_COMUGTSS,
20438 IX86_BUILTIN_COMUGESS,
20439 IX86_BUILTIN_COMORDSS,
20440 IX86_BUILTIN_COMUNORDSS,
20441 IX86_BUILTIN_COMFALSESS,
20442 IX86_BUILTIN_COMTRUESS,
20444 IX86_BUILTIN_COMEQSD,
20445 IX86_BUILTIN_COMNESD,
20446 IX86_BUILTIN_COMLTSD,
20447 IX86_BUILTIN_COMLESD,
20448 IX86_BUILTIN_COMGTSD,
20449 IX86_BUILTIN_COMGESD,
20450 IX86_BUILTIN_COMUEQSD,
20451 IX86_BUILTIN_COMUNESD,
20452 IX86_BUILTIN_COMULTSD,
20453 IX86_BUILTIN_COMULESD,
20454 IX86_BUILTIN_COMUGTSD,
20455 IX86_BUILTIN_COMUGESD,
20456 IX86_BUILTIN_COMORDSD,
20457 IX86_BUILTIN_COMUNORDSD,
20458 IX86_BUILTIN_COMFALSESD,
20459 IX86_BUILTIN_COMTRUESD,
20461 IX86_BUILTIN_COMEQPS,
20462 IX86_BUILTIN_COMNEPS,
20463 IX86_BUILTIN_COMLTPS,
20464 IX86_BUILTIN_COMLEPS,
20465 IX86_BUILTIN_COMGTPS,
20466 IX86_BUILTIN_COMGEPS,
20467 IX86_BUILTIN_COMUEQPS,
20468 IX86_BUILTIN_COMUNEPS,
20469 IX86_BUILTIN_COMULTPS,
20470 IX86_BUILTIN_COMULEPS,
20471 IX86_BUILTIN_COMUGTPS,
20472 IX86_BUILTIN_COMUGEPS,
20473 IX86_BUILTIN_COMORDPS,
20474 IX86_BUILTIN_COMUNORDPS,
20475 IX86_BUILTIN_COMFALSEPS,
20476 IX86_BUILTIN_COMTRUEPS,
20478 IX86_BUILTIN_COMEQPD,
20479 IX86_BUILTIN_COMNEPD,
20480 IX86_BUILTIN_COMLTPD,
20481 IX86_BUILTIN_COMLEPD,
20482 IX86_BUILTIN_COMGTPD,
20483 IX86_BUILTIN_COMGEPD,
20484 IX86_BUILTIN_COMUEQPD,
20485 IX86_BUILTIN_COMUNEPD,
20486 IX86_BUILTIN_COMULTPD,
20487 IX86_BUILTIN_COMULEPD,
20488 IX86_BUILTIN_COMUGTPD,
20489 IX86_BUILTIN_COMUGEPD,
20490 IX86_BUILTIN_COMORDPD,
20491 IX86_BUILTIN_COMUNORDPD,
20492 IX86_BUILTIN_COMFALSEPD,
20493 IX86_BUILTIN_COMTRUEPD,
20495 IX86_BUILTIN_PCOMEQUB,
20496 IX86_BUILTIN_PCOMNEUB,
20497 IX86_BUILTIN_PCOMLTUB,
20498 IX86_BUILTIN_PCOMLEUB,
20499 IX86_BUILTIN_PCOMGTUB,
20500 IX86_BUILTIN_PCOMGEUB,
20501 IX86_BUILTIN_PCOMFALSEUB,
20502 IX86_BUILTIN_PCOMTRUEUB,
20503 IX86_BUILTIN_PCOMEQUW,
20504 IX86_BUILTIN_PCOMNEUW,
20505 IX86_BUILTIN_PCOMLTUW,
20506 IX86_BUILTIN_PCOMLEUW,
20507 IX86_BUILTIN_PCOMGTUW,
20508 IX86_BUILTIN_PCOMGEUW,
20509 IX86_BUILTIN_PCOMFALSEUW,
20510 IX86_BUILTIN_PCOMTRUEUW,
20511 IX86_BUILTIN_PCOMEQUD,
20512 IX86_BUILTIN_PCOMNEUD,
20513 IX86_BUILTIN_PCOMLTUD,
20514 IX86_BUILTIN_PCOMLEUD,
20515 IX86_BUILTIN_PCOMGTUD,
20516 IX86_BUILTIN_PCOMGEUD,
20517 IX86_BUILTIN_PCOMFALSEUD,
20518 IX86_BUILTIN_PCOMTRUEUD,
20519 IX86_BUILTIN_PCOMEQUQ,
20520 IX86_BUILTIN_PCOMNEUQ,
20521 IX86_BUILTIN_PCOMLTUQ,
20522 IX86_BUILTIN_PCOMLEUQ,
20523 IX86_BUILTIN_PCOMGTUQ,
20524 IX86_BUILTIN_PCOMGEUQ,
20525 IX86_BUILTIN_PCOMFALSEUQ,
20526 IX86_BUILTIN_PCOMTRUEUQ,
20528 IX86_BUILTIN_PCOMEQB,
20529 IX86_BUILTIN_PCOMNEB,
20530 IX86_BUILTIN_PCOMLTB,
20531 IX86_BUILTIN_PCOMLEB,
20532 IX86_BUILTIN_PCOMGTB,
20533 IX86_BUILTIN_PCOMGEB,
20534 IX86_BUILTIN_PCOMFALSEB,
20535 IX86_BUILTIN_PCOMTRUEB,
20536 IX86_BUILTIN_PCOMEQW,
20537 IX86_BUILTIN_PCOMNEW,
20538 IX86_BUILTIN_PCOMLTW,
20539 IX86_BUILTIN_PCOMLEW,
20540 IX86_BUILTIN_PCOMGTW,
20541 IX86_BUILTIN_PCOMGEW,
20542 IX86_BUILTIN_PCOMFALSEW,
20543 IX86_BUILTIN_PCOMTRUEW,
20544 IX86_BUILTIN_PCOMEQD,
20545 IX86_BUILTIN_PCOMNED,
20546 IX86_BUILTIN_PCOMLTD,
20547 IX86_BUILTIN_PCOMLED,
20548 IX86_BUILTIN_PCOMGTD,
20549 IX86_BUILTIN_PCOMGED,
20550 IX86_BUILTIN_PCOMFALSED,
20551 IX86_BUILTIN_PCOMTRUED,
20552 IX86_BUILTIN_PCOMEQQ,
20553 IX86_BUILTIN_PCOMNEQ,
20554 IX86_BUILTIN_PCOMLTQ,
20555 IX86_BUILTIN_PCOMLEQ,
20556 IX86_BUILTIN_PCOMGTQ,
20557 IX86_BUILTIN_PCOMGEQ,
20558 IX86_BUILTIN_PCOMFALSEQ,
20559 IX86_BUILTIN_PCOMTRUEQ,
20564 /* Table for the ix86 builtin decls. */
20565 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20567 /* Table of all of the builtin functions that are possible with different ISA's
20568 but are waiting to be built until a function is declared to use that
20570 struct builtin_isa GTY(())
20572 tree type; /* builtin type to use in the declaration */
20573 const char *name; /* function name */
20574 int isa; /* isa_flags this builtin is defined for */
20575 bool const_p; /* true if the declaration is constant */
20578 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20581 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
20582 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
20583 * function decl in the ix86_builtins array. Returns the function decl or
20584 * NULL_TREE, if the builtin was not added.
20586 * If the front end has a special hook for builtin functions, delay adding
20587 * builtin functions that aren't in the current ISA until the ISA is changed
20588 * with function specific optimization. Doing so, can save about 300K for the
20589 * default compiler. When the builtin is expanded, check at that time whether
20592 * If the front end doesn't have a special hook, record all builtins, even if
20593 * it isn't an instruction set in the current ISA in case the user uses
20594 * function specific options for a different ISA, so that we don't get scope
20595 * errors if a builtin is added in the middle of a function scope. */
20598 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20600 tree decl = NULL_TREE;
20602 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20604 ix86_builtins_isa[(int) code].isa = mask;
20606 if ((mask & ix86_isa_flags) != 0
20607 || (lang_hooks.builtin_function
20608 == lang_hooks.builtin_function_ext_scope))
20611 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20613 ix86_builtins[(int) code] = decl;
20614 ix86_builtins_isa[(int) code].type = NULL_TREE;
20618 ix86_builtins[(int) code] = NULL_TREE;
20619 ix86_builtins_isa[(int) code].const_p = false;
20620 ix86_builtins_isa[(int) code].type = type;
20621 ix86_builtins_isa[(int) code].name = name;
20628 /* Like def_builtin, but also marks the function decl "const". */
20631 def_builtin_const (int mask, const char *name, tree type,
20632 enum ix86_builtins code)
20634 tree decl = def_builtin (mask, name, type, code);
20636 TREE_READONLY (decl) = 1;
20638 ix86_builtins_isa[(int) code].const_p = true;
20643 /* Add any new builtin functions for a given ISA that may not have been
20644 declared. This saves a bit of space compared to adding all of the
20645 declarations to the tree, even if we didn't use them. */
20648 ix86_add_new_builtins (int isa)
20653 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
20655 if ((ix86_builtins_isa[i].isa & isa) != 0
20656 && ix86_builtins_isa[i].type != NULL_TREE)
20658 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20659 ix86_builtins_isa[i].type,
20660 i, BUILT_IN_MD, NULL,
20663 ix86_builtins[i] = decl;
20664 ix86_builtins_isa[i].type = NULL_TREE;
20665 if (ix86_builtins_isa[i].const_p)
20666 TREE_READONLY (decl) = 1;
20671 /* Bits for builtin_description.flag. */
20673 /* Set when we don't support the comparison natively, and should
20674 swap_comparison in order to support it. */
20675 #define BUILTIN_DESC_SWAP_OPERANDS 1
20677 struct builtin_description
20679 const unsigned int mask;
20680 const enum insn_code icode;
20681 const char *const name;
20682 const enum ix86_builtins code;
20683 const enum rtx_code comparison;
20687 static const struct builtin_description bdesc_comi[] =
20689 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
20690 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
20691 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
20692 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
20693 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
20694 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
20695 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
20696 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
20697 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
20698 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
20699 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
20700 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
20701 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
20702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
20703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
20704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
20705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
20706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
20707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
20708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
20709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
20710 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
20711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
20712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
20715 static const struct builtin_description bdesc_pcmpestr[] =
20718 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
20719 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
20720 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
20721 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
20722 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
20723 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
20724 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
20727 static const struct builtin_description bdesc_pcmpistr[] =
20730 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
20731 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
20732 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
20733 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
20734 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
20735 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
20736 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
/* Special builtin types: signatures for builtins taking pointer operands
   (loads/stores) or no operands, used as the flag field of
   bdesc_special_args entries.  Naming: RET_FTYPE_ARG1[_ARG2...], with a
   "PC"/"P" prefix marking pointer-to-const / pointer arguments.
   NOTE(review): the enumerators VOID_FTYPE_VOID, V8SF_FTYPE_PCV4SF,
   V4DF_FTYPE_PCV2DF, V2DI_FTYPE_PV2DI and VOID_FTYPE_PDI_DI were dropped
   by this excerpt but are referenced by the bdesc_special_args table
   below; they have been restored in their original positions.  */
enum ix86_special_builtin_type
{
  SPECIAL_FTYPE_UNKNOWN,
  VOID_FTYPE_VOID,
  V32QI_FTYPE_PCCHAR,
  V16QI_FTYPE_PCCHAR,
  V8SF_FTYPE_PCV4SF,
  V8SF_FTYPE_PCFLOAT,
  V4DF_FTYPE_PCV2DF,
  V4DF_FTYPE_PCDOUBLE,
  V4SF_FTYPE_PCFLOAT,
  V2DF_FTYPE_PCDOUBLE,
  V8SF_FTYPE_PCV8SF_V8SF,
  V4DF_FTYPE_PCV4DF_V4DF,
  V4SF_FTYPE_V4SF_PCV2SF,
  V4SF_FTYPE_PCV4SF_V4SF,
  V2DF_FTYPE_V2DF_PCDOUBLE,
  V2DF_FTYPE_PCV2DF_V2DF,
  V2DI_FTYPE_PV2DI,
  VOID_FTYPE_PV2SF_V4SF,
  VOID_FTYPE_PV4DI_V4DI,
  VOID_FTYPE_PV2DI_V2DI,
  VOID_FTYPE_PCHAR_V32QI,
  VOID_FTYPE_PCHAR_V16QI,
  VOID_FTYPE_PFLOAT_V8SF,
  VOID_FTYPE_PFLOAT_V4SF,
  VOID_FTYPE_PDOUBLE_V4DF,
  VOID_FTYPE_PDOUBLE_V2DF,
  VOID_FTYPE_PDI_DI,
  VOID_FTYPE_PINT_INT,
  VOID_FTYPE_PV8SF_V8SF_V8SF,
  VOID_FTYPE_PV4DF_V4DF_V4DF,
  VOID_FTYPE_PV4SF_V4SF_V4SF,
  VOID_FTYPE_PV2DF_V2DF_V2DF
};
/* Builtin types: signatures for ordinary (non-pointer) builtins, used as
   the flag field of the bdesc_args table.  Naming: RET_FTYPE_ARG1[_ARG2...],
   with suffixes such as _COUNT (shift-count operand), _SWAP (operands
   swapped before expansion) and _VEC_MERGE (scalar insn merging into a
   vector).  */
enum ix86_builtin_type
{
  FTYPE_UNKNOWN,
  FLOAT128_FTYPE_FLOAT128,
  FLOAT_FTYPE_FLOAT,
  FLOAT128_FTYPE_FLOAT128_FLOAT128,
  INT_FTYPE_V8SF_V8SF_PTEST,
  INT_FTYPE_V4DI_V4DI_PTEST,
  INT_FTYPE_V4DF_V4DF_PTEST,
  INT_FTYPE_V4SF_V4SF_PTEST,
  INT_FTYPE_V2DI_V2DI_PTEST,
  INT_FTYPE_V2DF_V2DF_PTEST,
  /* NOTE(review): this excerpt elided roughly thirty one-operand
     conversion enumerators here (INT64_FTYPE_*, INT_FTYPE_*, and unary
     V*_FTYPE_V* types); restore them from the upstream sources before
     building, as enumerator order determines the values stored in the
     builtin tables.  */
  V4SF_FTYPE_V4SF_VEC_MERGE,
  /* NOTE(review): further unary V4SF_/V2DF_ conversion enumerators were
     elided here by the excerpt.  */
  V2DF_FTYPE_V2DF_VEC_MERGE,
  /* NOTE(review): remaining unary enumerators elided here as well.  */
  V16QI_FTYPE_V16QI_V16QI,
  V16QI_FTYPE_V8HI_V8HI,
  V8QI_FTYPE_V8QI_V8QI,
  V8QI_FTYPE_V4HI_V4HI,
  V8HI_FTYPE_V8HI_V8HI,
  V8HI_FTYPE_V8HI_V8HI_COUNT,
  V8HI_FTYPE_V16QI_V16QI,
  V8HI_FTYPE_V4SI_V4SI,
  V8HI_FTYPE_V8HI_SI_COUNT,
  V8SF_FTYPE_V8SF_V8SF,
  V8SF_FTYPE_V8SF_V8SI,
  V4SI_FTYPE_V4SI_V4SI,
  V4SI_FTYPE_V4SI_V4SI_COUNT,
  V4SI_FTYPE_V8HI_V8HI,
  V4SI_FTYPE_V4SF_V4SF,
  V4SI_FTYPE_V2DF_V2DF,
  V4SI_FTYPE_V4SI_SI_COUNT,
  V4HI_FTYPE_V4HI_V4HI,
  V4HI_FTYPE_V4HI_V4HI_COUNT,
  V4HI_FTYPE_V8QI_V8QI,
  V4HI_FTYPE_V2SI_V2SI,
  V4HI_FTYPE_V4HI_SI_COUNT,
  V4DF_FTYPE_V4DF_V4DF,
  V4DF_FTYPE_V4DF_V4DI,
  V4SF_FTYPE_V4SF_V4SF,
  V4SF_FTYPE_V4SF_V4SF_SWAP,
  V4SF_FTYPE_V4SF_V4SI,
  V4SF_FTYPE_V4SF_V2SI,
  V4SF_FTYPE_V4SF_V2DF,
  V4SF_FTYPE_V4SF_DI,
  V4SF_FTYPE_V4SF_SI,
  V2DI_FTYPE_V2DI_V2DI,
  V2DI_FTYPE_V2DI_V2DI_COUNT,
  V2DI_FTYPE_V16QI_V16QI,
  V2DI_FTYPE_V4SI_V4SI,
  V2DI_FTYPE_V2DI_V16QI,
  V2DI_FTYPE_V2DF_V2DF,
  V2DI_FTYPE_V2DI_SI_COUNT,
  V2SI_FTYPE_V2SI_V2SI,
  V2SI_FTYPE_V2SI_V2SI_COUNT,
  V2SI_FTYPE_V4HI_V4HI,
  V2SI_FTYPE_V2SF_V2SF,
  V2SI_FTYPE_V2SI_SI_COUNT,
  V2DF_FTYPE_V2DF_V2DF,
  V2DF_FTYPE_V2DF_V2DF_SWAP,
  V2DF_FTYPE_V2DF_V4SF,
  V2DF_FTYPE_V2DF_V2DI,
  V2DF_FTYPE_V2DF_DI,
  V2DF_FTYPE_V2DF_SI,
  V2SF_FTYPE_V2SF_V2SF,
  V1DI_FTYPE_V1DI_V1DI,
  V1DI_FTYPE_V1DI_V1DI_COUNT,
  V1DI_FTYPE_V8QI_V8QI,
  V1DI_FTYPE_V2SI_V2SI,
  V1DI_FTYPE_V1DI_SI_COUNT,
  UINT64_FTYPE_UINT64_UINT64,
  UINT_FTYPE_UINT_UINT,
  UINT_FTYPE_UINT_USHORT,
  UINT_FTYPE_UINT_UCHAR,
  V8HI_FTYPE_V8HI_INT,
  V4SI_FTYPE_V4SI_INT,
  V4HI_FTYPE_V4HI_INT,
  V8SF_FTYPE_V8SF_INT,
  V4SI_FTYPE_V8SI_INT,
  V4SF_FTYPE_V8SF_INT,
  V2DF_FTYPE_V4DF_INT,
  V4DF_FTYPE_V4DF_INT,
  V4SF_FTYPE_V4SF_INT,
  V2DI_FTYPE_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_INT,
  V2DF_FTYPE_V2DF_INT,
  V16QI_FTYPE_V16QI_V16QI_V16QI,
  V8SF_FTYPE_V8SF_V8SF_V8SF,
  V4DF_FTYPE_V4DF_V4DF_V4DF,
  V4SF_FTYPE_V4SF_V4SF_V4SF,
  V2DF_FTYPE_V2DF_V2DF_V2DF,
  V16QI_FTYPE_V16QI_V16QI_INT,
  V8SI_FTYPE_V8SI_V8SI_INT,
  V8SI_FTYPE_V8SI_V4SI_INT,
  V8HI_FTYPE_V8HI_V8HI_INT,
  V8SF_FTYPE_V8SF_V8SF_INT,
  V8SF_FTYPE_V8SF_V4SF_INT,
  V4SI_FTYPE_V4SI_V4SI_INT,
  V4DF_FTYPE_V4DF_V4DF_INT,
  V4DF_FTYPE_V4DF_V2DF_INT,
  V4SF_FTYPE_V4SF_V4SF_INT,
  V2DI_FTYPE_V2DI_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_V2DI_INT,
  V1DI2DI_FTYPE_V1DI_V1DI_INT,
  V2DF_FTYPE_V2DF_V2DF_INT,
  V2DI_FTYPE_V2DI_UINT_UINT,
  V2DI_FTYPE_V2DI_V2DI_UINT_UINT
};
20934 /* Special builtins with variable number of arguments. */
20935 static const struct builtin_description bdesc_special_args[] =
20938 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
20941 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
20944 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20945 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20946 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20948 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20949 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20950 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20951 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20953 /* SSE or 3DNow!A */
20954 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20955 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
20958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
20962 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20963 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
20964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
20965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
20966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20972 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20975 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
20978 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20979 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
20983 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
20984 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
20986 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20987 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20988 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20989 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
20990 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
20992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20994 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20995 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
20998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21014 /* Builtins with variable number of arguments. */
21015 static const struct builtin_description bdesc_args[] =
21018 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21019 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21020 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21021 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21022 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21023 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21025 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21026 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21027 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21028 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21029 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21030 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21031 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21032 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21034 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21035 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21037 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21038 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21039 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21040 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21042 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21043 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21044 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21045 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21046 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21047 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21049 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21050 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21051 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21052 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21053 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21054 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21056 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21057 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21058 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21060 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21062 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21063 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21064 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21065 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21066 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21067 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21069 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21070 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21071 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21072 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21073 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21074 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21076 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21077 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21078 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21079 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21082 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21083 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21084 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21085 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21087 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21088 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21089 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21090 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21091 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21092 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21093 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21094 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21095 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21096 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21097 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21098 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21099 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21100 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21101 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21104 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21105 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21106 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21107 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21108 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21109 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21113 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21114 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21115 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21116 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21117 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21118 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21119 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21120 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21122 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21123 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21125 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21127 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21128 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21129 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21130 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21131 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21132 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21133 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21134 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21136 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21137 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21138 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21139 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21140 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21141 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21142 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21143 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21144 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21145 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21146 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21147 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21148 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21149 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21150 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21151 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21152 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21153 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21154 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21155 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21156 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21157 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21159 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21160 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21161 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21162 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21164 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21165 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21166 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21167 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21169 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21170 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21171 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21172 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21173 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21175 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21176 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21177 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
21179 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21181 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21182 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21183 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21185 /* SSE MMX or 3Dnow!A */
21186 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21187 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21188 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21190 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21191 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21192 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21193 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21195 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21196 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21198 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21209 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21219 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21220 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21226 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21227 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21228 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21229 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21241 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21243 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21246 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21247 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21248 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21249 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21250 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21251 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21253 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21254 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21256 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21257 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21258 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21259 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21261 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21262 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21263 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21264 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21266 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21267 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21268 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21270 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21272 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21273 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21274 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21275 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21276 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21277 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21278 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21279 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21281 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21282 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21283 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21284 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21285 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21286 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21287 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21288 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21290 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21291 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21293 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21295 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21296 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21303 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21308 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21309 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21310 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21326 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21335 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21340 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21341 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21342 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21343 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21344 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21345 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21348 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21349 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21350 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21351 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21352 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21353 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21355 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21356 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21357 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21358 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21366 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21367 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21369 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21372 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21373 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21376 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21377 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21379 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21380 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21381 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21382 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21383 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21384 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21387 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21388 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21389 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21390 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21391 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21392 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21394 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21395 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21396 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21397 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21398 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21399 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21400 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21401 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21402 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21403 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21404 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21405 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21406 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21407 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21408 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21409 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21410 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21411 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21412 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21413 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21414 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21415 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21416 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21417 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21420 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21421 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21424 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21425 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21426 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21427 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21428 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21429 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21430 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21431 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21432 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21433 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21435 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21436 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21437 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21438 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21439 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21440 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21441 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21442 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21443 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21444 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21445 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21446 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21447 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21449 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21450 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21451 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21452 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21453 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21454 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21455 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21456 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21457 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21458 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21459 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21460 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21462 /* SSE4.1 and SSE5 */
21463 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21464 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21465 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21466 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21468 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21469 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21470 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21473 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21474 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21475 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21476 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21477 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
21480 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21481 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21482 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21483 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21486 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21487 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21489 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21490 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21491 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21492 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21495 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
21498 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21499 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21500 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21501 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21502 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21503 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21504 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21505 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21506 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21507 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21508 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21509 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21510 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21511 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21512 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21513 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21514 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21515 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21516 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21517 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21518 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21519 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21520 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21521 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21522 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21523 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21525 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21526 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21527 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21528 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21530 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21531 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21532 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21533 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21534 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21535 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21536 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21538 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21539 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21540 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21542 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21543 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21544 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21545 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21549 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21550 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21553 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21555 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21557 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21559 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21560 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21561 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21562 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21563 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21565 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21566 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21567 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21569 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21570 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21571 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21572 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21573 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21575 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21577 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21578 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21580 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21581 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21582 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21583 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21585 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21586 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21587 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21588 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21589 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21590 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
21592 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21593 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21594 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21596 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21597 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21598 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21599 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21600 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21601 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21602 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21603 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21604 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21605 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21606 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21608 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
21613 enum multi_arg_type {
21623 MULTI_ARG_3_PERMPS,
21624 MULTI_ARG_3_PERMPD,
21631 MULTI_ARG_2_DI_IMM,
21632 MULTI_ARG_2_SI_IMM,
21633 MULTI_ARG_2_HI_IMM,
21634 MULTI_ARG_2_QI_IMM,
21635 MULTI_ARG_2_SF_CMP,
21636 MULTI_ARG_2_DF_CMP,
21637 MULTI_ARG_2_DI_CMP,
21638 MULTI_ARG_2_SI_CMP,
21639 MULTI_ARG_2_HI_CMP,
21640 MULTI_ARG_2_QI_CMP,
21663 static const struct builtin_description bdesc_multi_arg[] =
21665 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
21666 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
21667 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
21668 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
21669 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
21670 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
21671 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
21672 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
21673 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
21674 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
21675 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
21676 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
21677 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
21678 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
21679 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
21680 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
21681 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
21682 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
21683 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
21684 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
21685 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
21686 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
21687 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
21688 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
21689 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
21690 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
21691 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
21692 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
21693 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21694 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
21695 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
21696 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
21697 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21698 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21699 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21700 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21701 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21702 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
21703 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
21704 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
21705 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
21706 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
21707 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
21708 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
21709 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
21710 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
21711 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
21712 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
21713 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
21714 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
21715 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
21716 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
21717 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
21718 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
21719 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
21720 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
21721 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
21722 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
21723 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
21724 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
21725 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
21726 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
21727 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
21728 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
21729 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
21730 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
21731 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
21732 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
21733 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
21734 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
21735 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
21736 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
21737 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
21738 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
21739 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
21741 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
21742 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21743 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21744 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
21745 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
21746 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
21747 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
21748 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21749 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21750 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21751 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21752 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21753 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21754 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21755 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21756 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21758 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
21759 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21760 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21761 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
21762 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
21763 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
21764 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
21765 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21766 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21767 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21768 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21769 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21770 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21771 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21772 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21773 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21775 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
21776 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21777 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21778 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
21779 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
21780 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
21781 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
21782 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21783 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21784 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21785 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21786 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21787 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21788 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21789 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21790 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21792 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
21793 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21794 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21795 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
21796 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
21797 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
21798 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
21799 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21800 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21801 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21802 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21803 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21804 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21805 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21806 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21807 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21809 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
21810 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21811 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21812 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
21813 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
21814 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
21815 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
21817 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
21818 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21819 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21820 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
21821 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
21822 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
21823 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
21825 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
21826 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21827 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21828 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
21829 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
21830 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
21831 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
21833 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21834 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21835 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21836 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
21837 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
21838 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
21839 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
21841 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
21842 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21843 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21844 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
21845 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
21846 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
21847 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
21849 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
21850 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21851 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21852 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
21853 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
21854 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
21855 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
21857 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
21858 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21859 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21860 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
21861 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
21862 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
21863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
21865 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
21869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
21870 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
21871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
21873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
21874 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
21875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
21876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
21877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
21878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
21879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
21880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
21882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21901 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
21902 in the current target ISA to allow the user to compile particular modules
21903 with different target specific options that differ from the command line
21906 ix86_init_mmx_sse_builtins (void)
21908 const struct builtin_description * d;
21911 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
21912 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
21913 tree V1DI_type_node
21914 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
21915 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
21916 tree V2DI_type_node
21917 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
21918 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
21919 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
21920 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
21921 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
21922 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
21923 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
21925 tree pchar_type_node = build_pointer_type (char_type_node);
21926 tree pcchar_type_node
21927 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
21928 tree pfloat_type_node = build_pointer_type (float_type_node);
21929 tree pcfloat_type_node
21930 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
21931 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
21932 tree pcv2sf_type_node
21933 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
21934 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
21935 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
21938 tree int_ftype_v4sf_v4sf
21939 = build_function_type_list (integer_type_node,
21940 V4SF_type_node, V4SF_type_node, NULL_TREE);
21941 tree v4si_ftype_v4sf_v4sf
21942 = build_function_type_list (V4SI_type_node,
21943 V4SF_type_node, V4SF_type_node, NULL_TREE);
21944 /* MMX/SSE/integer conversions. */
21945 tree int_ftype_v4sf
21946 = build_function_type_list (integer_type_node,
21947 V4SF_type_node, NULL_TREE);
21948 tree int64_ftype_v4sf
21949 = build_function_type_list (long_long_integer_type_node,
21950 V4SF_type_node, NULL_TREE);
21951 tree int_ftype_v8qi
21952 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
21953 tree v4sf_ftype_v4sf_int
21954 = build_function_type_list (V4SF_type_node,
21955 V4SF_type_node, integer_type_node, NULL_TREE);
21956 tree v4sf_ftype_v4sf_int64
21957 = build_function_type_list (V4SF_type_node,
21958 V4SF_type_node, long_long_integer_type_node,
21960 tree v4sf_ftype_v4sf_v2si
21961 = build_function_type_list (V4SF_type_node,
21962 V4SF_type_node, V2SI_type_node, NULL_TREE);
21964 /* Miscellaneous. */
21965 tree v8qi_ftype_v4hi_v4hi
21966 = build_function_type_list (V8QI_type_node,
21967 V4HI_type_node, V4HI_type_node, NULL_TREE);
21968 tree v4hi_ftype_v2si_v2si
21969 = build_function_type_list (V4HI_type_node,
21970 V2SI_type_node, V2SI_type_node, NULL_TREE);
21971 tree v4sf_ftype_v4sf_v4sf_int
21972 = build_function_type_list (V4SF_type_node,
21973 V4SF_type_node, V4SF_type_node,
21974 integer_type_node, NULL_TREE);
21975 tree v2si_ftype_v4hi_v4hi
21976 = build_function_type_list (V2SI_type_node,
21977 V4HI_type_node, V4HI_type_node, NULL_TREE);
21978 tree v4hi_ftype_v4hi_int
21979 = build_function_type_list (V4HI_type_node,
21980 V4HI_type_node, integer_type_node, NULL_TREE);
21981 tree v2si_ftype_v2si_int
21982 = build_function_type_list (V2SI_type_node,
21983 V2SI_type_node, integer_type_node, NULL_TREE);
21984 tree v1di_ftype_v1di_int
21985 = build_function_type_list (V1DI_type_node,
21986 V1DI_type_node, integer_type_node, NULL_TREE);
21988 tree void_ftype_void
21989 = build_function_type (void_type_node, void_list_node);
21990 tree void_ftype_unsigned
21991 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
21992 tree void_ftype_unsigned_unsigned
21993 = build_function_type_list (void_type_node, unsigned_type_node,
21994 unsigned_type_node, NULL_TREE);
21995 tree void_ftype_pcvoid_unsigned_unsigned
21996 = build_function_type_list (void_type_node, const_ptr_type_node,
21997 unsigned_type_node, unsigned_type_node,
21999 tree unsigned_ftype_void
22000 = build_function_type (unsigned_type_node, void_list_node);
22001 tree v2si_ftype_v4sf
22002 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22003 /* Loads/stores. */
22004 tree void_ftype_v8qi_v8qi_pchar
22005 = build_function_type_list (void_type_node,
22006 V8QI_type_node, V8QI_type_node,
22007 pchar_type_node, NULL_TREE);
22008 tree v4sf_ftype_pcfloat
22009 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22010 tree v4sf_ftype_v4sf_pcv2sf
22011 = build_function_type_list (V4SF_type_node,
22012 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22013 tree void_ftype_pv2sf_v4sf
22014 = build_function_type_list (void_type_node,
22015 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22016 tree void_ftype_pfloat_v4sf
22017 = build_function_type_list (void_type_node,
22018 pfloat_type_node, V4SF_type_node, NULL_TREE);
22019 tree void_ftype_pdi_di
22020 = build_function_type_list (void_type_node,
22021 pdi_type_node, long_long_unsigned_type_node,
22023 tree void_ftype_pv2di_v2di
22024 = build_function_type_list (void_type_node,
22025 pv2di_type_node, V2DI_type_node, NULL_TREE);
22026 /* Normal vector unops. */
22027 tree v4sf_ftype_v4sf
22028 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22029 tree v16qi_ftype_v16qi
22030 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22031 tree v8hi_ftype_v8hi
22032 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22033 tree v4si_ftype_v4si
22034 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22035 tree v8qi_ftype_v8qi
22036 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22037 tree v4hi_ftype_v4hi
22038 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22040 /* Normal vector binops. */
22041 tree v4sf_ftype_v4sf_v4sf
22042 = build_function_type_list (V4SF_type_node,
22043 V4SF_type_node, V4SF_type_node, NULL_TREE);
22044 tree v8qi_ftype_v8qi_v8qi
22045 = build_function_type_list (V8QI_type_node,
22046 V8QI_type_node, V8QI_type_node, NULL_TREE);
22047 tree v4hi_ftype_v4hi_v4hi
22048 = build_function_type_list (V4HI_type_node,
22049 V4HI_type_node, V4HI_type_node, NULL_TREE);
22050 tree v2si_ftype_v2si_v2si
22051 = build_function_type_list (V2SI_type_node,
22052 V2SI_type_node, V2SI_type_node, NULL_TREE);
22053 tree v1di_ftype_v1di_v1di
22054 = build_function_type_list (V1DI_type_node,
22055 V1DI_type_node, V1DI_type_node, NULL_TREE);
22056 tree v1di_ftype_v1di_v1di_int
22057 = build_function_type_list (V1DI_type_node,
22058 V1DI_type_node, V1DI_type_node,
22059 integer_type_node, NULL_TREE);
22060 tree v2si_ftype_v2sf
22061 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22062 tree v2sf_ftype_v2si
22063 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22064 tree v2si_ftype_v2si
22065 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22066 tree v2sf_ftype_v2sf
22067 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22068 tree v2sf_ftype_v2sf_v2sf
22069 = build_function_type_list (V2SF_type_node,
22070 V2SF_type_node, V2SF_type_node, NULL_TREE);
22071 tree v2si_ftype_v2sf_v2sf
22072 = build_function_type_list (V2SI_type_node,
22073 V2SF_type_node, V2SF_type_node, NULL_TREE);
22074 tree pint_type_node = build_pointer_type (integer_type_node);
22075 tree pdouble_type_node = build_pointer_type (double_type_node);
22076 tree pcdouble_type_node = build_pointer_type (
22077 build_type_variant (double_type_node, 1, 0));
22078 tree int_ftype_v2df_v2df
22079 = build_function_type_list (integer_type_node,
22080 V2DF_type_node, V2DF_type_node, NULL_TREE);
22082 tree void_ftype_pcvoid
22083 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22084 tree v4sf_ftype_v4si
22085 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22086 tree v4si_ftype_v4sf
22087 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22088 tree v2df_ftype_v4si
22089 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22090 tree v4si_ftype_v2df
22091 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22092 tree v4si_ftype_v2df_v2df
22093 = build_function_type_list (V4SI_type_node,
22094 V2DF_type_node, V2DF_type_node, NULL_TREE);
22095 tree v2si_ftype_v2df
22096 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22097 tree v4sf_ftype_v2df
22098 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22099 tree v2df_ftype_v2si
22100 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22101 tree v2df_ftype_v4sf
22102 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22103 tree int_ftype_v2df
22104 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22105 tree int64_ftype_v2df
22106 = build_function_type_list (long_long_integer_type_node,
22107 V2DF_type_node, NULL_TREE);
22108 tree v2df_ftype_v2df_int
22109 = build_function_type_list (V2DF_type_node,
22110 V2DF_type_node, integer_type_node, NULL_TREE);
22111 tree v2df_ftype_v2df_int64
22112 = build_function_type_list (V2DF_type_node,
22113 V2DF_type_node, long_long_integer_type_node,
22115 tree v4sf_ftype_v4sf_v2df
22116 = build_function_type_list (V4SF_type_node,
22117 V4SF_type_node, V2DF_type_node, NULL_TREE);
22118 tree v2df_ftype_v2df_v4sf
22119 = build_function_type_list (V2DF_type_node,
22120 V2DF_type_node, V4SF_type_node, NULL_TREE);
22121 tree v2df_ftype_v2df_v2df_int
22122 = build_function_type_list (V2DF_type_node,
22123 V2DF_type_node, V2DF_type_node,
22126 tree v2df_ftype_v2df_pcdouble
22127 = build_function_type_list (V2DF_type_node,
22128 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22129 tree void_ftype_pdouble_v2df
22130 = build_function_type_list (void_type_node,
22131 pdouble_type_node, V2DF_type_node, NULL_TREE);
22132 tree void_ftype_pint_int
22133 = build_function_type_list (void_type_node,
22134 pint_type_node, integer_type_node, NULL_TREE);
22135 tree void_ftype_v16qi_v16qi_pchar
22136 = build_function_type_list (void_type_node,
22137 V16QI_type_node, V16QI_type_node,
22138 pchar_type_node, NULL_TREE);
22139 tree v2df_ftype_pcdouble
22140 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22141 tree v2df_ftype_v2df_v2df
22142 = build_function_type_list (V2DF_type_node,
22143 V2DF_type_node, V2DF_type_node, NULL_TREE);
22144 tree v16qi_ftype_v16qi_v16qi
22145 = build_function_type_list (V16QI_type_node,
22146 V16QI_type_node, V16QI_type_node, NULL_TREE);
22147 tree v8hi_ftype_v8hi_v8hi
22148 = build_function_type_list (V8HI_type_node,
22149 V8HI_type_node, V8HI_type_node, NULL_TREE);
22150 tree v4si_ftype_v4si_v4si
22151 = build_function_type_list (V4SI_type_node,
22152 V4SI_type_node, V4SI_type_node, NULL_TREE);
22153 tree v2di_ftype_v2di_v2di
22154 = build_function_type_list (V2DI_type_node,
22155 V2DI_type_node, V2DI_type_node, NULL_TREE);
22156 tree v2di_ftype_v2df_v2df
22157 = build_function_type_list (V2DI_type_node,
22158 V2DF_type_node, V2DF_type_node, NULL_TREE);
22159 tree v2df_ftype_v2df
22160 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22161 tree v2di_ftype_v2di_int
22162 = build_function_type_list (V2DI_type_node,
22163 V2DI_type_node, integer_type_node, NULL_TREE);
22164 tree v2di_ftype_v2di_v2di_int
22165 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22166 V2DI_type_node, integer_type_node, NULL_TREE);
22167 tree v4si_ftype_v4si_int
22168 = build_function_type_list (V4SI_type_node,
22169 V4SI_type_node, integer_type_node, NULL_TREE);
22170 tree v8hi_ftype_v8hi_int
22171 = build_function_type_list (V8HI_type_node,
22172 V8HI_type_node, integer_type_node, NULL_TREE);
22173 tree v4si_ftype_v8hi_v8hi
22174 = build_function_type_list (V4SI_type_node,
22175 V8HI_type_node, V8HI_type_node, NULL_TREE);
22176 tree v1di_ftype_v8qi_v8qi
22177 = build_function_type_list (V1DI_type_node,
22178 V8QI_type_node, V8QI_type_node, NULL_TREE);
22179 tree v1di_ftype_v2si_v2si
22180 = build_function_type_list (V1DI_type_node,
22181 V2SI_type_node, V2SI_type_node, NULL_TREE);
22182 tree v2di_ftype_v16qi_v16qi
22183 = build_function_type_list (V2DI_type_node,
22184 V16QI_type_node, V16QI_type_node, NULL_TREE);
22185 tree v2di_ftype_v4si_v4si
22186 = build_function_type_list (V2DI_type_node,
22187 V4SI_type_node, V4SI_type_node, NULL_TREE);
22188 tree int_ftype_v16qi
22189 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22190 tree v16qi_ftype_pcchar
22191 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22192 tree void_ftype_pchar_v16qi
22193 = build_function_type_list (void_type_node,
22194 pchar_type_node, V16QI_type_node, NULL_TREE);
22196 tree v2di_ftype_v2di_unsigned_unsigned
22197 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22198 unsigned_type_node, unsigned_type_node,
22200 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22201 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22202 unsigned_type_node, unsigned_type_node,
22204 tree v2di_ftype_v2di_v16qi
22205 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22207 tree v2df_ftype_v2df_v2df_v2df
22208 = build_function_type_list (V2DF_type_node,
22209 V2DF_type_node, V2DF_type_node,
22210 V2DF_type_node, NULL_TREE);
22211 tree v4sf_ftype_v4sf_v4sf_v4sf
22212 = build_function_type_list (V4SF_type_node,
22213 V4SF_type_node, V4SF_type_node,
22214 V4SF_type_node, NULL_TREE);
22215 tree v8hi_ftype_v16qi
22216 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22218 tree v4si_ftype_v16qi
22219 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22221 tree v2di_ftype_v16qi
22222 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22224 tree v4si_ftype_v8hi
22225 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22227 tree v2di_ftype_v8hi
22228 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22230 tree v2di_ftype_v4si
22231 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22233 tree v2di_ftype_pv2di
22234 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22236 tree v16qi_ftype_v16qi_v16qi_int
22237 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22238 V16QI_type_node, integer_type_node,
22240 tree v16qi_ftype_v16qi_v16qi_v16qi
22241 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22242 V16QI_type_node, V16QI_type_node,
22244 tree v8hi_ftype_v8hi_v8hi_int
22245 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22246 V8HI_type_node, integer_type_node,
22248 tree v4si_ftype_v4si_v4si_int
22249 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22250 V4SI_type_node, integer_type_node,
22252 tree int_ftype_v2di_v2di
22253 = build_function_type_list (integer_type_node,
22254 V2DI_type_node, V2DI_type_node,
22256 tree int_ftype_v16qi_int_v16qi_int_int
22257 = build_function_type_list (integer_type_node,
22264 tree v16qi_ftype_v16qi_int_v16qi_int_int
22265 = build_function_type_list (V16QI_type_node,
22272 tree int_ftype_v16qi_v16qi_int
22273 = build_function_type_list (integer_type_node,
22279 /* SSE5 instructions */
22280 tree v2di_ftype_v2di_v2di_v2di
22281 = build_function_type_list (V2DI_type_node,
22287 tree v4si_ftype_v4si_v4si_v4si
22288 = build_function_type_list (V4SI_type_node,
22294 tree v4si_ftype_v4si_v4si_v2di
22295 = build_function_type_list (V4SI_type_node,
22301 tree v8hi_ftype_v8hi_v8hi_v8hi
22302 = build_function_type_list (V8HI_type_node,
22308 tree v8hi_ftype_v8hi_v8hi_v4si
22309 = build_function_type_list (V8HI_type_node,
22315 tree v2df_ftype_v2df_v2df_v16qi
22316 = build_function_type_list (V2DF_type_node,
22322 tree v4sf_ftype_v4sf_v4sf_v16qi
22323 = build_function_type_list (V4SF_type_node,
22329 tree v2di_ftype_v2di_si
22330 = build_function_type_list (V2DI_type_node,
22335 tree v4si_ftype_v4si_si
22336 = build_function_type_list (V4SI_type_node,
22341 tree v8hi_ftype_v8hi_si
22342 = build_function_type_list (V8HI_type_node,
22347 tree v16qi_ftype_v16qi_si
22348 = build_function_type_list (V16QI_type_node,
22352 tree v4sf_ftype_v4hi
22353 = build_function_type_list (V4SF_type_node,
22357 tree v4hi_ftype_v4sf
22358 = build_function_type_list (V4HI_type_node,
22362 tree v2di_ftype_v2di
22363 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22365 tree v16qi_ftype_v8hi_v8hi
22366 = build_function_type_list (V16QI_type_node,
22367 V8HI_type_node, V8HI_type_node,
22369 tree v8hi_ftype_v4si_v4si
22370 = build_function_type_list (V8HI_type_node,
22371 V4SI_type_node, V4SI_type_node,
22373 tree v8hi_ftype_v16qi_v16qi
22374 = build_function_type_list (V8HI_type_node,
22375 V16QI_type_node, V16QI_type_node,
22377 tree v4hi_ftype_v8qi_v8qi
22378 = build_function_type_list (V4HI_type_node,
22379 V8QI_type_node, V8QI_type_node,
22381 tree unsigned_ftype_unsigned_uchar
22382 = build_function_type_list (unsigned_type_node,
22383 unsigned_type_node,
22384 unsigned_char_type_node,
22386 tree unsigned_ftype_unsigned_ushort
22387 = build_function_type_list (unsigned_type_node,
22388 unsigned_type_node,
22389 short_unsigned_type_node,
22391 tree unsigned_ftype_unsigned_unsigned
22392 = build_function_type_list (unsigned_type_node,
22393 unsigned_type_node,
22394 unsigned_type_node,
22396 tree uint64_ftype_uint64_uint64
22397 = build_function_type_list (long_long_unsigned_type_node,
22398 long_long_unsigned_type_node,
22399 long_long_unsigned_type_node,
22401 tree float_ftype_float
22402 = build_function_type_list (float_type_node,
22407 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22409 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22411 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22413 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22415 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22417 tree v8sf_ftype_v8sf
22418 = build_function_type_list (V8SF_type_node,
22421 tree v8si_ftype_v8sf
22422 = build_function_type_list (V8SI_type_node,
22425 tree v8sf_ftype_v8si
22426 = build_function_type_list (V8SF_type_node,
22429 tree v4si_ftype_v4df
22430 = build_function_type_list (V4SI_type_node,
22433 tree v4df_ftype_v4df
22434 = build_function_type_list (V4DF_type_node,
22437 tree v4df_ftype_v4si
22438 = build_function_type_list (V4DF_type_node,
22441 tree v4df_ftype_v4sf
22442 = build_function_type_list (V4DF_type_node,
22445 tree v4sf_ftype_v4df
22446 = build_function_type_list (V4SF_type_node,
22449 tree v8sf_ftype_v8sf_v8sf
22450 = build_function_type_list (V8SF_type_node,
22451 V8SF_type_node, V8SF_type_node,
22453 tree v4df_ftype_v4df_v4df
22454 = build_function_type_list (V4DF_type_node,
22455 V4DF_type_node, V4DF_type_node,
22457 tree v8sf_ftype_v8sf_int
22458 = build_function_type_list (V8SF_type_node,
22459 V8SF_type_node, integer_type_node,
22461 tree v4si_ftype_v8si_int
22462 = build_function_type_list (V4SI_type_node,
22463 V8SI_type_node, integer_type_node,
22465 tree v4df_ftype_v4df_int
22466 = build_function_type_list (V4DF_type_node,
22467 V4DF_type_node, integer_type_node,
22469 tree v4sf_ftype_v8sf_int
22470 = build_function_type_list (V4SF_type_node,
22471 V8SF_type_node, integer_type_node,
22473 tree v2df_ftype_v4df_int
22474 = build_function_type_list (V2DF_type_node,
22475 V4DF_type_node, integer_type_node,
22477 tree v8sf_ftype_v8sf_v8sf_int
22478 = build_function_type_list (V8SF_type_node,
22479 V8SF_type_node, V8SF_type_node,
22482 tree v8sf_ftype_v8sf_v8sf_v8sf
22483 = build_function_type_list (V8SF_type_node,
22484 V8SF_type_node, V8SF_type_node,
22487 tree v4df_ftype_v4df_v4df_v4df
22488 = build_function_type_list (V4DF_type_node,
22489 V4DF_type_node, V4DF_type_node,
22492 tree v8si_ftype_v8si_v8si_int
22493 = build_function_type_list (V8SI_type_node,
22494 V8SI_type_node, V8SI_type_node,
22497 tree v4df_ftype_v4df_v4df_int
22498 = build_function_type_list (V4DF_type_node,
22499 V4DF_type_node, V4DF_type_node,
22502 tree v8sf_ftype_pcfloat
22503 = build_function_type_list (V8SF_type_node,
22506 tree v4df_ftype_pcdouble
22507 = build_function_type_list (V4DF_type_node,
22508 pcdouble_type_node,
22510 tree pcv4sf_type_node
22511 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22512 tree pcv2df_type_node
22513 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22514 tree v8sf_ftype_pcv4sf
22515 = build_function_type_list (V8SF_type_node,
22518 tree v4df_ftype_pcv2df
22519 = build_function_type_list (V4DF_type_node,
22522 tree v32qi_ftype_pcchar
22523 = build_function_type_list (V32QI_type_node,
22526 tree void_ftype_pchar_v32qi
22527 = build_function_type_list (void_type_node,
22528 pchar_type_node, V32QI_type_node,
22530 tree v8si_ftype_v8si_v4si_int
22531 = build_function_type_list (V8SI_type_node,
22532 V8SI_type_node, V4SI_type_node,
22535 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22536 tree void_ftype_pv4di_v4di
22537 = build_function_type_list (void_type_node,
22538 pv4di_type_node, V4DI_type_node,
22540 tree v8sf_ftype_v8sf_v4sf_int
22541 = build_function_type_list (V8SF_type_node,
22542 V8SF_type_node, V4SF_type_node,
22545 tree v4df_ftype_v4df_v2df_int
22546 = build_function_type_list (V4DF_type_node,
22547 V4DF_type_node, V2DF_type_node,
22550 tree void_ftype_pfloat_v8sf
22551 = build_function_type_list (void_type_node,
22552 pfloat_type_node, V8SF_type_node,
22554 tree void_ftype_pdouble_v4df
22555 = build_function_type_list (void_type_node,
22556 pdouble_type_node, V4DF_type_node,
22558 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22559 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22560 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22561 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22562 tree pcv8sf_type_node
22563 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22564 tree pcv4df_type_node
22565 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22566 tree v8sf_ftype_pcv8sf_v8sf
22567 = build_function_type_list (V8SF_type_node,
22568 pcv8sf_type_node, V8SF_type_node,
22570 tree v4df_ftype_pcv4df_v4df
22571 = build_function_type_list (V4DF_type_node,
22572 pcv4df_type_node, V4DF_type_node,
22574 tree v4sf_ftype_pcv4sf_v4sf
22575 = build_function_type_list (V4SF_type_node,
22576 pcv4sf_type_node, V4SF_type_node,
22578 tree v2df_ftype_pcv2df_v2df
22579 = build_function_type_list (V2DF_type_node,
22580 pcv2df_type_node, V2DF_type_node,
22582 tree void_ftype_pv8sf_v8sf_v8sf
22583 = build_function_type_list (void_type_node,
22584 pv8sf_type_node, V8SF_type_node,
22587 tree void_ftype_pv4df_v4df_v4df
22588 = build_function_type_list (void_type_node,
22589 pv4df_type_node, V4DF_type_node,
22592 tree void_ftype_pv4sf_v4sf_v4sf
22593 = build_function_type_list (void_type_node,
22594 pv4sf_type_node, V4SF_type_node,
22597 tree void_ftype_pv2df_v2df_v2df
22598 = build_function_type_list (void_type_node,
22599 pv2df_type_node, V2DF_type_node,
22602 tree v4df_ftype_v2df
22603 = build_function_type_list (V4DF_type_node,
22606 tree v8sf_ftype_v4sf
22607 = build_function_type_list (V8SF_type_node,
22610 tree v8si_ftype_v4si
22611 = build_function_type_list (V8SI_type_node,
22614 tree v2df_ftype_v4df
22615 = build_function_type_list (V2DF_type_node,
22618 tree v4sf_ftype_v8sf
22619 = build_function_type_list (V4SF_type_node,
22622 tree v4si_ftype_v8si
22623 = build_function_type_list (V4SI_type_node,
22626 tree int_ftype_v4df
22627 = build_function_type_list (integer_type_node,
22630 tree int_ftype_v8sf
22631 = build_function_type_list (integer_type_node,
22634 tree int_ftype_v8sf_v8sf
22635 = build_function_type_list (integer_type_node,
22636 V8SF_type_node, V8SF_type_node,
22638 tree int_ftype_v4di_v4di
22639 = build_function_type_list (integer_type_node,
22640 V4DI_type_node, V4DI_type_node,
22642 tree int_ftype_v4df_v4df
22643 = build_function_type_list (integer_type_node,
22644 V4DF_type_node, V4DF_type_node,
22646 tree v8sf_ftype_v8sf_v8si
22647 = build_function_type_list (V8SF_type_node,
22648 V8SF_type_node, V8SI_type_node,
22650 tree v4df_ftype_v4df_v4di
22651 = build_function_type_list (V4DF_type_node,
22652 V4DF_type_node, V4DI_type_node,
22654 tree v4sf_ftype_v4sf_v4si
22655 = build_function_type_list (V4SF_type_node,
22656 V4SF_type_node, V4SI_type_node, NULL_TREE);
22657 tree v2df_ftype_v2df_v2di
22658 = build_function_type_list (V2DF_type_node,
22659 V2DF_type_node, V2DI_type_node, NULL_TREE);
22663 /* Add all special builtins with variable number of operands. */
22664 for (i = 0, d = bdesc_special_args;
22665 i < ARRAY_SIZE (bdesc_special_args);
22673 switch ((enum ix86_special_builtin_type) d->flag)
22675 case VOID_FTYPE_VOID:
22676 type = void_ftype_void;
22678 case V32QI_FTYPE_PCCHAR:
22679 type = v32qi_ftype_pcchar;
22681 case V16QI_FTYPE_PCCHAR:
22682 type = v16qi_ftype_pcchar;
22684 case V8SF_FTYPE_PCV4SF:
22685 type = v8sf_ftype_pcv4sf;
22687 case V8SF_FTYPE_PCFLOAT:
22688 type = v8sf_ftype_pcfloat;
22690 case V4DF_FTYPE_PCV2DF:
22691 type = v4df_ftype_pcv2df;
22693 case V4DF_FTYPE_PCDOUBLE:
22694 type = v4df_ftype_pcdouble;
22696 case V4SF_FTYPE_PCFLOAT:
22697 type = v4sf_ftype_pcfloat;
22699 case V2DI_FTYPE_PV2DI:
22700 type = v2di_ftype_pv2di;
22702 case V2DF_FTYPE_PCDOUBLE:
22703 type = v2df_ftype_pcdouble;
22705 case V8SF_FTYPE_PCV8SF_V8SF:
22706 type = v8sf_ftype_pcv8sf_v8sf;
22708 case V4DF_FTYPE_PCV4DF_V4DF:
22709 type = v4df_ftype_pcv4df_v4df;
22711 case V4SF_FTYPE_V4SF_PCV2SF:
22712 type = v4sf_ftype_v4sf_pcv2sf;
22714 case V4SF_FTYPE_PCV4SF_V4SF:
22715 type = v4sf_ftype_pcv4sf_v4sf;
22717 case V2DF_FTYPE_V2DF_PCDOUBLE:
22718 type = v2df_ftype_v2df_pcdouble;
22720 case V2DF_FTYPE_PCV2DF_V2DF:
22721 type = v2df_ftype_pcv2df_v2df;
22723 case VOID_FTYPE_PV2SF_V4SF:
22724 type = void_ftype_pv2sf_v4sf;
22726 case VOID_FTYPE_PV4DI_V4DI:
22727 type = void_ftype_pv4di_v4di;
22729 case VOID_FTYPE_PV2DI_V2DI:
22730 type = void_ftype_pv2di_v2di;
22732 case VOID_FTYPE_PCHAR_V32QI:
22733 type = void_ftype_pchar_v32qi;
22735 case VOID_FTYPE_PCHAR_V16QI:
22736 type = void_ftype_pchar_v16qi;
22738 case VOID_FTYPE_PFLOAT_V8SF:
22739 type = void_ftype_pfloat_v8sf;
22741 case VOID_FTYPE_PFLOAT_V4SF:
22742 type = void_ftype_pfloat_v4sf;
22744 case VOID_FTYPE_PDOUBLE_V4DF:
22745 type = void_ftype_pdouble_v4df;
22747 case VOID_FTYPE_PDOUBLE_V2DF:
22748 type = void_ftype_pdouble_v2df;
22750 case VOID_FTYPE_PDI_DI:
22751 type = void_ftype_pdi_di;
22753 case VOID_FTYPE_PINT_INT:
22754 type = void_ftype_pint_int;
22756 case VOID_FTYPE_PV8SF_V8SF_V8SF:
22757 type = void_ftype_pv8sf_v8sf_v8sf;
22759 case VOID_FTYPE_PV4DF_V4DF_V4DF:
22760 type = void_ftype_pv4df_v4df_v4df;
22762 case VOID_FTYPE_PV4SF_V4SF_V4SF:
22763 type = void_ftype_pv4sf_v4sf_v4sf;
22765 case VOID_FTYPE_PV2DF_V2DF_V2DF:
22766 type = void_ftype_pv2df_v2df_v2df;
22769 gcc_unreachable ();
22772 def_builtin (d->mask, d->name, type, d->code);
22775 /* Add all builtins with variable number of operands. */
22776 for (i = 0, d = bdesc_args;
22777 i < ARRAY_SIZE (bdesc_args);
22785 switch ((enum ix86_builtin_type) d->flag)
22787 case FLOAT_FTYPE_FLOAT:
22788 type = float_ftype_float;
22790 case INT_FTYPE_V8SF_V8SF_PTEST:
22791 type = int_ftype_v8sf_v8sf;
22793 case INT_FTYPE_V4DI_V4DI_PTEST:
22794 type = int_ftype_v4di_v4di;
22796 case INT_FTYPE_V4DF_V4DF_PTEST:
22797 type = int_ftype_v4df_v4df;
22799 case INT_FTYPE_V4SF_V4SF_PTEST:
22800 type = int_ftype_v4sf_v4sf;
22802 case INT_FTYPE_V2DI_V2DI_PTEST:
22803 type = int_ftype_v2di_v2di;
22805 case INT_FTYPE_V2DF_V2DF_PTEST:
22806 type = int_ftype_v2df_v2df;
22808 case INT64_FTYPE_V4SF:
22809 type = int64_ftype_v4sf;
22811 case INT64_FTYPE_V2DF:
22812 type = int64_ftype_v2df;
22814 case INT_FTYPE_V16QI:
22815 type = int_ftype_v16qi;
22817 case INT_FTYPE_V8QI:
22818 type = int_ftype_v8qi;
22820 case INT_FTYPE_V8SF:
22821 type = int_ftype_v8sf;
22823 case INT_FTYPE_V4DF:
22824 type = int_ftype_v4df;
22826 case INT_FTYPE_V4SF:
22827 type = int_ftype_v4sf;
22829 case INT_FTYPE_V2DF:
22830 type = int_ftype_v2df;
22832 case V16QI_FTYPE_V16QI:
22833 type = v16qi_ftype_v16qi;
22835 case V8SI_FTYPE_V8SF:
22836 type = v8si_ftype_v8sf;
22838 case V8SI_FTYPE_V4SI:
22839 type = v8si_ftype_v4si;
22841 case V8HI_FTYPE_V8HI:
22842 type = v8hi_ftype_v8hi;
22844 case V8HI_FTYPE_V16QI:
22845 type = v8hi_ftype_v16qi;
22847 case V8QI_FTYPE_V8QI:
22848 type = v8qi_ftype_v8qi;
22850 case V8SF_FTYPE_V8SF:
22851 type = v8sf_ftype_v8sf;
22853 case V8SF_FTYPE_V8SI:
22854 type = v8sf_ftype_v8si;
22856 case V8SF_FTYPE_V4SF:
22857 type = v8sf_ftype_v4sf;
22859 case V4SI_FTYPE_V4DF:
22860 type = v4si_ftype_v4df;
22862 case V4SI_FTYPE_V4SI:
22863 type = v4si_ftype_v4si;
22865 case V4SI_FTYPE_V16QI:
22866 type = v4si_ftype_v16qi;
22868 case V4SI_FTYPE_V8SI:
22869 type = v4si_ftype_v8si;
22871 case V4SI_FTYPE_V8HI:
22872 type = v4si_ftype_v8hi;
22874 case V4SI_FTYPE_V4SF:
22875 type = v4si_ftype_v4sf;
22877 case V4SI_FTYPE_V2DF:
22878 type = v4si_ftype_v2df;
22880 case V4HI_FTYPE_V4HI:
22881 type = v4hi_ftype_v4hi;
22883 case V4DF_FTYPE_V4DF:
22884 type = v4df_ftype_v4df;
22886 case V4DF_FTYPE_V4SI:
22887 type = v4df_ftype_v4si;
22889 case V4DF_FTYPE_V4SF:
22890 type = v4df_ftype_v4sf;
22892 case V4DF_FTYPE_V2DF:
22893 type = v4df_ftype_v2df;
22895 case V4SF_FTYPE_V4SF:
22896 case V4SF_FTYPE_V4SF_VEC_MERGE:
22897 type = v4sf_ftype_v4sf;
22899 case V4SF_FTYPE_V8SF:
22900 type = v4sf_ftype_v8sf;
22902 case V4SF_FTYPE_V4SI:
22903 type = v4sf_ftype_v4si;
22905 case V4SF_FTYPE_V4DF:
22906 type = v4sf_ftype_v4df;
22908 case V4SF_FTYPE_V2DF:
22909 type = v4sf_ftype_v2df;
22911 case V2DI_FTYPE_V2DI:
22912 type = v2di_ftype_v2di;
22914 case V2DI_FTYPE_V16QI:
22915 type = v2di_ftype_v16qi;
22917 case V2DI_FTYPE_V8HI:
22918 type = v2di_ftype_v8hi;
22920 case V2DI_FTYPE_V4SI:
22921 type = v2di_ftype_v4si;
22923 case V2SI_FTYPE_V2SI:
22924 type = v2si_ftype_v2si;
22926 case V2SI_FTYPE_V4SF:
22927 type = v2si_ftype_v4sf;
22929 case V2SI_FTYPE_V2DF:
22930 type = v2si_ftype_v2df;
22932 case V2SI_FTYPE_V2SF:
22933 type = v2si_ftype_v2sf;
22935 case V2DF_FTYPE_V4DF:
22936 type = v2df_ftype_v4df;
22938 case V2DF_FTYPE_V4SF:
22939 type = v2df_ftype_v4sf;
22941 case V2DF_FTYPE_V2DF:
22942 case V2DF_FTYPE_V2DF_VEC_MERGE:
22943 type = v2df_ftype_v2df;
22945 case V2DF_FTYPE_V2SI:
22946 type = v2df_ftype_v2si;
22948 case V2DF_FTYPE_V4SI:
22949 type = v2df_ftype_v4si;
22951 case V2SF_FTYPE_V2SF:
22952 type = v2sf_ftype_v2sf;
22954 case V2SF_FTYPE_V2SI:
22955 type = v2sf_ftype_v2si;
22957 case V16QI_FTYPE_V16QI_V16QI:
22958 type = v16qi_ftype_v16qi_v16qi;
22960 case V16QI_FTYPE_V8HI_V8HI:
22961 type = v16qi_ftype_v8hi_v8hi;
22963 case V8QI_FTYPE_V8QI_V8QI:
22964 type = v8qi_ftype_v8qi_v8qi;
22966 case V8QI_FTYPE_V4HI_V4HI:
22967 type = v8qi_ftype_v4hi_v4hi;
22969 case V8HI_FTYPE_V8HI_V8HI:
22970 case V8HI_FTYPE_V8HI_V8HI_COUNT:
22971 type = v8hi_ftype_v8hi_v8hi;
22973 case V8HI_FTYPE_V16QI_V16QI:
22974 type = v8hi_ftype_v16qi_v16qi;
22976 case V8HI_FTYPE_V4SI_V4SI:
22977 type = v8hi_ftype_v4si_v4si;
22979 case V8HI_FTYPE_V8HI_SI_COUNT:
22980 type = v8hi_ftype_v8hi_int;
22982 case V8SF_FTYPE_V8SF_V8SF:
22983 type = v8sf_ftype_v8sf_v8sf;
22985 case V8SF_FTYPE_V8SF_V8SI:
22986 type = v8sf_ftype_v8sf_v8si;
22988 case V4SI_FTYPE_V4SI_V4SI:
22989 case V4SI_FTYPE_V4SI_V4SI_COUNT:
22990 type = v4si_ftype_v4si_v4si;
22992 case V4SI_FTYPE_V8HI_V8HI:
22993 type = v4si_ftype_v8hi_v8hi;
22995 case V4SI_FTYPE_V4SF_V4SF:
22996 type = v4si_ftype_v4sf_v4sf;
22998 case V4SI_FTYPE_V2DF_V2DF:
22999 type = v4si_ftype_v2df_v2df;
23001 case V4SI_FTYPE_V4SI_SI_COUNT:
23002 type = v4si_ftype_v4si_int;
23004 case V4HI_FTYPE_V4HI_V4HI:
23005 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23006 type = v4hi_ftype_v4hi_v4hi;
23008 case V4HI_FTYPE_V8QI_V8QI:
23009 type = v4hi_ftype_v8qi_v8qi;
23011 case V4HI_FTYPE_V2SI_V2SI:
23012 type = v4hi_ftype_v2si_v2si;
23014 case V4HI_FTYPE_V4HI_SI_COUNT:
23015 type = v4hi_ftype_v4hi_int;
23017 case V4DF_FTYPE_V4DF_V4DF:
23018 type = v4df_ftype_v4df_v4df;
23020 case V4DF_FTYPE_V4DF_V4DI:
23021 type = v4df_ftype_v4df_v4di;
23023 case V4SF_FTYPE_V4SF_V4SF:
23024 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23025 type = v4sf_ftype_v4sf_v4sf;
23027 case V4SF_FTYPE_V4SF_V4SI:
23028 type = v4sf_ftype_v4sf_v4si;
23030 case V4SF_FTYPE_V4SF_V2SI:
23031 type = v4sf_ftype_v4sf_v2si;
23033 case V4SF_FTYPE_V4SF_V2DF:
23034 type = v4sf_ftype_v4sf_v2df;
23036 case V4SF_FTYPE_V4SF_DI:
23037 type = v4sf_ftype_v4sf_int64;
23039 case V4SF_FTYPE_V4SF_SI:
23040 type = v4sf_ftype_v4sf_int;
23042 case V2DI_FTYPE_V2DI_V2DI:
23043 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23044 type = v2di_ftype_v2di_v2di;
23046 case V2DI_FTYPE_V16QI_V16QI:
23047 type = v2di_ftype_v16qi_v16qi;
23049 case V2DI_FTYPE_V4SI_V4SI:
23050 type = v2di_ftype_v4si_v4si;
23052 case V2DI_FTYPE_V2DI_V16QI:
23053 type = v2di_ftype_v2di_v16qi;
23055 case V2DI_FTYPE_V2DF_V2DF:
23056 type = v2di_ftype_v2df_v2df;
23058 case V2DI_FTYPE_V2DI_SI_COUNT:
23059 type = v2di_ftype_v2di_int;
23061 case V2SI_FTYPE_V2SI_V2SI:
23062 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23063 type = v2si_ftype_v2si_v2si;
23065 case V2SI_FTYPE_V4HI_V4HI:
23066 type = v2si_ftype_v4hi_v4hi;
23068 case V2SI_FTYPE_V2SF_V2SF:
23069 type = v2si_ftype_v2sf_v2sf;
23071 case V2SI_FTYPE_V2SI_SI_COUNT:
23072 type = v2si_ftype_v2si_int;
23074 case V2DF_FTYPE_V2DF_V2DF:
23075 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23076 type = v2df_ftype_v2df_v2df;
23078 case V2DF_FTYPE_V2DF_V4SF:
23079 type = v2df_ftype_v2df_v4sf;
23081 case V2DF_FTYPE_V2DF_V2DI:
23082 type = v2df_ftype_v2df_v2di;
23084 case V2DF_FTYPE_V2DF_DI:
23085 type = v2df_ftype_v2df_int64;
23087 case V2DF_FTYPE_V2DF_SI:
23088 type = v2df_ftype_v2df_int;
23090 case V2SF_FTYPE_V2SF_V2SF:
23091 type = v2sf_ftype_v2sf_v2sf;
23093 case V1DI_FTYPE_V1DI_V1DI:
23094 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23095 type = v1di_ftype_v1di_v1di;
23097 case V1DI_FTYPE_V8QI_V8QI:
23098 type = v1di_ftype_v8qi_v8qi;
23100 case V1DI_FTYPE_V2SI_V2SI:
23101 type = v1di_ftype_v2si_v2si;
23103 case V1DI_FTYPE_V1DI_SI_COUNT:
23104 type = v1di_ftype_v1di_int;
23106 case UINT64_FTYPE_UINT64_UINT64:
23107 type = uint64_ftype_uint64_uint64;
23109 case UINT_FTYPE_UINT_UINT:
23110 type = unsigned_ftype_unsigned_unsigned;
23112 case UINT_FTYPE_UINT_USHORT:
23113 type = unsigned_ftype_unsigned_ushort;
23115 case UINT_FTYPE_UINT_UCHAR:
23116 type = unsigned_ftype_unsigned_uchar;
23118 case V8HI_FTYPE_V8HI_INT:
23119 type = v8hi_ftype_v8hi_int;
23121 case V8SF_FTYPE_V8SF_INT:
23122 type = v8sf_ftype_v8sf_int;
23124 case V4SI_FTYPE_V4SI_INT:
23125 type = v4si_ftype_v4si_int;
23127 case V4SI_FTYPE_V8SI_INT:
23128 type = v4si_ftype_v8si_int;
23130 case V4HI_FTYPE_V4HI_INT:
23131 type = v4hi_ftype_v4hi_int;
23133 case V4DF_FTYPE_V4DF_INT:
23134 type = v4df_ftype_v4df_int;
23136 case V4SF_FTYPE_V4SF_INT:
23137 type = v4sf_ftype_v4sf_int;
23139 case V4SF_FTYPE_V8SF_INT:
23140 type = v4sf_ftype_v8sf_int;
23142 case V2DI_FTYPE_V2DI_INT:
23143 case V2DI2TI_FTYPE_V2DI_INT:
23144 type = v2di_ftype_v2di_int;
23146 case V2DF_FTYPE_V2DF_INT:
23147 type = v2df_ftype_v2df_int;
23149 case V2DF_FTYPE_V4DF_INT:
23150 type = v2df_ftype_v4df_int;
23152 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23153 type = v16qi_ftype_v16qi_v16qi_v16qi;
23155 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23156 type = v8sf_ftype_v8sf_v8sf_v8sf;
23158 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23159 type = v4df_ftype_v4df_v4df_v4df;
23161 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23162 type = v4sf_ftype_v4sf_v4sf_v4sf;
23164 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23165 type = v2df_ftype_v2df_v2df_v2df;
23167 case V16QI_FTYPE_V16QI_V16QI_INT:
23168 type = v16qi_ftype_v16qi_v16qi_int;
23170 case V8SI_FTYPE_V8SI_V8SI_INT:
23171 type = v8si_ftype_v8si_v8si_int;
23173 case V8SI_FTYPE_V8SI_V4SI_INT:
23174 type = v8si_ftype_v8si_v4si_int;
23176 case V8HI_FTYPE_V8HI_V8HI_INT:
23177 type = v8hi_ftype_v8hi_v8hi_int;
23179 case V8SF_FTYPE_V8SF_V8SF_INT:
23180 type = v8sf_ftype_v8sf_v8sf_int;
23182 case V8SF_FTYPE_V8SF_V4SF_INT:
23183 type = v8sf_ftype_v8sf_v4sf_int;
23185 case V4SI_FTYPE_V4SI_V4SI_INT:
23186 type = v4si_ftype_v4si_v4si_int;
23188 case V4DF_FTYPE_V4DF_V4DF_INT:
23189 type = v4df_ftype_v4df_v4df_int;
23191 case V4DF_FTYPE_V4DF_V2DF_INT:
23192 type = v4df_ftype_v4df_v2df_int;
23194 case V4SF_FTYPE_V4SF_V4SF_INT:
23195 type = v4sf_ftype_v4sf_v4sf_int;
23197 case V2DI_FTYPE_V2DI_V2DI_INT:
23198 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23199 type = v2di_ftype_v2di_v2di_int;
23201 case V2DF_FTYPE_V2DF_V2DF_INT:
23202 type = v2df_ftype_v2df_v2df_int;
23204 case V2DI_FTYPE_V2DI_UINT_UINT:
23205 type = v2di_ftype_v2di_unsigned_unsigned;
23207 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23208 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23210 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23211 type = v1di_ftype_v1di_v1di_int;
23214 gcc_unreachable ();
23217 def_builtin_const (d->mask, d->name, type, d->code);
23220 /* pcmpestr[im] insns. */
23221 for (i = 0, d = bdesc_pcmpestr;
23222 i < ARRAY_SIZE (bdesc_pcmpestr);
23225 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23226 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23228 ftype = int_ftype_v16qi_int_v16qi_int_int;
23229 def_builtin_const (d->mask, d->name, ftype, d->code);
23232 /* pcmpistr[im] insns. */
23233 for (i = 0, d = bdesc_pcmpistr;
23234 i < ARRAY_SIZE (bdesc_pcmpistr);
23237 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23238 ftype = v16qi_ftype_v16qi_v16qi_int;
23240 ftype = int_ftype_v16qi_v16qi_int;
23241 def_builtin_const (d->mask, d->name, ftype, d->code);
23244 /* comi/ucomi insns. */
23245 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23246 if (d->mask == OPTION_MASK_ISA_SSE2)
23247 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23249 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23252 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23253 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23255 /* SSE or 3DNow!A */
23256 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23259 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23261 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23262 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23265 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23266 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23269 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23270 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23271 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23272 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23273 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23274 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23277 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23280 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23281 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23283 /* Access to the vec_init patterns. */
23284 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23285 integer_type_node, NULL_TREE);
23286 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23288 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23289 short_integer_type_node,
23290 short_integer_type_node,
23291 short_integer_type_node, NULL_TREE);
23292 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23294 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23295 char_type_node, char_type_node,
23296 char_type_node, char_type_node,
23297 char_type_node, char_type_node,
23298 char_type_node, NULL_TREE);
23299 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23301 /* Access to the vec_extract patterns. */
23302 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23303 integer_type_node, NULL_TREE);
23304 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23306 ftype = build_function_type_list (long_long_integer_type_node,
23307 V2DI_type_node, integer_type_node,
23309 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23311 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23312 integer_type_node, NULL_TREE);
23313 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23315 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23316 integer_type_node, NULL_TREE);
23317 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23319 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23320 integer_type_node, NULL_TREE);
23321 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23323 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23324 integer_type_node, NULL_TREE);
23325 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23327 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23328 integer_type_node, NULL_TREE);
23329 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23331 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23332 integer_type_node, NULL_TREE);
23333 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23335 /* Access to the vec_set patterns. */
23336 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23338 integer_type_node, NULL_TREE);
23339 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23341 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23343 integer_type_node, NULL_TREE);
23344 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23346 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23348 integer_type_node, NULL_TREE);
23349 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23351 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23353 integer_type_node, NULL_TREE);
23354 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23356 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23358 integer_type_node, NULL_TREE);
23359 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23361 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23363 integer_type_node, NULL_TREE);
23364 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23366 /* Add SSE5 multi-arg argument instructions */
23367 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23369 tree mtype = NULL_TREE;
23374 switch ((enum multi_arg_type)d->flag)
23376 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23377 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23378 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23379 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23380 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23381 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23382 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23383 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23384 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23385 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23386 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23387 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23388 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23389 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23390 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23391 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23392 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23393 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23394 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23395 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23396 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23397 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23398 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23399 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23400 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23401 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23402 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23403 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23404 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23405 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23406 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23407 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23408 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23409 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23410 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23411 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23412 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23413 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23414 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23415 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23416 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23417 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23418 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23419 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23420 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23421 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23422 case MULTI_ARG_UNKNOWN:
23424 gcc_unreachable ();
23428 def_builtin_const (d->mask, d->name, mtype, d->code);
/* NOTE(review): this region is a lossy extraction -- the leading numeric
   token on each line is the original file's line number, and some source
   lines between them (the "static void" return type, braces, and e.g. the
   assignment "fnvoid_va_end_ms =") are missing.  Code left byte-identical;
   comments only.  */
23432 /* Internal method for ix86_init_builtins. */
/* Registers the __builtin_ms_va_* and __builtin_sysv_va_* builtins so both
   the Microsoft and System V varargs ABIs are available on x86.  Each set
   of declarations is tagged with the matching "ms_abi"/"sysv_abi"
   attribute list built below.  */
23435 ix86_init_builtins_va_builtins_abi (void)
23437 tree ms_va_ref, sysv_va_ref;
23438 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23439 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23440 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23441 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists used to mark each builtin with its calling ABI.  */
23445 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23446 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* Reference types for the two va_list flavors; the sysv one is built from
   the element type of sysv_va_list_type_node (assignment line elided).  */
23447 ms_va_ref = build_reference_type (ms_va_list_type_node);
23449 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Function types: va_end takes (va_list&); va_start is varargs;
   va_copy takes (va_list&, va_list).  The lvalue targets of some of
   these assignments are on elided lines.  */
23452 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23453 fnvoid_va_start_ms =
23454 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23455 fnvoid_va_end_sysv =
23456 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23457 fnvoid_va_start_sysv =
23458 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23460 fnvoid_va_copy_ms =
23461 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23463 fnvoid_va_copy_sysv =
23464 build_function_type_list (void_type_node, sysv_va_ref,
23465 sysv_va_ref, NULL_TREE);
/* Register the six entry points: start/end/copy for each ABI, mapped onto
   the generic BUILT_IN_VA_* codes and carrying the per-ABI attribute.  */
23467 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23468 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23469 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23470 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23471 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23472 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23473 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23474 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23475 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23476 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23477 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23478 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* NOTE(review): lossy extraction -- the leading number on each line is the
   original file's line number; the return-type line, braces and several
   trailing arguments of calls are on elided lines.  Code byte-identical.  */
/* Top-level builtin initializer for the i386 back end: registers the
   __float80 and __float128 types, the TFmode builtins (__builtin_infq,
   __builtin_huge_valq, __builtin_fabsq, __builtin_copysignq), then
   delegates to the MMX/SSE and varargs-ABI initializers below.  */
23482 ix86_init_builtins (void)
23484 tree float128_type_node = make_node (REAL_TYPE);
23487 /* The __float80 type. */
/* When long double is already XFmode, reuse it as __float80 ...  */
23488 if (TYPE_MODE (long_double_type_node) == XFmode)
23489 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23493 /* The __float80 type. */
/* ... otherwise build a distinct 80-bit REAL_TYPE and register it.  */
23494 tree float80_type_node = make_node (REAL_TYPE);
23496 TYPE_PRECISION (float80_type_node) = 80;
23497 layout_type (float80_type_node);
23498 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23502 /* The __float128 type. */
23503 TYPE_PRECISION (float128_type_node) = 128;
23504 layout_type (float128_type_node);
23505 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23508 /* TFmode support builtins. */
/* __builtin_infq and __builtin_huge_valq share the () -> __float128
   signature built here.  */
23509 ftype = build_function_type (float128_type_node, void_list_node);
23510 decl = add_builtin_function ("__builtin_infq", ftype,
23511 IX86_BUILTIN_INFQ, BUILT_IN_MD,
23513 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23515 decl = add_builtin_function ("__builtin_huge_valq", ftype,
23516 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
23518 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
23520 /* We will expand them to normal call if SSE2 isn't available since
23521 they are used by libgcc. */
/* __builtin_fabsq maps to the libgcc symbol __fabstf2; marked
   TREE_READONLY since it has no side effects.  */
23522 ftype = build_function_type_list (float128_type_node,
23523 float128_type_node,
23525 decl = add_builtin_function ("__builtin_fabsq", ftype,
23526 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23527 "__fabstf2", NULL_TREE);
23528 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
23529 TREE_READONLY (decl) = 1;
/* __builtin_copysignq maps to __copysigntf3; also side-effect free.  */
23531 ftype = build_function_type_list (float128_type_node,
23532 float128_type_node,
23533 float128_type_node,
23535 decl = add_builtin_function ("__builtin_copysignq", ftype,
23536 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23537 "__copysigntf3", NULL_TREE);
23538 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23539 TREE_READONLY (decl) = 1;
23541 ix86_init_mmx_sse_builtins ();
23543 ix86_init_builtins_va_builtins_abi ();
23546 /* Errors in the source file can cause expand_expr to return const0_rtx
23547 where we expect a vector. To avoid crashing, use one of the vector
23548 clear instructions. */
/* NOTE(review): lossy extraction -- the "static rtx" line, braces and the
   trailing "return x;" are on elided lines.  Visible behavior: when the
   expanded operand is const0_rtx, substitute the zero constant of the
   expected vector MODE; otherwise X is returned unchanged.  */
23550 safe_vector_operand (rtx x, enum machine_mode mode)
23552 if (x == const0_rtx)
23553 x = CONST0_RTX (mode);
23557 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* NOTE(review): lossy extraction -- the "static rtx" line, braces and the
   tail (failure check on PAT, emit_insn, return of TARGET) are on elided
   lines.  ICODE is the insn pattern to emit, EXP the CALL_EXPR with two
   arguments, TARGET an optional destination register.  */
23560 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23563 tree arg0 = CALL_EXPR_ARG (exp, 0);
23564 tree arg1 = CALL_EXPR_ARG (exp, 1);
23565 rtx op0 = expand_normal (arg0);
23566 rtx op1 = expand_normal (arg1);
23567 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23568 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23569 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for vector operands (see
   safe_vector_operand).  */
23571 if (VECTOR_MODE_P (mode0))
23572 op0 = safe_vector_operand (op0, mode0);
23573 if (VECTOR_MODE_P (mode1))
23574 op1 = safe_vector_operand (op1, mode1);
/* Use a fresh pseudo unless TARGET is already a valid destination.  */
23576 if (optimize || !target
23577 || GET_MODE (target) != tmode
23578 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23579 target = gen_reg_rtx (tmode);
/* A SImode operand feeding a TImode pattern is widened by loading it
   into a V4SI register via sse2_loadd and taking the TImode lowpart.  */
23581 if (GET_MODE (op1) == SImode && mode1 == TImode)
23583 rtx x = gen_reg_rtx (V4SImode);
23584 emit_insn (gen_sse2_loadd (x, op1));
23585 op1 = gen_lowpart (TImode, x);
/* Force operands the pattern's predicates reject into registers.  */
23588 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23589 op0 = copy_to_mode_reg (mode0, op0);
23590 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23591 op1 = copy_to_mode_reg (mode1, op1);
23593 pat = GEN_FCN (icode) (target, op0, op1);
23602 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* NOTE(review): heavily elided extraction -- the switch below has lost its
   "nargs = N;" and "break;" lines, and several declarations, braces and the
   emit/return tail are missing.  Visible structure: M_TYPE selects the
   argument count and whether the last argument must be an immediate
   (last_arg_constant), whether the insn is a comparison (comparison_p,
   which inserts a comparison rtx built from SUB_CODE), or a "TF" variant;
   operands are then expanded, forced to satisfy the pattern predicates,
   and the insn is generated with GEN_FCN.  */
23605 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23606 enum multi_arg_type m_type,
23607 enum insn_code sub_code)
23612 bool comparison_p = false;
23614 bool last_arg_constant = false;
23615 int num_memory = 0;
23618 enum machine_mode mode;
23621 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Three-operand forms.  */
23625 case MULTI_ARG_3_SF:
23626 case MULTI_ARG_3_DF:
23627 case MULTI_ARG_3_DI:
23628 case MULTI_ARG_3_SI:
23629 case MULTI_ARG_3_SI_DI:
23630 case MULTI_ARG_3_HI:
23631 case MULTI_ARG_3_HI_SI:
23632 case MULTI_ARG_3_QI:
23633 case MULTI_ARG_3_PERMPS:
23634 case MULTI_ARG_3_PERMPD:
/* Two-operand forms.  */
23638 case MULTI_ARG_2_SF:
23639 case MULTI_ARG_2_DF:
23640 case MULTI_ARG_2_DI:
23641 case MULTI_ARG_2_SI:
23642 case MULTI_ARG_2_HI:
23643 case MULTI_ARG_2_QI:
/* Two operands where the second must be a compile-time immediate.  */
23647 case MULTI_ARG_2_DI_IMM:
23648 case MULTI_ARG_2_SI_IMM:
23649 case MULTI_ARG_2_HI_IMM:
23650 case MULTI_ARG_2_QI_IMM:
23652 last_arg_constant = true;
/* One-operand forms, including widening conversions.  */
23655 case MULTI_ARG_1_SF:
23656 case MULTI_ARG_1_DF:
23657 case MULTI_ARG_1_DI:
23658 case MULTI_ARG_1_SI:
23659 case MULTI_ARG_1_HI:
23660 case MULTI_ARG_1_QI:
23661 case MULTI_ARG_1_SI_DI:
23662 case MULTI_ARG_1_HI_DI:
23663 case MULTI_ARG_1_HI_SI:
23664 case MULTI_ARG_1_QI_DI:
23665 case MULTI_ARG_1_QI_SI:
23666 case MULTI_ARG_1_QI_HI:
23667 case MULTI_ARG_1_PH2PS:
23668 case MULTI_ARG_1_PS2PH:
/* Comparison forms: the generated insn embeds a comparison rtx.  */
23672 case MULTI_ARG_2_SF_CMP:
23673 case MULTI_ARG_2_DF_CMP:
23674 case MULTI_ARG_2_DI_CMP:
23675 case MULTI_ARG_2_SI_CMP:
23676 case MULTI_ARG_2_HI_CMP:
23677 case MULTI_ARG_2_QI_CMP:
23679 comparison_p = true;
/* "TF" (test-form) variants.  */
23682 case MULTI_ARG_2_SF_TF:
23683 case MULTI_ARG_2_DF_TF:
23684 case MULTI_ARG_2_DI_TF:
23685 case MULTI_ARG_2_SI_TF:
23686 case MULTI_ARG_2_HI_TF:
23687 case MULTI_ARG_2_QI_TF:
23692 case MULTI_ARG_UNKNOWN:
23694 gcc_unreachable ();
/* Pick a destination register unless TARGET already qualifies.  */
23697 if (optimize || !target
23698 || GET_MODE (target) != tmode
23699 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23700 target = gen_reg_rtx (tmode);
23702 gcc_assert (nargs <= 4);
/* Expand and legitimize each call argument.  For comparisons the operand
   indices are shifted by one because operand 1 is the comparison rtx.  */
23704 for (i = 0; i < nargs; i++)
23706 tree arg = CALL_EXPR_ARG (exp, i);
23707 rtx op = expand_normal (arg);
23708 int adjust = (comparison_p) ? 1 : 0;
23709 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
23711 if (last_arg_constant && i == nargs-1)
23713 if (GET_CODE (op) != CONST_INT)
23715 error ("last argument must be an immediate");
23716 return gen_reg_rtx (tmode);
23721 if (VECTOR_MODE_P (mode))
23722 op = safe_vector_operand (op, mode);
23724 /* If we aren't optimizing, only allow one memory operand to be
23726 if (memory_operand (op, mode))
23729 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23732 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
23734 op = force_reg (mode, op);
23738 args[i].mode = mode;
/* Generate the insn: unary, binary (plus SUB_CODE immediate for TF
   forms, or a comparison rtx for CMP forms), or ternary.  */
23744 pat = GEN_FCN (icode) (target, args[0].op);
23749 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23750 GEN_INT ((int)sub_code));
23751 else if (! comparison_p)
23752 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23755 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23759 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23764 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23768 gcc_unreachable ();
23778 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23779 insns with vec_merge. */
/* Expands a scalar unary SSE builtin whose insn pattern merges the scalar
   result back into the source vector (vec_merge).  ICODE is the insn code,
   EXP the CALL_EXPR, TARGET a suggested result rtx.
   NOTE(review): this is a sampled listing -- the return-type line, the
   initialization of OP1, and the tail (emit_insn/return) are not visible
   here; verify against the full source.  */
23782 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23786 tree arg0 = CALL_EXPR_ARG (exp, 0);
23787 rtx op1, op0 = expand_normal (arg0);
/* Result and input modes come from the insn pattern's operand table.  */
23788 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23789 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Use a fresh pseudo unless TARGET already satisfies operand 0.  */
23791 if (optimize || !target
23792 || GET_MODE (target) != tmode
23793 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23794 target = gen_reg_rtx (tmode);
23796 if (VECTOR_MODE_P (mode0))
23797 op0 = safe_vector_operand (op0, mode0);
/* Legitimize the operands against the pattern's predicates.  */
23799 if ((optimize && !register_operand (op0, mode0))
23800 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23801 op0 = copy_to_mode_reg (mode0, op0);
23804 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23805 op1 = copy_to_mode_reg (mode0, op1);
23807 pat = GEN_FCN (icode) (target, op0, op1);
23814 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands an SSE comparison builtin described by D.  EXP is the CALL_EXPR,
   TARGET a suggested result rtx.  SWAP requests exchanging the two operands
   (used when only the reversed form of the comparison is available).  */
23817 ix86_expand_sse_compare (const struct builtin_description *d,
23818 tree exp, rtx target, bool swap)
23821 tree arg0 = CALL_EXPR_ARG (exp, 0);
23822 tree arg1 = CALL_EXPR_ARG (exp, 1);
23823 rtx op0 = expand_normal (arg0);
23824 rtx op1 = expand_normal (arg1);
/* Modes of the result and both inputs, per the insn's operand table.  */
23826 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23827 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23828 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23829 enum rtx_code comparison = d->comparison;
23831 if (VECTOR_MODE_P (mode0))
23832 op0 = safe_vector_operand (op0, mode0);
23833 if (VECTOR_MODE_P (mode1))
23834 op1 = safe_vector_operand (op1, mode1);
23836 /* Swap operands if we have a comparison that isn't available in
/* Go through a fresh register so the swap does not clobber a source.  */
23840 rtx tmp = gen_reg_rtx (mode1);
23841 emit_move_insn (tmp, op1);
23846 if (optimize || !target
23847 || GET_MODE (target) != tmode
23848 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23849 target = gen_reg_rtx (tmode);
23851 if ((optimize && !register_operand (op0, mode0))
23852 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23853 op0 = copy_to_mode_reg (mode0, op0);
23854 if ((optimize && !register_operand (op1, mode1))
23855 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23856 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison rtx itself is passed as the insn's extra operand.  */
23858 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23859 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23866 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands a COMIS[SD]-style builtin: compares two scalar operands, then
   materializes the flags result as an integer.  Returns an SImode rtx
   holding 0/1 (via the SUBREG trick below).  */
23869 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23873 tree arg0 = CALL_EXPR_ARG (exp, 0);
23874 tree arg1 = CALL_EXPR_ARG (exp, 1);
23875 rtx op0 = expand_normal (arg0);
23876 rtx op1 = expand_normal (arg1);
23877 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23878 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23879 enum rtx_code comparison = d->comparison;
23881 if (VECTOR_MODE_P (mode0))
23882 op0 = safe_vector_operand (op0, mode0);
23883 if (VECTOR_MODE_P (mode1))
23884 op1 = safe_vector_operand (op1, mode1);
23886 /* Swap operands if we have a comparison that isn't available in
23888 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero an SImode pseudo, then set only its low QImode part from the
   flags comparison; returning SUBREG_REG yields the full SImode value.  */
23895 target = gen_reg_rtx (SImode);
23896 emit_move_insn (target, const0_rtx);
23897 target = gen_rtx_SUBREG (QImode, target, 0);
23899 if ((optimize && !register_operand (op0, mode0))
23900 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23901 op0 = copy_to_mode_reg (mode0, op0);
23902 if ((optimize && !register_operand (op1, mode1))
23903 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23904 op1 = copy_to_mode_reg (mode1, op1);
23906 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the low byte of TARGET from the comparison on the flags register.  */
23910 emit_insn (gen_rtx_SET (VOIDmode,
23911 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23912 gen_rtx_fmt_ee (comparison, QImode,
23916 return SUBREG_REG (target);
23919 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expands a PTEST-style builtin: performs the vector test, then converts
   the resulting flags to a 0/1 integer using the same SImode/QImode
   low-part trick as ix86_expand_sse_comi.  */
23922 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
23926 tree arg0 = CALL_EXPR_ARG (exp, 0);
23927 tree arg1 = CALL_EXPR_ARG (exp, 1);
23928 rtx op0 = expand_normal (arg0);
23929 rtx op1 = expand_normal (arg1);
23930 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23931 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23932 enum rtx_code comparison = d->comparison;
23934 if (VECTOR_MODE_P (mode0))
23935 op0 = safe_vector_operand (op0, mode0);
23936 if (VECTOR_MODE_P (mode1))
23937 op1 = safe_vector_operand (op1, mode1);
/* Zero an SImode pseudo and expose its low byte for the flag result.  */
23939 target = gen_reg_rtx (SImode);
23940 emit_move_insn (target, const0_rtx);
23941 target = gen_rtx_SUBREG (QImode, target, 0);
23943 if ((optimize && !register_operand (op0, mode0))
23944 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23945 op0 = copy_to_mode_reg (mode0, op0);
23946 if ((optimize && !register_operand (op1, mode1))
23947 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23948 op1 = copy_to_mode_reg (mode1, op1);
23950 pat = GEN_FCN (d->icode) (op0, op1);
23954 emit_insn (gen_rtx_SET (VOIDmode,
23955 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23956 gen_rtx_fmt_ee (comparison, QImode,
23960 return SUBREG_REG (target);
23963 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expands the five-argument SSE4.2 PCMPESTRI/PCMPESTRM builtins.  The insn
   pattern has two outputs (index result and mask result); depending on
   which builtin was called, one becomes TARGET and the other a scratch.
   For the flag-extraction variants (neither I nor M form), the flags
   register selected by d->flag is converted to a 0/1 integer.  */
23966 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
23967 tree exp, rtx target)
23970 tree arg0 = CALL_EXPR_ARG (exp, 0);
23971 tree arg1 = CALL_EXPR_ARG (exp, 1);
23972 tree arg2 = CALL_EXPR_ARG (exp, 2);
23973 tree arg3 = CALL_EXPR_ARG (exp, 3);
23974 tree arg4 = CALL_EXPR_ARG (exp, 4);
23975 rtx scratch0, scratch1;
23976 rtx op0 = expand_normal (arg0);
23977 rtx op1 = expand_normal (arg1);
23978 rtx op2 = expand_normal (arg2);
23979 rtx op3 = expand_normal (arg3);
23980 rtx op4 = expand_normal (arg4);
/* v = vector operand, i = integer (explicit length), imm = control byte.  */
23981 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
23983 tmode0 = insn_data[d->icode].operand[0].mode;
23984 tmode1 = insn_data[d->icode].operand[1].mode;
23985 modev2 = insn_data[d->icode].operand[2].mode;
23986 modei3 = insn_data[d->icode].operand[3].mode;
23987 modev4 = insn_data[d->icode].operand[4].mode;
23988 modei5 = insn_data[d->icode].operand[5].mode;
23989 modeimm = insn_data[d->icode].operand[6].mode;
23991 if (VECTOR_MODE_P (modev2))
23992 op0 = safe_vector_operand (op0, modev2);
23993 if (VECTOR_MODE_P (modev4))
23994 op2 = safe_vector_operand (op2, modev4);
/* Legitimize all inputs against the pattern's predicates.  */
23996 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23997 op0 = copy_to_mode_reg (modev2, op0);
23998 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
23999 op1 = copy_to_mode_reg (modei3, op1);
24000 if ((optimize && !register_operand (op2, modev4))
24001 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24002 op2 = copy_to_mode_reg (modev4, op2);
24003 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24004 op3 = copy_to_mode_reg (modei5, op3);
/* The control operand must be an 8-bit immediate; diagnose otherwise.  */
24006 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24008 error ("the fifth argument must be a 8-bit immediate");
24012 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24014 if (optimize || !target
24015 || GET_MODE (target) != tmode0
24016 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24017 target = gen_reg_rtx (tmode0);
24019 scratch1 = gen_reg_rtx (tmode1);
24021 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24023 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24025 if (optimize || !target
24026 || GET_MODE (target) != tmode1
24027 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24028 target = gen_reg_rtx (tmode1);
24030 scratch0 = gen_reg_rtx (tmode0);
24032 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-extraction variant: both insn outputs are scratches.  */
24036 gcc_assert (d->flag);
24038 scratch0 = gen_reg_rtx (tmode0);
24039 scratch1 = gen_reg_rtx (tmode1);
24041 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Convert the flags bit named by d->flag into a 0/1 SImode value.  */
24051 target = gen_reg_rtx (SImode);
24052 emit_move_insn (target, const0_rtx);
24053 target = gen_rtx_SUBREG (QImode, target, 0);
24056 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24057 gen_rtx_fmt_ee (EQ, QImode,
24058 gen_rtx_REG ((enum machine_mode) d->flag,
24061 return SUBREG_REG (target);
24068 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expands the three-argument SSE4.2 PCMPISTRI/PCMPISTRM builtins (implicit
   string lengths).  Structure mirrors ix86_expand_sse_pcmpestr: two insn
   outputs, one selected as TARGET per builtin code; the flag-extraction
   variants read the flags register named by d->flag.  */
24071 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24072 tree exp, rtx target)
24075 tree arg0 = CALL_EXPR_ARG (exp, 0);
24076 tree arg1 = CALL_EXPR_ARG (exp, 1);
24077 tree arg2 = CALL_EXPR_ARG (exp, 2);
24078 rtx scratch0, scratch1;
24079 rtx op0 = expand_normal (arg0);
24080 rtx op1 = expand_normal (arg1);
24081 rtx op2 = expand_normal (arg2);
24082 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24084 tmode0 = insn_data[d->icode].operand[0].mode;
24085 tmode1 = insn_data[d->icode].operand[1].mode;
24086 modev2 = insn_data[d->icode].operand[2].mode;
24087 modev3 = insn_data[d->icode].operand[3].mode;
24088 modeimm = insn_data[d->icode].operand[4].mode;
24090 if (VECTOR_MODE_P (modev2))
24091 op0 = safe_vector_operand (op0, modev2);
24092 if (VECTOR_MODE_P (modev3))
24093 op1 = safe_vector_operand (op1, modev3);
24095 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24096 op0 = copy_to_mode_reg (modev2, op0);
24097 if ((optimize && !register_operand (op1, modev3))
24098 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24099 op1 = copy_to_mode_reg (modev3, op1);
/* The control operand must be an 8-bit immediate; diagnose otherwise.  */
24101 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24103 error ("the third argument must be a 8-bit immediate");
24107 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24109 if (optimize || !target
24110 || GET_MODE (target) != tmode0
24111 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24112 target = gen_reg_rtx (tmode0);
24114 scratch1 = gen_reg_rtx (tmode1);
24116 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24118 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24120 if (optimize || !target
24121 || GET_MODE (target) != tmode1
24122 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24123 target = gen_reg_rtx (tmode1);
24125 scratch0 = gen_reg_rtx (tmode0);
24127 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-extraction variant: both insn outputs are scratches.  */
24131 gcc_assert (d->flag);
24133 scratch0 = gen_reg_rtx (tmode0);
24134 scratch1 = gen_reg_rtx (tmode1);
24136 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Convert the flags bit named by d->flag into a 0/1 SImode value.  */
24146 target = gen_reg_rtx (SImode);
24147 emit_move_insn (target, const0_rtx);
24148 target = gen_rtx_SUBREG (QImode, target, 0);
24151 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24152 gen_rtx_fmt_ee (EQ, QImode,
24153 gen_rtx_REG ((enum machine_mode) d->flag,
24156 return SUBREG_REG (target);
24162 /* Subroutine of ix86_expand_builtin to take care of insns with
24163 variable number of operands. */
/* Generic expander for the bulk of the SSE/AVX builtins.  D describes the
   builtin (insn code, comparison, and a function-type tag in d->flag);
   EXP is the CALL_EXPR and TARGET a suggested result rtx.  The big switch
   below dispatches on the builtin's type signature to set the argument
   count and per-argument constraints, then delegates comparison/ptest/
   vec_merge forms to their dedicated helpers.  */
24166 ix86_expand_args_builtin (const struct builtin_description *d,
24167 tree exp, rtx target)
24169 rtx pat, real_target;
24170 unsigned int i, nargs;
24171 unsigned int nargs_constant = 0;
24172 int num_memory = 0;
24176 enum machine_mode mode;
24178 bool last_arg_count = false;
24179 enum insn_code icode = d->icode;
24180 const struct insn_data *insn_p = &insn_data[icode];
24181 enum machine_mode tmode = insn_p->operand[0].mode;
/* rmode, when set, is the mode the caller sees; the insn itself runs in
   tmode and the result is accessed through a paradoxical subreg.  */
24182 enum machine_mode rmode = VOIDmode;
24184 enum rtx_code comparison = d->comparison;
/* Classify by the builtin's function-type signature (encoded in d->flag).  */
24186 switch ((enum ix86_builtin_type) d->flag)
24188 case INT_FTYPE_V8SF_V8SF_PTEST:
24189 case INT_FTYPE_V4DI_V4DI_PTEST:
24190 case INT_FTYPE_V4DF_V4DF_PTEST:
24191 case INT_FTYPE_V4SF_V4SF_PTEST:
24192 case INT_FTYPE_V2DI_V2DI_PTEST:
24193 case INT_FTYPE_V2DF_V2DF_PTEST:
24194 return ix86_expand_sse_ptest (d, exp, target);
24195 case FLOAT128_FTYPE_FLOAT128:
24196 case FLOAT_FTYPE_FLOAT:
24197 case INT64_FTYPE_V4SF:
24198 case INT64_FTYPE_V2DF:
24199 case INT_FTYPE_V16QI:
24200 case INT_FTYPE_V8QI:
24201 case INT_FTYPE_V8SF:
24202 case INT_FTYPE_V4DF:
24203 case INT_FTYPE_V4SF:
24204 case INT_FTYPE_V2DF:
24205 case V16QI_FTYPE_V16QI:
24206 case V8SI_FTYPE_V8SF:
24207 case V8SI_FTYPE_V4SI:
24208 case V8HI_FTYPE_V8HI:
24209 case V8HI_FTYPE_V16QI:
24210 case V8QI_FTYPE_V8QI:
24211 case V8SF_FTYPE_V8SF:
24212 case V8SF_FTYPE_V8SI:
24213 case V8SF_FTYPE_V4SF:
24214 case V4SI_FTYPE_V4SI:
24215 case V4SI_FTYPE_V16QI:
24216 case V4SI_FTYPE_V4SF:
24217 case V4SI_FTYPE_V8SI:
24218 case V4SI_FTYPE_V8HI:
24219 case V4SI_FTYPE_V4DF:
24220 case V4SI_FTYPE_V2DF:
24221 case V4HI_FTYPE_V4HI:
24222 case V4DF_FTYPE_V4DF:
24223 case V4DF_FTYPE_V4SI:
24224 case V4DF_FTYPE_V4SF:
24225 case V4DF_FTYPE_V2DF:
24226 case V4SF_FTYPE_V4SF:
24227 case V4SF_FTYPE_V4SI:
24228 case V4SF_FTYPE_V8SF:
24229 case V4SF_FTYPE_V4DF:
24230 case V4SF_FTYPE_V2DF:
24231 case V2DI_FTYPE_V2DI:
24232 case V2DI_FTYPE_V16QI:
24233 case V2DI_FTYPE_V8HI:
24234 case V2DI_FTYPE_V4SI:
24235 case V2DF_FTYPE_V2DF:
24236 case V2DF_FTYPE_V4SI:
24237 case V2DF_FTYPE_V4DF:
24238 case V2DF_FTYPE_V4SF:
24239 case V2DF_FTYPE_V2SI:
24240 case V2SI_FTYPE_V2SI:
24241 case V2SI_FTYPE_V4SF:
24242 case V2SI_FTYPE_V2SF:
24243 case V2SI_FTYPE_V2DF:
24244 case V2SF_FTYPE_V2SF:
24245 case V2SF_FTYPE_V2SI:
24248 case V4SF_FTYPE_V4SF_VEC_MERGE:
24249 case V2DF_FTYPE_V2DF_VEC_MERGE:
24250 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24251 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24252 case V16QI_FTYPE_V16QI_V16QI:
24253 case V16QI_FTYPE_V8HI_V8HI:
24254 case V8QI_FTYPE_V8QI_V8QI:
24255 case V8QI_FTYPE_V4HI_V4HI:
24256 case V8HI_FTYPE_V8HI_V8HI:
24257 case V8HI_FTYPE_V16QI_V16QI:
24258 case V8HI_FTYPE_V4SI_V4SI:
24259 case V8SF_FTYPE_V8SF_V8SF:
24260 case V8SF_FTYPE_V8SF_V8SI:
24261 case V4SI_FTYPE_V4SI_V4SI:
24262 case V4SI_FTYPE_V8HI_V8HI:
24263 case V4SI_FTYPE_V4SF_V4SF:
24264 case V4SI_FTYPE_V2DF_V2DF:
24265 case V4HI_FTYPE_V4HI_V4HI:
24266 case V4HI_FTYPE_V8QI_V8QI:
24267 case V4HI_FTYPE_V2SI_V2SI:
24268 case V4DF_FTYPE_V4DF_V4DF:
24269 case V4DF_FTYPE_V4DF_V4DI:
24270 case V4SF_FTYPE_V4SF_V4SF:
24271 case V4SF_FTYPE_V4SF_V4SI:
24272 case V4SF_FTYPE_V4SF_V2SI:
24273 case V4SF_FTYPE_V4SF_V2DF:
24274 case V4SF_FTYPE_V4SF_DI:
24275 case V4SF_FTYPE_V4SF_SI:
24276 case V2DI_FTYPE_V2DI_V2DI:
24277 case V2DI_FTYPE_V16QI_V16QI:
24278 case V2DI_FTYPE_V4SI_V4SI:
24279 case V2DI_FTYPE_V2DI_V16QI:
24280 case V2DI_FTYPE_V2DF_V2DF:
24281 case V2SI_FTYPE_V2SI_V2SI:
24282 case V2SI_FTYPE_V4HI_V4HI:
24283 case V2SI_FTYPE_V2SF_V2SF:
24284 case V2DF_FTYPE_V2DF_V2DF:
24285 case V2DF_FTYPE_V2DF_V4SF:
24286 case V2DF_FTYPE_V2DF_V2DI:
24287 case V2DF_FTYPE_V2DF_DI:
24288 case V2DF_FTYPE_V2DF_SI:
24289 case V2SF_FTYPE_V2SF_V2SF:
24290 case V1DI_FTYPE_V1DI_V1DI:
24291 case V1DI_FTYPE_V8QI_V8QI:
24292 case V1DI_FTYPE_V2SI_V2SI:
/* Plain binops go to the binop expander; comparisons fall through
   to the SSE compare path selected after the switch.  */
24293 if (comparison == UNKNOWN)
24294 return ix86_expand_binop_builtin (icode, exp, target);
24297 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24298 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24299 gcc_assert (comparison != UNKNOWN);
/* _COUNT variants: the last argument is a shift count.  */
24303 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24304 case V8HI_FTYPE_V8HI_SI_COUNT:
24305 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24306 case V4SI_FTYPE_V4SI_SI_COUNT:
24307 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24308 case V4HI_FTYPE_V4HI_SI_COUNT:
24309 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24310 case V2DI_FTYPE_V2DI_SI_COUNT:
24311 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24312 case V2SI_FTYPE_V2SI_SI_COUNT:
24313 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24314 case V1DI_FTYPE_V1DI_SI_COUNT:
24316 last_arg_count = true;
24318 case UINT64_FTYPE_UINT64_UINT64:
24319 case UINT_FTYPE_UINT_UINT:
24320 case UINT_FTYPE_UINT_USHORT:
24321 case UINT_FTYPE_UINT_UCHAR:
/* 2TI/2DI tags: caller's mode differs from the insn's (rmode).  */
24324 case V2DI2TI_FTYPE_V2DI_INT:
24327 nargs_constant = 1;
24329 case V8HI_FTYPE_V8HI_INT:
24330 case V8SF_FTYPE_V8SF_INT:
24331 case V4SI_FTYPE_V4SI_INT:
24332 case V4SI_FTYPE_V8SI_INT:
24333 case V4HI_FTYPE_V4HI_INT:
24334 case V4DF_FTYPE_V4DF_INT:
24335 case V4SF_FTYPE_V4SF_INT:
24336 case V4SF_FTYPE_V8SF_INT:
24337 case V2DI_FTYPE_V2DI_INT:
24338 case V2DF_FTYPE_V2DF_INT:
24339 case V2DF_FTYPE_V4DF_INT:
24341 nargs_constant = 1;
24343 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24344 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24345 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24346 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24347 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24350 case V16QI_FTYPE_V16QI_V16QI_INT:
24351 case V8HI_FTYPE_V8HI_V8HI_INT:
24352 case V8SI_FTYPE_V8SI_V8SI_INT:
24353 case V8SI_FTYPE_V8SI_V4SI_INT:
24354 case V8SF_FTYPE_V8SF_V8SF_INT:
24355 case V8SF_FTYPE_V8SF_V4SF_INT:
24356 case V4SI_FTYPE_V4SI_V4SI_INT:
24357 case V4DF_FTYPE_V4DF_V4DF_INT:
24358 case V4DF_FTYPE_V4DF_V2DF_INT:
24359 case V4SF_FTYPE_V4SF_V4SF_INT:
24360 case V2DI_FTYPE_V2DI_V2DI_INT:
24361 case V2DF_FTYPE_V2DF_V2DF_INT:
24363 nargs_constant = 1;
24365 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24368 nargs_constant = 1;
24370 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24373 nargs_constant = 1;
24375 case V2DI_FTYPE_V2DI_UINT_UINT:
24377 nargs_constant = 2;
24379 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24381 nargs_constant = 2;
24384 gcc_unreachable ();
24387 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparison builtins always take two args; use the compare helper.  */
24389 if (comparison != UNKNOWN)
24391 gcc_assert (nargs == 2);
24392 return ix86_expand_sse_compare (d, exp, target, swap);
/* Choose the result rtx; when rmode != tmode, generate into an rmode
   pseudo and hand the insn a tmode subreg of it as its output.  */
24395 if (rmode == VOIDmode || rmode == tmode)
24399 || GET_MODE (target) != tmode
24400 || ! (*insn_p->operand[0].predicate) (target, tmode))
24401 target = gen_reg_rtx (tmode);
24402 real_target = target;
24406 target = gen_reg_rtx (rmode);
24407 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
/* Expand and legitimize each call argument against the insn predicates.  */
24410 for (i = 0; i < nargs; i++)
24412 tree arg = CALL_EXPR_ARG (exp, i);
24413 rtx op = expand_normal (arg);
24414 enum machine_mode mode = insn_p->operand[i + 1].mode;
24415 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24417 if (last_arg_count && (i + 1) == nargs)
24419 /* SIMD shift insns take either an 8-bit immediate or
24420 register as count. But builtin functions take int as
24421 count. If count doesn't match, we put it in register. */
24424 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24425 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24426 op = copy_to_reg (op);
/* Immediate-constant arguments: each insn restricts the number of
   significant bits; emit a precise diagnostic per insn code.  */
24429 else if ((nargs - i) <= nargs_constant)
24434 case CODE_FOR_sse4_1_roundpd:
24435 case CODE_FOR_sse4_1_roundps:
24436 case CODE_FOR_sse4_1_roundsd:
24437 case CODE_FOR_sse4_1_roundss:
24438 case CODE_FOR_sse4_1_blendps:
24439 case CODE_FOR_avx_blendpd256:
24440 case CODE_FOR_avx_vpermilv4df:
24441 case CODE_FOR_avx_roundpd256:
24442 case CODE_FOR_avx_roundps256:
24443 error ("the last argument must be a 4-bit immediate");
24446 case CODE_FOR_sse4_1_blendpd:
24447 case CODE_FOR_avx_vpermilv2df:
24448 error ("the last argument must be a 2-bit immediate");
24451 case CODE_FOR_avx_vextractf128v4df:
24452 case CODE_FOR_avx_vextractf128v8sf:
24453 case CODE_FOR_avx_vextractf128v8si:
24454 case CODE_FOR_avx_vinsertf128v4df:
24455 case CODE_FOR_avx_vinsertf128v8sf:
24456 case CODE_FOR_avx_vinsertf128v8si:
24457 error ("the last argument must be a 1-bit immediate");
24460 case CODE_FOR_avx_cmpsdv2df3:
24461 case CODE_FOR_avx_cmpssv4sf3:
24462 case CODE_FOR_avx_cmppdv2df3:
24463 case CODE_FOR_avx_cmppsv4sf3:
24464 case CODE_FOR_avx_cmppdv4df3:
24465 case CODE_FOR_avx_cmppsv8sf3:
24466 error ("the last argument must be a 5-bit immediate");
24470 switch (nargs_constant)
24473 if ((nargs - i) == nargs_constant)
24475 error ("the next to last argument must be an 8-bit immediate");
24479 error ("the last argument must be an 8-bit immediate");
24482 gcc_unreachable ();
24489 if (VECTOR_MODE_P (mode))
24490 op = safe_vector_operand (op, mode);
24492 /* If we aren't optimizing, only allow one memory operand to
24494 if (memory_operand (op, mode))
24497 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24499 if (optimize || !match || num_memory > 1)
24500 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy through a register and take a subreg.  */
24504 op = copy_to_reg (op);
24505 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24510 args[i].mode = mode;
/* Emit the insn with the arity determined above.  */
24516 pat = GEN_FCN (icode) (real_target, args[0].op);
24519 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24522 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24526 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24527 args[2].op, args[3].op);
24530 gcc_unreachable ();
24540 /* Subroutine of ix86_expand_builtin to take care of special insns
24541 with variable number of operands. */
/* Expander for "special" builtins -- those with pointer arguments, i.e.
   loads and stores.  The switch classifies each signature as a load or a
   store (klass), records how many non-destination operands there are, and
   which operand index is the memory operand.  For stores the return value
   is 0; for loads it is the result register.  */
24544 ix86_expand_special_args_builtin (const struct builtin_description *d,
24545 tree exp, rtx target)
24549 unsigned int i, nargs, arg_adjust, memory;
24553 enum machine_mode mode;
24555 enum insn_code icode = d->icode;
24556 bool last_arg_constant = false;
24557 const struct insn_data *insn_p = &insn_data[icode];
24558 enum machine_mode tmode = insn_p->operand[0].mode;
/* klass tells us whether the builtin reads memory or writes it.  */
24559 enum { load, store } klass;
24561 switch ((enum ix86_special_builtin_type) d->flag)
24563 case VOID_FTYPE_VOID:
24564 emit_insn (GEN_FCN (icode) (target));
/* Pointer-source signatures: loads from memory.  */
24566 case V2DI_FTYPE_PV2DI:
24567 case V32QI_FTYPE_PCCHAR:
24568 case V16QI_FTYPE_PCCHAR:
24569 case V8SF_FTYPE_PCV4SF:
24570 case V8SF_FTYPE_PCFLOAT:
24571 case V4SF_FTYPE_PCFLOAT:
24572 case V4DF_FTYPE_PCV2DF:
24573 case V4DF_FTYPE_PCDOUBLE:
24574 case V2DF_FTYPE_PCDOUBLE:
/* Pointer-destination signatures: stores to memory.  */
24579 case VOID_FTYPE_PV2SF_V4SF:
24580 case VOID_FTYPE_PV4DI_V4DI:
24581 case VOID_FTYPE_PV2DI_V2DI:
24582 case VOID_FTYPE_PCHAR_V32QI:
24583 case VOID_FTYPE_PCHAR_V16QI:
24584 case VOID_FTYPE_PFLOAT_V8SF:
24585 case VOID_FTYPE_PFLOAT_V4SF:
24586 case VOID_FTYPE_PDOUBLE_V4DF:
24587 case VOID_FTYPE_PDOUBLE_V2DF:
24588 case VOID_FTYPE_PDI_DI:
24589 case VOID_FTYPE_PINT_INT:
24592 /* Reserve memory operand for target. */
24593 memory = ARRAY_SIZE (args);
24595 case V4SF_FTYPE_V4SF_PCV2SF:
24596 case V2DF_FTYPE_V2DF_PCDOUBLE:
24601 case V8SF_FTYPE_PCV8SF_V8SF:
24602 case V4DF_FTYPE_PCV4DF_V4DF:
24603 case V4SF_FTYPE_PCV4SF_V4SF:
24604 case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked-store signatures (pointer destination plus two sources).  */
24609 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24610 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24611 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24612 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24615 /* Reserve memory operand for target. */
24616 memory = ARRAY_SIZE (args);
24619 gcc_unreachable ();
24622 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, the first call argument is the destination address; wrap
   it in a MEM which becomes the insn's output operand.  */
24624 if (klass == store)
24626 arg = CALL_EXPR_ARG (exp, 0);
24627 op = expand_normal (arg);
24628 gcc_assert (target == 0);
24629 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24637 || GET_MODE (target) != tmode
24638 || ! (*insn_p->operand[0].predicate) (target, tmode))
24639 target = gen_reg_rtx (tmode);
/* Expand each remaining argument; exactly one may be the memory operand
   (index 'memory'), all others are forced into registers.  */
24642 for (i = 0; i < nargs; i++)
24644 enum machine_mode mode = insn_p->operand[i + 1].mode;
24647 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24648 op = expand_normal (arg);
24649 match = (*insn_p->operand[i + 1].predicate) (op, mode);
24651 if (last_arg_constant && (i + 1) == nargs)
24657 error ("the last argument must be an 8-bit immediate");
24665 /* This must be the memory operand. */
24666 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24667 gcc_assert (GET_MODE (op) == mode
24668 || GET_MODE (op) == VOIDmode);
24672 /* This must be register. */
24673 if (VECTOR_MODE_P (mode))
24674 op = safe_vector_operand (op, mode);
24676 gcc_assert (GET_MODE (op) == mode
24677 || GET_MODE (op) == VOIDmode);
24678 op = copy_to_mode_reg (mode, op);
24683 args[i].mode = mode;
24689 pat = GEN_FCN (icode) (target, args[0].op);
24692 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24695 gcc_unreachable ();
/* Stores produce no value.  */
24701 return klass == store ? 0 : target;
24704 /* Return the integer constant in ARG. Constrain it to be in the range
24705 of the subparts of VEC_TYPE; issue an error if not. */
24708 get_element_number (tree vec_type, tree arg)
/* max is the highest valid lane index: number of vector subparts - 1.  */
24710 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constant selectors and out-of-range values with a
   diagnostic; the comma expression assigns elt before the range test.  */
24712 if (!host_integerp (arg, 1)
24713 || (elt = tree_low_cst (arg, 1), elt > max))
24715 error ("selector must be an integer constant in the range 0..%wi", max);
24722 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24723 ix86_expand_vector_init. We DO have language-level syntax for this, in
24724 the form of (type){ init-list }. Except that since we can't place emms
24725 instructions from inside the compiler, we can't allow the use of MMX
24726 registers unless the user explicitly asks for it. So we do *not* define
24727 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
24728 we have builtins invoked by mmintrin.h that gives us license to emit
24729 these sorts of instructions. */
24732 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24734 enum machine_mode tmode = TYPE_MODE (type);
24735 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24736 int i, n_elt = GET_MODE_NUNITS (tmode);
24737 rtvec v = rtvec_alloc (n_elt);
24739 gcc_assert (VECTOR_MODE_P (tmode));
24740 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand every element argument into a low-part rtx of the element mode.  */
24742 for (i = 0; i < n_elt; ++i)
24744 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24745 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24748 if (!target || !register_operand (target, tmode))
24749 target = gen_reg_rtx (tmode);
/* Delegate the actual element placement to the generic vector-init code.  */
24751 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24755 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24756 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
24757 had a language-level syntax for referencing vector elements. */
24760 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24762 enum machine_mode tmode, mode0;
24767 arg0 = CALL_EXPR_ARG (exp, 0);
24768 arg1 = CALL_EXPR_ARG (exp, 1);
24770 op0 = expand_normal (arg0);
/* The lane selector must be a compile-time constant in range; diagnosed
   by get_element_number.  */
24771 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode = element mode, mode0 = vector mode, both from the tree types.  */
24773 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24774 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24775 gcc_assert (VECTOR_MODE_P (mode0));
24777 op0 = force_reg (mode0, op0);
24779 if (optimize || !target || !register_operand (target, tmode))
24780 target = gen_reg_rtx (tmode);
24782 ix86_expand_vector_extract (true, target, op0, elt);
24787 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24788 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24789 a language-level syntax for referencing vector elements. */
24792 ix86_expand_vec_set_builtin (tree exp)
24794 enum machine_mode tmode, mode1;
24795 tree arg0, arg1, arg2;
24797 rtx op0, op1, target;
24799 arg0 = CALL_EXPR_ARG (exp, 0);
24800 arg1 = CALL_EXPR_ARG (exp, 1);
24801 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode = vector mode, mode1 = element mode, from the tree types.  */
24803 tmode = TYPE_MODE (TREE_TYPE (arg0));
24804 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24805 gcc_assert (VECTOR_MODE_P (tmode));
24807 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24808 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* The lane selector must be a compile-time constant in range.  */
24809 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Widen/narrow the new element to the vector's element mode if needed.  */
24811 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24812 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24814 op0 = force_reg (tmode, op0);
24815 op1 = force_reg (mode1, op1);
24817 /* OP0 is the source of these builtin functions and shouldn't be
24818 modified. Create a copy, use it and return it as target. */
24819 target = gen_reg_rtx (tmode);
24820 emit_move_insn (target, op0);
24821 ix86_expand_vector_set (true, target, op1, elt);
24826 /* Expand an expression EXP that calls a built-in function,
24827 with result going to TARGET if that's convenient
24828 (and in mode MODE if that's convenient).
24829 SUBTARGET may be used as the target for computing one of EXP's operands.
24830 IGNORE is nonzero if the value is to be ignored. */
24833 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24834 enum machine_mode mode ATTRIBUTE_UNUSED,
24835 int ignore ATTRIBUTE_UNUSED)
24837 const struct builtin_description *d;
24839 enum insn_code icode;
24840 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24841 tree arg0, arg1, arg2;
24842 rtx op0, op1, op2, pat;
24843 enum machine_mode mode0, mode1, mode2;
24844 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24846 /* Determine whether the builtin function is available under the current ISA.
24847 Originally the builtin was not created if it wasn't applicable to the
24848 current ISA based on the command line switches. With function specific
24849 options, we need to check in the context of the function making the call
24850 whether it is supported. */
24851 if (ix86_builtins_isa[fcode].isa
24852 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24854 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24855 NULL, NULL, false);
24858 error ("%qE needs unknown isa option", fndecl);
24861 gcc_assert (opts != NULL);
24862 error ("%qE needs isa option %s", fndecl, opts);
24870 case IX86_BUILTIN_MASKMOVQ:
24871 case IX86_BUILTIN_MASKMOVDQU:
24872 icode = (fcode == IX86_BUILTIN_MASKMOVQ
24873 ? CODE_FOR_mmx_maskmovq
24874 : CODE_FOR_sse2_maskmovdqu);
24875 /* Note the arg order is different from the operand order. */
24876 arg1 = CALL_EXPR_ARG (exp, 0);
24877 arg2 = CALL_EXPR_ARG (exp, 1);
24878 arg0 = CALL_EXPR_ARG (exp, 2);
24879 op0 = expand_normal (arg0);
24880 op1 = expand_normal (arg1);
24881 op2 = expand_normal (arg2);
24882 mode0 = insn_data[icode].operand[0].mode;
24883 mode1 = insn_data[icode].operand[1].mode;
24884 mode2 = insn_data[icode].operand[2].mode;
24886 op0 = force_reg (Pmode, op0);
24887 op0 = gen_rtx_MEM (mode1, op0);
24889 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
24890 op0 = copy_to_mode_reg (mode0, op0);
24891 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
24892 op1 = copy_to_mode_reg (mode1, op1);
24893 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
24894 op2 = copy_to_mode_reg (mode2, op2);
24895 pat = GEN_FCN (icode) (op0, op1, op2);
24901 case IX86_BUILTIN_LDMXCSR:
24902 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
24903 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24904 emit_move_insn (target, op0);
24905 emit_insn (gen_sse_ldmxcsr (target));
24908 case IX86_BUILTIN_STMXCSR:
24909 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24910 emit_insn (gen_sse_stmxcsr (target));
24911 return copy_to_mode_reg (SImode, target);
24913 case IX86_BUILTIN_CLFLUSH:
24914 arg0 = CALL_EXPR_ARG (exp, 0);
24915 op0 = expand_normal (arg0);
24916 icode = CODE_FOR_sse2_clflush;
24917 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24918 op0 = copy_to_mode_reg (Pmode, op0);
24920 emit_insn (gen_sse2_clflush (op0));
24923 case IX86_BUILTIN_MONITOR:
24924 arg0 = CALL_EXPR_ARG (exp, 0);
24925 arg1 = CALL_EXPR_ARG (exp, 1);
24926 arg2 = CALL_EXPR_ARG (exp, 2);
24927 op0 = expand_normal (arg0);
24928 op1 = expand_normal (arg1);
24929 op2 = expand_normal (arg2);
24931 op0 = copy_to_mode_reg (Pmode, op0);
24933 op1 = copy_to_mode_reg (SImode, op1);
24935 op2 = copy_to_mode_reg (SImode, op2);
24936 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
24939 case IX86_BUILTIN_MWAIT:
24940 arg0 = CALL_EXPR_ARG (exp, 0);
24941 arg1 = CALL_EXPR_ARG (exp, 1);
24942 op0 = expand_normal (arg0);
24943 op1 = expand_normal (arg1);
24945 op0 = copy_to_mode_reg (SImode, op0);
24947 op1 = copy_to_mode_reg (SImode, op1);
24948 emit_insn (gen_sse3_mwait (op0, op1));
24951 case IX86_BUILTIN_VEC_INIT_V2SI:
24952 case IX86_BUILTIN_VEC_INIT_V4HI:
24953 case IX86_BUILTIN_VEC_INIT_V8QI:
24954 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
24956 case IX86_BUILTIN_VEC_EXT_V2DF:
24957 case IX86_BUILTIN_VEC_EXT_V2DI:
24958 case IX86_BUILTIN_VEC_EXT_V4SF:
24959 case IX86_BUILTIN_VEC_EXT_V4SI:
24960 case IX86_BUILTIN_VEC_EXT_V8HI:
24961 case IX86_BUILTIN_VEC_EXT_V2SI:
24962 case IX86_BUILTIN_VEC_EXT_V4HI:
24963 case IX86_BUILTIN_VEC_EXT_V16QI:
24964 return ix86_expand_vec_ext_builtin (exp, target);
24966 case IX86_BUILTIN_VEC_SET_V2DI:
24967 case IX86_BUILTIN_VEC_SET_V4SF:
24968 case IX86_BUILTIN_VEC_SET_V4SI:
24969 case IX86_BUILTIN_VEC_SET_V8HI:
24970 case IX86_BUILTIN_VEC_SET_V4HI:
24971 case IX86_BUILTIN_VEC_SET_V16QI:
24972 return ix86_expand_vec_set_builtin (exp);
24974 case IX86_BUILTIN_INFQ:
24975 case IX86_BUILTIN_HUGE_VALQ:
24977 REAL_VALUE_TYPE inf;
24981 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
24983 tmp = validize_mem (force_const_mem (mode, tmp));
24986 target = gen_reg_rtx (mode);
24988 emit_move_insn (target, tmp);
24996 for (i = 0, d = bdesc_special_args;
24997 i < ARRAY_SIZE (bdesc_special_args);
24999 if (d->code == fcode)
25000 return ix86_expand_special_args_builtin (d, exp, target);
25002 for (i = 0, d = bdesc_args;
25003 i < ARRAY_SIZE (bdesc_args);
25005 if (d->code == fcode)
25008 case IX86_BUILTIN_FABSQ:
25009 case IX86_BUILTIN_COPYSIGNQ:
25011 /* Emit a normal call if SSE2 isn't available. */
25012 return expand_call (exp, target, ignore);
25014 return ix86_expand_args_builtin (d, exp, target);
25017 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25018 if (d->code == fcode)
25019 return ix86_expand_sse_comi (d, exp, target);
25021 for (i = 0, d = bdesc_pcmpestr;
25022 i < ARRAY_SIZE (bdesc_pcmpestr);
25024 if (d->code == fcode)
25025 return ix86_expand_sse_pcmpestr (d, exp, target);
25027 for (i = 0, d = bdesc_pcmpistr;
25028 i < ARRAY_SIZE (bdesc_pcmpistr);
25030 if (d->code == fcode)
25031 return ix86_expand_sse_pcmpistr (d, exp, target);
25033 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25034 if (d->code == fcode)
25035 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25036 (enum multi_arg_type)d->flag,
25039 gcc_unreachable ();
/* NOTE(review): the embedded line numbers in this listing jump (25044 ->
   25047, 25085 -> 25092, ...), so interior lines of this function (the
   return type, braces, the `switch (fn)` header, break statements) appear
   elided.  Comments below describe only what the visible lines show.  */
25042 /* Returns a function decl for a vectorized version of the builtin function
25043 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25044 if it is not available. */
25047 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25050 enum machine_mode in_mode, out_mode;
/* Only vector-to-vector versions are provided.  */
25053 if (TREE_CODE (type_out) != VECTOR_TYPE
25054 || TREE_CODE (type_in) != VECTOR_TYPE)
25057 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25058 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25059 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25060 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* sqrt: V2DF -> sqrtpd, V4SF -> sqrtps (no-Newton-refinement variant).  */
25064 case BUILT_IN_SQRT:
25065 if (out_mode == DFmode && out_n == 2
25066 && in_mode == DFmode && in_n == 2)
25067 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25070 case BUILT_IN_SQRTF:
25071 if (out_mode == SFmode && out_n == 4
25072 && in_mode == SFmode && in_n == 4)
25073 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
/* lrint: double input packs two V2DF results into V4SI; float input
   maps directly to cvtps2dq.  */
25076 case BUILT_IN_LRINT:
25077 if (out_mode == SImode && out_n == 4
25078 && in_mode == DFmode && in_n == 2)
25079 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25082 case BUILT_IN_LRINTF:
25083 if (out_mode == SImode && out_n == 4
25084 && in_mode == SFmode && in_n == 4)
25085 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
/* Fall back to the configured vector-math library (SVML/ACML), if any.  */
25092 /* Dispatch to a handler for a vectorization library. */
25093 if (ix86_veclib_handler)
25094 return (*ix86_veclib_handler)(fn, type_out, type_in);
/* NOTE(review): interior lines of this function appear elided in this
   listing (jumping embedded line numbers); the switch header, braces and
   early returns are not visible.  Comments describe visible lines only.  */
25099 /* Handler for an SVML-style interface to
25100 a library with vectorized intrinsics. */
25103 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25106 tree fntype, new_fndecl, args;
25109 enum machine_mode el_mode, in_mode;
25112 /* The SVML is suitable for unsafe math only. */
25113 if (!flag_unsafe_math_optimizations)
25116 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25117 n = TYPE_VECTOR_SUBPARTS (type_out);
25118 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25119 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode/count must agree.  */
25120 if (el_mode != in_mode
/* Double-precision group: only the V2DF (2 x double) form is supported.  */
25128 case BUILT_IN_LOG10:
25130 case BUILT_IN_TANH:
25132 case BUILT_IN_ATAN:
25133 case BUILT_IN_ATAN2:
25134 case BUILT_IN_ATANH:
25135 case BUILT_IN_CBRT:
25136 case BUILT_IN_SINH:
25138 case BUILT_IN_ASINH:
25139 case BUILT_IN_ASIN:
25140 case BUILT_IN_COSH:
25142 case BUILT_IN_ACOSH:
25143 case BUILT_IN_ACOS:
25144 if (el_mode != DFmode || n != 2)
/* Single-precision group: only the V4SF (4 x float) form is supported.  */
25148 case BUILT_IN_EXPF:
25149 case BUILT_IN_LOGF:
25150 case BUILT_IN_LOG10F:
25151 case BUILT_IN_POWF:
25152 case BUILT_IN_TANHF:
25153 case BUILT_IN_TANF:
25154 case BUILT_IN_ATANF:
25155 case BUILT_IN_ATAN2F:
25156 case BUILT_IN_ATANHF:
25157 case BUILT_IN_CBRTF:
25158 case BUILT_IN_SINHF:
25159 case BUILT_IN_SINF:
25160 case BUILT_IN_ASINHF:
25161 case BUILT_IN_ASINF:
25162 case BUILT_IN_COSHF:
25163 case BUILT_IN_COSF:
25164 case BUILT_IN_ACOSHF:
25165 case BUILT_IN_ACOSF:
25166 if (el_mode != SFmode || n != 4)
/* Build the SVML entry-point name: "vmls<Name>4" for float,
   "vmld<Name>2" for double; log gets the special "Ln" spelling.
   bname+10 skips the "__builtin_" prefix of the builtin's name.  */
25174 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25176 if (fn == BUILT_IN_LOGF)
25177 strcpy (name, "vmlsLn4");
25178 else if (fn == BUILT_IN_LOG)
25179 strcpy (name, "vmldLn2");
25182 sprintf (name, "vmls%s", bname+10);
/* Replace the trailing 'f' of the float builtin name with the lane count.  */
25183 name[strlen (name)-1] = '4';
25186 sprintf (name, "vmld%s2", bname+10);
25188 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a 1- or 2-argument
   vector function type below.  */
25192 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25193 args = TREE_CHAIN (args))
25197 fntype = build_function_type_list (type_out, type_in, NULL);
25199 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25201 /* Build a function declaration for the vectorized function. */
25202 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25203 TREE_PUBLIC (new_fndecl) = 1;
25204 DECL_EXTERNAL (new_fndecl) = 1;
25205 DECL_IS_NOVOPS (new_fndecl) = 1;
25206 TREE_READONLY (new_fndecl) = 1;
/* NOTE(review): interior lines appear elided in this listing; comments
   describe visible lines only.  Mirrors ix86_veclibabi_svml but with the
   ACML "__vrd2_"/"__vrs4_" naming scheme filled into the name template.  */
25211 /* Handler for an ACML-style interface to
25212 a library with vectorized intrinsics. */
25215 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Template "__vr.._"; the two dots are overwritten with "d2" or "s4"
   (presumably by elided lines) and the suffix written at name + 7 below.  */
25217 char name[20] = "__vr.._";
25218 tree fntype, new_fndecl, args;
25221 enum machine_mode el_mode, in_mode;
25224 /* The ACML is 64bits only and suitable for unsafe math only as
25225 it does not correctly support parts of IEEE with the required
25226 precision such as denormals. */
25228 || !flag_unsafe_math_optimizations)
25231 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25232 n = TYPE_VECTOR_SUBPARTS (type_out);
25233 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25234 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode/count must agree.  */
25235 if (el_mode != in_mode
/* Double-precision group: requires DFmode elements.  */
25245 case BUILT_IN_LOG2:
25246 case BUILT_IN_LOG10:
25249 if (el_mode != DFmode
/* Single-precision group: requires SFmode elements.  */
25254 case BUILT_IN_SINF:
25255 case BUILT_IN_COSF:
25256 case BUILT_IN_EXPF:
25257 case BUILT_IN_POWF:
25258 case BUILT_IN_LOGF:
25259 case BUILT_IN_LOG2F:
25260 case BUILT_IN_LOG10F:
25263 if (el_mode != SFmode
/* Append the builtin's name (minus its "__builtin_" prefix) after the
   7-character "__vr.._" template.  */
25272 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25273 sprintf (name + 7, "%s", bname+10);
/* Count scalar arguments to choose a 1- or 2-argument vector type.  */
25276 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25277 args = TREE_CHAIN (args))
25281 fntype = build_function_type_list (type_out, type_in, NULL);
25283 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25285 /* Build a function declaration for the vectorized function. */
25286 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25287 TREE_PUBLIC (new_fndecl) = 1;
25288 DECL_EXTERNAL (new_fndecl) = 1;
25289 DECL_IS_NOVOPS (new_fndecl) = 1;
25290 TREE_READONLY (new_fndecl) = 1;
/* NOTE(review): interior lines (outer switch on CODE, FLOAT_EXPR case
   label, mode case labels, default returns) appear elided in this listing.  */
25296 /* Returns a decl of a function that implements conversion of an integer vector
25297 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25298 side of the conversion.
25299 Return NULL_TREE if it is not available. */
25302 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25304 if (TREE_CODE (type) != VECTOR_TYPE)
/* int -> float direction: V4SI handled via cvtdq2ps.  */
25310 switch (TYPE_MODE (type))
25313 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int (truncating) direction: handled via cvttps2dq.  */
25318 case FIX_TRUNC_EXPR:
25319 switch (TYPE_MODE (type))
25322 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
/* NOTE(review): the switch headers and default cases appear elided in this
   listing; only the guard condition and the two rsqrt mappings are visible.  */
25332 /* Returns a code for a target-specific builtin that implements
25333 reciprocal of the function, or NULL_TREE if not available. */
25336 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25337 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only valid under -mrecip with SSE math
   and the full set of unsafe-math flags; bail out otherwise.  */
25339 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25340 && flag_finite_math_only && !flag_trapping_math
25341 && flag_unsafe_math_optimizations))
25345 /* Machine dependent builtins. */
25348 /* Vectorized version of sqrt to rsqrt conversion. */
25349 case IX86_BUILTIN_SQRTPS_NR:
25350 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25356 /* Normal builtins. */
25359 /* Sqrt to rsqrt conversion. */
25360 case BUILT_IN_SQRTF:
25361 return ix86_builtins[IX86_BUILTIN_RSQRTF];
/* NOTE(review): interior lines (mode switch headers, case labels, SET
   source operands, function tail) appear elided in this listing.  */
25368 /* Store OPERAND to the memory after reload is completed. This means
25369 that we can't easily use assign_stack_local. */
25371 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25375 gcc_assert (reload_completed);
/* With a red zone available, store below the stack pointer instead of
   pushing — no stack adjustment needed.  */
25376 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25378 result = gen_rtx_MEM (mode,
25379 gen_rtx_PLUS (Pmode,
25381 GEN_INT (-RED_ZONE_SIZE)));
25382 emit_move_insn (result, operand);
/* 64-bit without red zone: push the value (as DImode) via PRE_DEC.  */
25384 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25390 operand = gen_lowpart (DImode, operand);
25394 gen_rtx_SET (VOIDmode,
25395 gen_rtx_MEM (DImode,
25396 gen_rtx_PRE_DEC (DImode,
25397 stack_pointer_rtx)),
25401 gcc_unreachable ();
25403 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit path: split a DImode value into two SImode pushes.  */
25412 split_di (&operand, 1, operands, operands + 1);
25414 gen_rtx_SET (VOIDmode,
25415 gen_rtx_MEM (SImode,
25416 gen_rtx_PRE_DEC (Pmode,
25417 stack_pointer_rtx)),
25420 gen_rtx_SET (VOIDmode,
25421 gen_rtx_MEM (SImode,
25422 gen_rtx_PRE_DEC (Pmode,
25423 stack_pointer_rtx)),
25428 /* Store HImodes as SImodes. */
25429 operand = gen_lowpart (SImode, operand);
25433 gen_rtx_SET (VOIDmode,
25434 gen_rtx_MEM (GET_MODE (operand),
25435 gen_rtx_PRE_DEC (SImode,
25436 stack_pointer_rtx)),
25440 gcc_unreachable ();
/* Result is the freshly pushed slot at the (new) stack pointer.  */
25442 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* NOTE(review): size computation lines appear elided; this releases the
   stack slot allocated by ix86_force_to_memory when no red zone was used.  */
25447 /* Free operand from the memory. */
25449 ix86_free_from_memory (enum machine_mode mode)
/* Only needed when the value was actually pushed (no red-zone store).  */
25451 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25455 if (mode == DImode || TARGET_64BIT)
25459 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25460 to pop or add instruction if registers are available. */
25461 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25462 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
/* NOTE(review): several return statements and condition lines appear
   elided in this listing; comments describe only the visible checks.  */
25467 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25468 QImode must go into class Q_REGS.
25469 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25470 movdf to do mem-to-mem moves through integer regs. */
25472 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25474 enum machine_mode mode = GET_MODE (x);
25476 /* We're only allowed to return a subclass of CLASS. Many of the
25477 following checks fail for NO_REGS, so eliminate that early. */
25478 if (regclass == NO_REGS)
25481 /* All classes can load zeros. */
25482 if (x == CONST0_RTX (mode))
25485 /* Force constants into memory if we are loading a (nonzero) constant into
25486 an MMX or SSE register. This is because there are no MMX/SSE instructions
25487 to load from a constant. */
25489 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25492 /* Prefer SSE regs only, if we can use them for math. */
25493 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25494 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25496 /* Floating-point constants need more complex checks. */
25497 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25499 /* General regs can load everything. */
25500 if (reg_class_subset_p (regclass, GENERAL_REGS))
25503 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25504 zero above. We only want to wind up preferring 80387 registers if
25505 we plan on doing computation with them. */
25507 && standard_80387_constant_p (x))
25509 /* Limit class to non-sse. */
25510 if (regclass == FLOAT_SSE_REGS)
25512 if (regclass == FP_TOP_SSE_REGS)
25514 if (regclass == FP_SECOND_SSE_REGS)
25515 return FP_SECOND_REG;
25516 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25523 /* Generally when we see PLUS here, it's the function invariant
25524 (plus soft-fp const_int). Which can only be computed into general
25526 if (GET_CODE (x) == PLUS)
25527 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25529 /* QImode constants are easy to load, but non-constant QImode data
25530 must go into Q_REGS. */
25531 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25533 if (reg_class_subset_p (regclass, Q_REGS))
25535 if (reg_class_subset_p (Q_REGS, regclass))
25543 /* Discourage putting floating-point values in SSE registers unless
25544 SSE math is being used, and likewise for the 387 registers. */
25546 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25548 enum machine_mode mode = GET_MODE (x);
25550 /* Restrict the output reload class to the register bank that we are doing
25551 math on. If we would like not to return a subset of CLASS, reject this
25552 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): redundant — `mode` was already initialized from
   GET_MODE (x) above; harmless but could be removed.  */
25553 mode = GET_MODE (x);
25554 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25555 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* x87 modes: narrow mixed FP/SSE classes down to their pure x87 part.  */
25557 if (X87_FLOAT_MODE_P (mode))
25559 if (regclass == FP_TOP_SSE_REGS)
25561 else if (regclass == FP_SECOND_SSE_REGS)
25562 return FP_SECOND_REG;
25564 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* NOTE(review): the function body after the visible lines (the actual
   Q_REGS return and the final NO_REGS return) appears elided.  */
25570 static enum reg_class
25571 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25572 enum machine_mode mode,
25573 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25575 /* QImode spills from non-QI registers require
25576 intermediate register on 32bit targets. */
25577 if (!in_p && mode == QImode && !TARGET_64BIT
25578 && (rclass == GENERAL_REGS
25579 || rclass == LEGACY_REGS
25580 || rclass == INDEX_REGS))
/* Resolve pseudos/SUBREGs to the underlying hard register number.  */
25589 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25590 regno = true_regnum (x);
25592 /* Return Q_REGS if the operand is in memory. */
/* NOTE(review): several return statements appear elided in this listing;
   the visible conditions are the decision points, their results are not
   all shown.  */
25600 /* If we are copying between general and FP registers, we need a memory
25601 location. The same is true for SSE and MMX registers.
25603 To optimize register_move_cost performance, allow inline variant.
25605 The macro can't work reliably when one of the CLASSES is class containing
25606 registers from multiple units (SSE, MMX, integer). We avoid this by never
25607 combining those units in single alternative in the machine description.
25608 Ensure that this constraint holds to avoid unexpected surprises.
25610 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25611 enforce these sanity checks. */
25614 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25615 enum machine_mode mode, int strict)
/* Sanity check: neither class may mix units (FP/SSE/MMX with others).  */
25617 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25618 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25619 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25620 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25621 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25622 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25624 gcc_assert (!strict);
/* x87 <-> non-x87 always goes through memory.  */
25628 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25631 /* ??? This is a lie. We do have moves between mmx/general, and for
25632 mmx/sse2. But by saying we need secondary memory we discourage the
25633 register allocator from using the mmx registers unless needed. */
25634 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25637 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25639 /* SSE1 doesn't have any direct moves from other classes. */
25643 /* If the target says that inter-unit moves are more expensive
25644 than moving through memory, then don't generate them. */
25645 if (!TARGET_INTER_UNIT_MOVES)
25648 /* Between SSE and general, we have moves no larger than word size. */
25649 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper (the inline variant exists so register_move_cost
   can call it cheaply).  */
25657 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25658 enum machine_mode mode, int strict)
25660 return inline_secondary_memory_needed (class1, class2, mode, strict);
/* NOTE(review): return statements for the individual branches appear
   elided in this listing.  */
25663 /* Return true if the registers in CLASS cannot represent the change from
25664 modes FROM to TO. */
25667 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25668 enum reg_class regclass)
25673 /* x87 registers can't do subreg at all, as all values are reformatted
25674 to extended precision. */
25675 if (MAYBE_FLOAT_CLASS_P (regclass))
25678 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25680 /* Vector registers do not support QI or HImode loads. If we don't
25681 disallow a change to these modes, reload will assume it's ok to
25682 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
25683 the vec_dupv4hi pattern. */
25684 if (GET_MODE_SIZE (from) < 4)
25687 /* Vector registers do not support subreg with nonzero offsets, which
25688 are otherwise valid for integer registers. Since we can't see
25689 whether we have a nonzero offset from here, prohibit all
25690 nonparadoxical subregs changing size. */
25691 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
/* NOTE(review): the mode-size switch headers, case labels and the
   `index` computations feeding the cost-table lookups appear elided in
   this listing.  */
25698 /* Return the cost of moving data of mode M between a
25699 register and memory. A value of 2 is the default; this cost is
25700 relative to those in `REGISTER_MOVE_COST'.
25702 This function is used extensively by register_move_cost that is used to
25703 build tables at startup. Make it inline in this case.
25704 When IN is 2, return maximum of in and out move cost.
25706 If moving between registers and memory is more expensive than
25707 between two registers, you should define this macro to express the
25710 Model also increased moving costs of QImode registers in non
25714 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87 register classes: use fp_load/fp_store tables.  */
25718 if (FLOAT_CLASS_P (regclass))
25736 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25737 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: cost indexed by mode size.  */
25739 if (SSE_CLASS_P (regclass))
25742 switch (GET_MODE_SIZE (mode))
25757 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25758 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: cost indexed by mode size.  */
25760 if (MMX_CLASS_P (regclass))
25763 switch (GET_MODE_SIZE (mode))
25775 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25776 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: QImode in non-Q registers needs movzbl on load.  */
25778 switch (GET_MODE_SIZE (mode))
25781 if (Q_CLASS_P (regclass) || TARGET_64BIT)
25784 return ix86_cost->int_store[0];
25785 if (TARGET_PARTIAL_REG_DEPENDENCY
25786 && optimize_function_for_speed_p (cfun))
25787 cost = ix86_cost->movzbl_load;
25789 cost = ix86_cost->int_load[0];
25791 return MAX (cost, ix86_cost->int_store[0]);
25797 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25799 return ix86_cost->movzbl_load;
25801 return ix86_cost->int_store[0] + 4;
25806 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25807 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25809 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
25810 if (mode == TFmode)
25813 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25815 cost = ix86_cost->int_load[2];
25817 cost = ix86_cost->int_store[2];
25818 return (cost * (((int) GET_MODE_SIZE (mode)
25819 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper around the inline cost computation above.  */
25824 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25826 return inline_memory_move_cost (mode, regclass, in);
/* NOTE(review): the declaration/initialization of `cost` and some
   intermediate lines appear elided in this listing.  */
25830 /* Return the cost of moving data from a register in class CLASS1 to
25831 one in class CLASS2.
25833 It is not required that the cost always equal 2 when FROM is the same as TO;
25834 on some machines it is expensive to move between registers if they are not
25835 general registers. */
25838 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25839 enum reg_class class2)
25841 /* In case we require secondary memory, compute cost of the store followed
25842 by load. In order to avoid bad register allocation choices, we need
25843 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
25845 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* `in = 2` form: each call yields max(load, store) for its class.  */
25849 cost += inline_memory_move_cost (mode, class1, 2);
25850 cost += inline_memory_move_cost (mode, class2, 2);
25852 /* In case of copying from general_purpose_register we may emit multiple
25853 stores followed by single load causing memory size mismatch stall.
25854 Count this as arbitrarily high cost of 20. */
25855 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25858 /* In the case of FP/MMX moves, the registers actually overlap, and we
25859 have to switch modes in order to treat them differently. */
25860 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25861 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25867 /* Moves between SSE/MMX and integer unit are expensive. */
25868 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25869 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25871 /* ??? By keeping returned value relatively high, we limit the number
25872 of moves between integer and MMX/SSE registers for all targets.
25873 Additionally, high value prevents problem with x86_modes_tieable_p(),
25874 where integer modes in MMX/SSE registers are not tieable
25875 because of missing QImode and HImode moves to, from or between
25876 MMX/SSE registers. */
25877 return MAX (8, ix86_cost->mmxsse_to_integer);
25879 if (MAYBE_FLOAT_CLASS_P (class1))
25880 return ix86_cost->fp_move;
25881 if (MAYBE_SSE_CLASS_P (class1))
25882 return ix86_cost->sse_move;
25883 if (MAYBE_MMX_CLASS_P (class1))
25884 return ix86_cost->mmx_move;
/* NOTE(review): several return statements and the final return appear
   elided in this listing.  */
25888 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
25891 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
25893 /* Flags and only flags can only hold CCmode values. */
25894 if (CC_REGNO_P (regno))
25895 return GET_MODE_CLASS (mode) == MODE_CC;
25896 if (GET_MODE_CLASS (mode) == MODE_CC
25897 || GET_MODE_CLASS (mode) == MODE_RANDOM
25898 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
25900 if (FP_REGNO_P (regno))
25901 return VALID_FP_MODE_P (mode);
25902 if (SSE_REGNO_P (regno))
25904 /* We implement the move patterns for all vector modes into and
25905 out of SSE registers, even when no operation instructions
25906 are available. OImode move is available only when AVX is
25908 return ((TARGET_AVX && mode == OImode)
25909 || VALID_AVX256_REG_MODE (mode)
25910 || VALID_SSE_REG_MODE (mode)
25911 || VALID_SSE2_REG_MODE (mode)
25912 || VALID_MMX_REG_MODE (mode)
25913 || VALID_MMX_REG_MODE_3DNOW (mode));
25915 if (MMX_REGNO_P (regno))
25917 /* We implement the move patterns for 3DNOW modes even in MMX mode,
25918 so if the register is available at all, then we can move data of
25919 the given mode into or out of it. */
25920 return (VALID_MMX_REG_MODE (mode)
25921 || VALID_MMX_REG_MODE_3DNOW (mode))
25924 if (mode == QImode)
25926 /* Take care for QImode values - they can be in non-QI regs,
25927 but then they do cause partial register stalls. */
25928 if (regno <= BX_REG || TARGET_64BIT)
25930 if (!TARGET_PARTIAL_REG_STALL)
25932 return reload_in_progress || reload_completed;
25934 /* We handle both integer and floats in the general purpose registers. */
25935 else if (VALID_INT_MODE_P (mode))
25937 else if (VALID_FP_MODE_P (mode))
25939 else if (VALID_DFP_MODE_P (mode))
25941 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
25942 on to use that value in smaller contexts, this can easily force a
25943 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
25944 supporting DImode, allow it. */
25945 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
/* NOTE(review): the switch header and case labels are elided in this
   listing; the two visible returns presumably belong to the small-mode
   (QI/HI) arm and the SI/DI arm respectively — confirm against the
   original file.  */
25951 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
25952 tieable integer mode. */
25955 ix86_tieable_integer_mode_p (enum machine_mode mode)
25964 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
25967 return TARGET_64BIT;
25974 /* Return true if MODE1 is accessible in a register that can hold MODE2
25975 without copying. That is, all register classes that can hold MODE2
25976 can also hold MODE1. */
25979 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
25981 if (mode1 == mode2)
/* Two tieable integer modes always tie with each other.  */
25984 if (ix86_tieable_integer_mode_p (mode1)
25985 && ix86_tieable_integer_mode_p (mode2))
25988 /* MODE2 being XFmode implies fp stack or general regs, which means we
25989 can tie any smaller floating point modes to it. Note that we do not
25990 tie this with TFmode. */
25991 if (mode2 == XFmode)
25992 return mode1 == SFmode || mode1 == DFmode;
25994 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
25995 that we can tie it with SFmode. */
25996 if (mode2 == DFmode)
25997 return mode1 == SFmode;
25999 /* If MODE2 is only appropriate for an SSE register, then tie with
26000 any other mode acceptable to SSE registers. */
26001 if (GET_MODE_SIZE (mode2) == 16
26002 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26003 return (GET_MODE_SIZE (mode1) == 16
26004 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26006 /* If MODE2 is appropriate for an MMX register, then tie
26007 with any other mode acceptable to MMX registers. */
26008 if (GET_MODE_SIZE (mode2) == 8
26009 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26010 return (GET_MODE_SIZE (mode1) == 8
26011 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
/* NOTE(review): this listing elides the outer `switch (code)` header, its
   case labels, break statements and many returns, so the arm each visible
   fragment belongs to is inferred from context — the comments below hedge
   accordingly.  */
26016 /* Compute a (partial) cost for rtx X. Return true if the complete
26017 cost has been computed, and false if subexpressions should be
26018 scanned. In either case, *TOTAL contains the cost result. */
26021 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26023 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26024 enum machine_mode mode = GET_MODE (x);
/* Optimize-for-size uses the byte-cost table instead of cycle costs.  */
26025 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* Constant operand costs (apparently the CONST_INT/SYMBOL_REF arms).  */
26033 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26035 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
/* NOTE(review): `!GET_CODE (x) != LABEL_REF` looks suspicious — the `!`
   applied to GET_CODE before an inequality comparison; presumably
   `GET_CODE (x) != LABEL_REF` was intended.  Elided context prevents a
   confident fix here — verify against the upstream file.  */
26037 else if (flag_pic && SYMBOLIC_CONST (x)
26039 || (!GET_CODE (x) != LABEL_REF
26040 && (GET_CODE (x) != SYMBOL_REF
26041 || !SYMBOL_REF_LOCAL_P (x)))))
26048 if (mode == VOIDmode)
/* FP constants: free if loadable by fld1/fldz etc., else pool load.  */
26051 switch (standard_80387_constant_p (x))
26056 default: /* Other constants */
26061 /* Start with (MEM (SYMBOL_REF)), since that's where
26062 it'll probably end up. Add a penalty for size. */
26063 *total = (COSTS_N_INSNS (1)
26064 + (flag_pic != 0 && !TARGET_64BIT)
26065 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26071 /* The zero extensions is often completely free on x86_64, so make
26072 it as cheap as possible. */
26073 if (TARGET_64BIT && mode == DImode
26074 && GET_MODE (XEXP (x, 0)) == SImode)
26076 else if (TARGET_ZERO_EXTEND_WITH_AND)
26077 *total = cost->add;
26079 *total = cost->movzx;
26083 *total = cost->movsx;
/* Shift-by-constant arm: shift by 1 is as cheap as an add; small left
   shifts may be done with LEA when that is cheaper.  */
26087 if (CONST_INT_P (XEXP (x, 1))
26088 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26090 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26093 *total = cost->add;
26096 if ((value == 2 || value == 3)
26097 && cost->lea <= cost->shift_const)
26099 *total = cost->lea;
/* 32-bit DImode shifts are synthesized from two SImode shifts.  */
26109 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26111 if (CONST_INT_P (XEXP (x, 1)))
26113 if (INTVAL (XEXP (x, 1)) > 32)
26114 *total = cost->shift_const + COSTS_N_INSNS (2);
26116 *total = cost->shift_const * 2;
26120 if (GET_CODE (XEXP (x, 1)) == AND)
26121 *total = cost->shift_var * 2;
26123 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26128 if (CONST_INT_P (XEXP (x, 1)))
26129 *total = cost->shift_const;
26131 *total = cost->shift_var;
/* Multiply arm: FP multiplies use fmul cost in every flavor.  */
26136 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26138 /* ??? SSE scalar cost should be used here. */
26139 *total = cost->fmul;
26142 else if (X87_FLOAT_MODE_P (mode))
26144 *total = cost->fmul;
26147 else if (FLOAT_MODE_P (mode))
26149 /* ??? SSE vector cost should be used here. */
26150 *total = cost->fmul;
26155 rtx op0 = XEXP (x, 0);
26156 rtx op1 = XEXP (x, 1);
/* Integer multiply: cost grows with the number of set bits in a
   constant multiplier (value &= value - 1 clears the lowest set bit).  */
26158 if (CONST_INT_P (XEXP (x, 1)))
26160 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26161 for (nbits = 0; value != 0; value &= value - 1)
26165 /* This is arbitrary. */
26168 /* Compute costs correctly for widening multiplication. */
26169 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26170 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26171 == GET_MODE_SIZE (mode))
26173 int is_mulwiden = 0;
26174 enum machine_mode inner_mode = GET_MODE (op0);
26176 if (GET_CODE (op0) == GET_CODE (op1))
26177 is_mulwiden = 1, op1 = XEXP (op1, 0);
26178 else if (CONST_INT_P (op1))
26180 if (GET_CODE (op0) == SIGN_EXTEND)
26181 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26184 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
/* Widening multiply: charge at the narrower inner mode.  */
26188 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26191 *total = (cost->mult_init[MODE_INDEX (mode)]
26192 + nbits * cost->mult_bit
26193 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
/* Divide arm.  */
26202 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26203 /* ??? SSE cost should be used here. */
26204 *total = cost->fdiv;
26205 else if (X87_FLOAT_MODE_P (mode))
26206 *total = cost->fdiv;
26207 else if (FLOAT_MODE_P (mode))
26208 /* ??? SSE vector cost should be used here. */
26209 *total = cost->fdiv;
26211 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS arm: recognize base + index*scale [+ disp] forms as LEA.  */
26215 if (GET_MODE_CLASS (mode) == MODE_INT
26216 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26218 if (GET_CODE (XEXP (x, 0)) == PLUS
26219 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26220 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26221 && CONSTANT_P (XEXP (x, 1)))
26223 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26224 if (val == 2 || val == 4 || val == 8)
26226 *total = cost->lea;
26227 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26228 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26229 outer_code, speed);
26230 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26234 else if (GET_CODE (XEXP (x, 0)) == MULT
26235 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26237 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26238 if (val == 2 || val == 4 || val == 8)
26240 *total = cost->lea;
26241 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26242 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26246 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26248 *total = cost->lea;
26249 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26250 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26251 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
/* Add/sub arm: FP uses fadd cost.  */
26258 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26260 /* ??? SSE cost should be used here. */
26261 *total = cost->fadd;
26264 else if (X87_FLOAT_MODE_P (mode))
26266 *total = cost->fadd;
26269 else if (FLOAT_MODE_P (mode))
26271 /* ??? SSE vector cost should be used here. */
26272 *total = cost->fadd;
/* 32-bit DImode logical/arithmetic needs two insns; charge each operand
   double when it is not already DImode.  */
26280 if (!TARGET_64BIT && mode == DImode)
26282 *total = (cost->add * 2
26283 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26284 << (GET_MODE (XEXP (x, 0)) != DImode))
26285 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26286 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* Negation arm: FP negation uses fchs cost.  */
26292 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26294 /* ??? SSE cost should be used here. */
26295 *total = cost->fchs;
26298 else if (X87_FLOAT_MODE_P (mode))
26300 *total = cost->fchs;
26303 else if (FLOAT_MODE_P (mode))
26305 /* ??? SSE vector cost should be used here. */
26306 *total = cost->fchs;
26312 if (!TARGET_64BIT && mode == DImode)
26313 *total = cost->add * 2;
26315 *total = cost->add;
/* Compare arm: single-bit extract compared with zero becomes test.  */
26319 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26320 && XEXP (XEXP (x, 0), 1) == const1_rtx
26321 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26322 && XEXP (x, 1) == const0_rtx)
26324 /* This kind of construct is implemented using test[bwl].
26325 Treat it as if we had an AND. */
26326 *total = (cost->add
26327 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26328 + rtx_cost (const1_rtx, outer_code, speed));
26334 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS arm.  */
26339 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26340 /* ??? SSE cost should be used here. */
26341 *total = cost->fabs;
26342 else if (X87_FLOAT_MODE_P (mode))
26343 *total = cost->fabs;
26344 else if (FLOAT_MODE_P (mode))
26345 /* ??? SSE vector cost should be used here. */
26346 *total = cost->fabs;
/* SQRT arm.  */
26350 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26351 /* ??? SSE cost should be used here. */
26352 *total = cost->fsqrt;
26353 else if (X87_FLOAT_MODE_P (mode))
26354 *total = cost->fsqrt;
26355 else if (FLOAT_MODE_P (mode))
26356 /* ??? SSE vector cost should be used here. */
26357 *total = cost->fsqrt;
/* UNSPEC arm: thread-pointer reference.  */
26361 if (XINT (x, 1) == UNSPEC_TP)
/* Monotonically increasing counter used to build unique per-stub local
   label names (e.g. "L<n>$lz", "LPC$<n>") in machopic_output_stub.  */
26372 static int current_machopic_label_num;
/* NOTE(review): this extraction embeds original line numbers and has dropped
   brace/blank/#if lines (gaps in the numbering) — verify against upstream
   gcc/config/i386/i386.c before editing.  Emits a Darwin (Mach-O) lazy
   symbol stub: the stub itself, its binder, and the lazy pointer slot.  */
26374 /* Given a symbol name and its associated stub, write out the
26375 definition of the stub. */
26378 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26380 unsigned int length;
26381 char *binder_name, *symbol_name, lazy_ptr_name[32];
26382 int label = ++current_machopic_label_num;
26384 /* For 64-bit we shouldn't get here. */
26385 gcc_assert (!TARGET_64BIT);
26387 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26388 symb = (*targetm.strip_name_encoding) (symb);
26390 length = strlen (stub);
26391 binder_name = XALLOCAVEC (char, length + 32);
26392 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26394 length = strlen (symb);
26395 symbol_name = XALLOCAVEC (char, length + 32);
26396 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26398 sprintf (lazy_ptr_name, "L%d$lz", label);
/* The two switch_to_section calls below are alternatives; the selecting
   conditional (presumably a PIC test) was lost in extraction — confirm.  */
26401 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26403 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26405 fprintf (file, "%s:\n", stub);
26406 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC flavor: materialize PC in %eax, load the lazy pointer relative to
   it, and jump through %edx.  */
26410 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26411 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26412 fprintf (file, "\tjmp\t*%%edx\n");
/* Non-PIC flavor: jump indirect through the lazy pointer directly.  */
26415 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder: push the lazy pointer (PIC computes it via lea, non-PIC pushes
   the literal address) and tail-call dyld's binding helper.  */
26417 fprintf (file, "%s:\n", binder_name);
26421 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26422 fprintf (file, "\tpushl\t%%eax\n");
26425 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26427 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* The lazy pointer starts out pointing at the binder.  */
26429 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26430 fprintf (file, "%s:\n", lazy_ptr_name);
26431 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26432 fprintf (file, "\t.long %s\n", binder_name);
/* End-of-file hook for Darwin targets; delegates to the generic Darwin
   epilogue.  (Surrounding braces/#if lines were dropped by extraction.)  */
26436 darwin_x86_file_end (void)
26438 darwin_file_end ();
26441 #endif /* TARGET_MACHO */
/* Fills reg_alloc_order[] with the preferred allocation order:
   call-clobbered GPRs, call-saved GPRs, then x87/SSE (order depends on
   whether SSE is doing the FP math), then MMX, then zero padding.
   NOTE(review): brace/blank lines were dropped by extraction.  */
26443 /* Order the registers for register allocator. */
26446 x86_order_regs_for_local_alloc (void)
26451 /* First allocate the local general purpose registers. */
26452 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26453 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26454 reg_alloc_order [pos++] = i;
26456 /* Global general purpose registers. */
26457 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26458 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26459 reg_alloc_order [pos++] = i;
26461 /* x87 registers come first in case we are doing FP math
26463 if (!TARGET_SSE_MATH)
26464 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26465 reg_alloc_order [pos++] = i;
26467 /* SSE registers. */
26468 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26469 reg_alloc_order [pos++] = i;
26470 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26471 reg_alloc_order [pos++] = i;
26473 /* x87 registers. */
26474 if (TARGET_SSE_MATH)
26475 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26476 reg_alloc_order [pos++] = i;
26478 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26479 reg_alloc_order [pos++] = i;
26481 /* Initialize the rest of array as we do not allocate some registers
26483 while (pos < FIRST_PSEUDO_REGISTER)
26484 reg_alloc_order [pos++] = 0;
/* Attribute handler: validates "ms_abi"/"sysv_abi" — rejects non-function
   targets, rejects 32-bit (warning path at 26506 — its guarding condition
   was dropped by extraction; presumably !TARGET_64BIT), and rejects
   combining the two ABIs on one type.  Sets *no_add_attrs to drop the
   attribute on failure.  */
26487 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26488 struct attribute_spec.handler. */
26490 ix86_handle_abi_attribute (tree *node, tree name,
26491 tree args ATTRIBUTE_UNUSED,
26492 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26494 if (TREE_CODE (*node) != FUNCTION_TYPE
26495 && TREE_CODE (*node) != METHOD_TYPE
26496 && TREE_CODE (*node) != FIELD_DECL
26497 && TREE_CODE (*node) != TYPE_DECL)
26499 warning (OPT_Wattributes, "%qs attribute only applies to functions",
26500 IDENTIFIER_POINTER (name));
26501 *no_add_attrs = true;
26506 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26507 IDENTIFIER_POINTER (name));
26508 *no_add_attrs = true;
26512 /* Can combine regparm with all attributes but fastcall. */
26513 if (is_attribute_p ("ms_abi", name))
26515 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26517 error ("ms_abi and sysv_abi attributes are not compatible");
26522 else if (is_attribute_p ("sysv_abi", name))
26524 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26526 error ("ms_abi and sysv_abi attributes are not compatible");
/* Attribute handler: accepts "ms_struct"/"gcc_struct" only on record or
   union types (resolving through a TYPE_DECL), and refuses to stack the
   two mutually exclusive attributes on the same type.  */
26535 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26536 struct attribute_spec.handler. */
26538 ix86_handle_struct_attribute (tree *node, tree name,
26539 tree args ATTRIBUTE_UNUSED,
26540 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26543 if (DECL_P (*node))
26545 if (TREE_CODE (*node) == TYPE_DECL)
26546 type = &TREE_TYPE (*node);
26551 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26552 || TREE_CODE (*type) == UNION_TYPE)))
26554 warning (OPT_Wattributes, "%qs attribute ignored",
26555 IDENTIFIER_POINTER (name));
26556 *no_add_attrs = true;
26559 else if ((is_attribute_p ("ms_struct", name)
26560 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26561 || ((is_attribute_p ("gcc_struct", name)
26562 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26564 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26565 IDENTIFIER_POINTER (name));
26566 *no_add_attrs = true;
/* True if RECORD_TYPE should use MS bitfield layout: either the target
   defaults to it and "gcc_struct" does not override, or "ms_struct" is
   explicitly present on the type.  */
26573 ix86_ms_bitfield_layout_p (const_tree record_type)
26575 return (TARGET_MS_BITFIELD_LAYOUT &&
26576 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26577 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
/* Computes the RTX for the incoming `this' pointer: a DImode register on
   64-bit (first or second integer argument register depending on whether
   the return value is an aggregate passed by hidden reference), a 32-bit
   register under regparm/fastcall, otherwise a stack slot at 4 or 8 bytes
   above the return address.  NOTE(review): several guarding conditionals
   (e.g. the TARGET_64BIT test around 26592) were dropped by extraction.  */
26580 /* Returns an expression indicating where the this parameter is
26581 located on entry to the FUNCTION. */
26584 x86_this_parameter (tree function)
26586 tree type = TREE_TYPE (function);
26587 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26592 const int *parm_regs;
26594 if (ix86_function_type_abi (type) == MS_ABI)
26595 parm_regs = x86_64_ms_abi_int_parameter_registers;
26597 parm_regs = x86_64_int_parameter_registers;
26598 return gen_rtx_REG (DImode, parm_regs[aggr]);
26601 nregs = ix86_function_regparm (type, function);
26603 if (nregs > 0 && !stdarg_p (type))
26607 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26608 regno = aggr ? DX_REG : CX_REG;
26616 return gen_rtx_MEM (SImode,
26617 plus_constant (stack_pointer_rtx, 4));
26620 return gen_rtx_REG (SImode, regno);
26623 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
/* Feasibility test for x86_output_mi_thunk: 64-bit always works; 32-bit
   needs a scratch register free of regparm arguments when a vtable
   lookup (vcall_offset) or a PIC GOT reference is required.  The
   return statements were dropped by extraction.  */
26626 /* Determine whether x86_output_mi_thunk can succeed. */
26629 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26630 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26631 HOST_WIDE_INT vcall_offset, const_tree function)
26633 /* 64-bit can handle anything. */
26637 /* For 32-bit, everything's fine if we have one free register. */
26638 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26641 /* Need a free register for vcall_offset. */
26645 /* Need a free register for GOT references. */
26646 if (flag_pic && !(*targetm.binds_local_p) (function))
26649 /* Otherwise ok. */
/* Emits the assembly for a `this'-adjusting thunk: optionally loads THIS
   into a register, adds DELTA, optionally adds *(*this + VCALL_OFFSET),
   stores THIS back if it lives on the stack, then tail-jumps to FUNCTION
   (directly, via GOTPCREL, via a Mach-O stub, or via a 32-bit GOT load).
   NOTE(review): TARGET_64BIT / TARGET_MACHO conditionals and braces were
   dropped by this extraction — verify paths against upstream i386.c.  */
26653 /* Output the assembler code for a thunk function. THUNK_DECL is the
26654 declaration for the thunk function itself, FUNCTION is the decl for
26655 the target function. DELTA is an immediate constant offset to be
26656 added to THIS. If VCALL_OFFSET is nonzero, the word at
26657 *(*this + vcall_offset) should be added to THIS. */
26660 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26661 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26662 HOST_WIDE_INT vcall_offset, tree function)
26665 rtx this_param = x86_this_parameter (function);
26668 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
26669 pull it in now and let DELTA benefit. */
26670 if (REG_P (this_param))
26671 this_reg = this_param;
26672 else if (vcall_offset)
26674 /* Put the this parameter into %eax. */
26675 xops[0] = this_param;
26676 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26677 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26680 this_reg = NULL_RTX;
26682 /* Adjust the this parameter by a fixed constant. */
26685 xops[0] = GEN_INT (delta);
26686 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit: a delta outside the signed-32-bit immediate range must be
   staged through scratch register %r10.  */
26689 if (!x86_64_general_operand (xops[0], DImode))
26691 tmp = gen_rtx_REG (DImode, R10_REG);
26693 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26695 xops[1] = this_param;
26697 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26700 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26703 /* Adjust the this parameter by a value stored in the vtable. */
26707 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit: scratch is %ecx, or %eax when fastcall already claims %ecx.  */
26710 int tmp_regno = CX_REG;
26711 if (lookup_attribute ("fastcall",
26712 TYPE_ATTRIBUTES (TREE_TYPE (function))))
26713 tmp_regno = AX_REG;
26714 tmp = gen_rtx_REG (SImode, tmp_regno);
26717 xops[0] = gen_rtx_MEM (Pmode, this_reg);
26719 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26721 /* Adjust the this parameter. */
26722 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
26723 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26725 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26726 xops[0] = GEN_INT (vcall_offset);
26728 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26729 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26731 xops[1] = this_reg;
26732 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26735 /* If necessary, drop THIS back to its stack slot. */
26736 if (this_reg && this_reg != this_param)
26738 xops[0] = this_reg;
26739 xops[1] = this_param;
26740 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26743 xops[0] = XEXP (DECL_RTL (function), 0);
26746 if (!flag_pic || (*targetm.binds_local_p) (function))
26747 output_asm_insn ("jmp\t%P0", xops);
26748 /* All thunks should be in the same object as their target,
26749 and thus binds_local_p should be true. */
26750 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
26751 gcc_unreachable ();
/* 64-bit PIC: jump indirect through the GOT entry.  */
26754 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26755 tmp = gen_rtx_CONST (Pmode, tmp);
26756 tmp = gen_rtx_MEM (QImode, tmp);
26758 output_asm_insn ("jmp\t%A0", xops);
26763 if (!flag_pic || (*targetm.binds_local_p) (function))
26764 output_asm_insn ("jmp\t%P0", xops);
/* Darwin PIC: jump through the function's Mach-O indirection stub.  */
26769 rtx sym_ref = XEXP (DECL_RTL (function), 0);
26770 tmp = (gen_rtx_SYMBOL_REF
26772 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26773 tmp = gen_rtx_MEM (QImode, tmp);
26775 output_asm_insn ("jmp\t%0", xops);
26778 #endif /* TARGET_MACHO */
/* 32-bit ELF PIC: set up the GOT pointer in %ecx and jump via the GOT.  */
26780 tmp = gen_rtx_REG (SImode, CX_REG);
26781 output_set_got (tmp, NULL_RTX);
26784 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26785 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: generic preamble, optional Darwin preamble,
   optional ".version"/"__fltused" directives, and Intel-syntax selection
   when -masm=intel.  (Conditional/brace lines were dropped by extraction.)  */
26791 x86_file_start (void)
26793 default_file_start ();
26795 darwin_file_start ();
26797 if (X86_FILE_START_VERSION_DIRECTIVE)
26798 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26799 if (X86_FILE_START_FLTUSED)
26800 fputs ("\t.global\t__fltused\n", asm_out_file);
26801 if (ix86_asm_dialect == ASM_INTEL)
26802 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: on 32-bit without -malign-double, caps the
   alignment of double/integer-class fields at 32 bits (the traditional
   i386 psABI layout).  The early-return for 64-bit/align-double and the
   final return were dropped by extraction.  */
26806 x86_field_alignment (tree field, int computed)
26808 enum machine_mode mode;
26809 tree type = TREE_TYPE (field);
26811 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26813 mode = TYPE_MODE (strip_array_types (type));
26814 if (mode == DFmode || mode == DCmode
26815 || GET_MODE_CLASS (mode) == MODE_INT
26816 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26817 return MIN (32, computed);
/* Emits the mcount profiling call: 64-bit (RIP-relative counter, direct
   or GOTPCREL call), 32-bit PIC (GOTOFF counter, call via GOT), or plain
   32-bit (absolute counter, direct call).  The TARGET_64BIT/flag_pic
   branch structure around the three variants was dropped by extraction.  */
26821 /* Output assembler code to FILE to increment profiler label # LABELNO
26822 for profiling a function entry. */
26824 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
26828 #ifndef NO_PROFILE_COUNTERS
26829 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
26832 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26833 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
26835 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26839 #ifndef NO_PROFILE_COUNTERS
26840 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
26841 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
26843 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
26847 #ifndef NO_PROFILE_COUNTERS
26848 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
26849 PROFILE_COUNT_REGISTER);
26851 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* Conservative lower bound on an insn's encoded size, used by the
   4-jumps-per-16-bytes padding pass below.  Alignment unspecs and jump
   tables count as zero; calls mentioning a symbol count as 5; otherwise
   the length attributes are consulted.  Several return statements were
   dropped by extraction.  */
26855 /* We don't have exact information about the insn sizes, but we may assume
26856 quite safely that we are informed about all 1 byte insns and memory
26857 address sizes. This is enough to eliminate unnecessary padding in
26861 min_insn_size (rtx insn)
26865 if (!INSN_P (insn) || !active_insn_p (insn))
26868 /* Discard alignments we've emit and jump instructions. */
26869 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26870 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26873 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26874 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
26877 /* Important case - calls are always 5 bytes.
26878 It is common to have many calls in the row. */
26880 && symbolic_reference_mentioned_p (PATTERN (insn))
26881 && !SIBLING_CALL_P (insn))
26883 if (get_attr_length (insn) <= 1)
26886 /* For normal instructions we may rely on the sizes of addresses
26887 and the presence of symbol to require 4 bytes of encoding.
26888 This is not the case for jumps where references are PC relative. */
26889 if (!JUMP_P (insn))
26891 l = get_attr_length_address (insn);
26892 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
/* Works around the K8 branch-predictor limit of 3 jumps per 16-byte
   window: slides a window [start, insn] over the insn stream, counting
   jumps and estimated bytes, and emits an align-pad before the 4th jump
   when the window would fit in under 16 bytes.  NOTE(review): several
   guard/brace lines were dropped by extraction.  */
26901 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
26905 ix86_avoid_jump_misspredicts (void)
26907 rtx insn, start = get_insns ();
26908 int nbytes = 0, njumps = 0;
26911 /* Look for all minimal intervals of instructions containing 4 jumps.
26912 The intervals are bounded by START and INSN. NBYTES is the total
26913 size of instructions in the interval including INSN and not including
26914 START. When the NBYTES is smaller than 16 bytes, it is possible
26915 that the end of START and INSN ends up in the same 16byte page.
26917 The smallest offset in the page INSN can start is the case where START
26918 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
26919 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
26921 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26924 nbytes += min_insn_size (insn);
26926 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
26927 INSN_UID (insn), min_insn_size (insn));
26929 && GET_CODE (PATTERN (insn)) != ADDR_VEC
26930 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front until at most 4 jumps remain.  */
26938 start = NEXT_INSN (start);
26939 if ((JUMP_P (start)
26940 && GET_CODE (PATTERN (start)) != ADDR_VEC
26941 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26943 njumps--, isjump = 1;
26946 nbytes -= min_insn_size (start);
26948 gcc_assert (njumps >= 0);
26950 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
26951 INSN_UID (start), INSN_UID (insn), nbytes);
26953 if (njumps == 3 && isjump && nbytes < 16)
26955 int padsize = 15 - nbytes + min_insn_size (insn);
26958 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
26959 INSN_UID (insn), padsize);
26960 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
/* For each RETURN that ends a predecessor of the exit block, decides
   whether it is reached via a label/condjump (or begins an effectively
   empty hot function) and, if so, replaces it with the long-form return
   (a REP-prefixed ret) to dodge the Athlon branch-prediction penalty.  */
26965 /* AMD Athlon works faster
26966 when RET is not destination of conditional jump or directly preceded
26967 by other jump instruction. We avoid the penalty by inserting NOP just
26968 before the RET instructions in such cases. */
26970 ix86_pad_returns (void)
26975 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
26977 basic_block bb = e->src;
26978 rtx ret = BB_END (bb);
26980 bool replace = false;
26982 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
26983 || optimize_bb_for_size_p (bb))
26985 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
26986 if (active_insn_p (prev) || LABEL_P (prev))
26988 if (prev && LABEL_P (prev))
/* RET preceded by a label: penalized when any predecessor edge is a
   taken (non-fallthru) branch into it.  */
26993 FOR_EACH_EDGE (e, ei, bb->preds)
26994 if (EDGE_FREQUENCY (e) && e->src->index >= 0
26995 && !(e->flags & EDGE_FALLTHRU))
27000 prev = prev_active_insn (ret);
27002 && ((JUMP_P (prev) && any_condjump_p (prev))
27005 /* Empty functions get branch mispredict even when the jump destination
27006 is not visible to us. */
27007 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27012 emit_insn_before (gen_return_internal_long (), ret);
/* Machine-dependent reorg pass body (the function-name line was dropped
   by extraction; presumably ix86_reorg — TODO confirm): runs the two
   speed-only peephole passes defined above.  */
27018 /* Implement machine specific optimizations. We implement padding of returns
27019 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
27023 if (TARGET_PAD_RETURNS && optimize
27024 && optimize_function_for_speed_p (cfun))
27025 ix86_pad_returns ();
27026 if (TARGET_FOUR_JUMP_LIMIT && optimize
27027 && optimize_function_for_speed_p (cfun))
27028 ix86_avoid_jump_misspredicts ();
/* Scans the cached operands of INSN for a QImode register above %ebx,
   i.e. one whose byte form needs a REX prefix.  The return statements
   were dropped by extraction.  */
27031 /* Return nonzero when QImode register that must be represented via REX prefix
27034 x86_extended_QIreg_mentioned_p (rtx insn)
27037 extract_insn_cached (insn);
27038 for (i = 0; i < recog_data.n_operands; i++)
27039 if (REG_P (recog_data.operand[i])
27040 && REGNO (recog_data.operand[i]) > BX_REG)
/* for_each_rtx callback: nonzero when *P is a register requiring a REX
   prefix (r8-r15 or xmm8-xmm15).  The REG_P guard line was dropped by
   extraction.  */
27045 /* Return nonzero when P points to register encoded via REX prefix.
27046 Called via for_each_rtx. */
27048 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27050 unsigned int regno;
27053 regno = REGNO (*p);
27054 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
/* True when INSN (or a bare rtx) mentions any REX-encoded register;
   walks the pattern with extended_reg_mentioned_1.  */
27057 /* Return true when INSN mentions register that must be encoded using REX
27060 x86_extended_reg_mentioned_p (rtx insn)
27062 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27063 extended_reg_mentioned_1, NULL);
/* Unsigned int -> FP conversion: non-negative inputs use the ordinary
   signed float path; negative (high-bit-set) inputs are halved with the
   low bit folded in (to keep rounding correct), converted, then doubled.  */
27066 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27067 optabs would emit if we didn't have TFmode patterns. */
27070 x86_emit_floatuns (rtx operands[2])
27072 rtx neglab, donelab, i0, i1, f0, in, out;
27073 enum machine_mode mode, inmode;
27075 inmode = GET_MODE (operands[1]);
27076 gcc_assert (inmode == SImode || inmode == DImode);
27079 in = force_reg (inmode, operands[1]);
27080 mode = GET_MODE (out);
27081 neglab = gen_label_rtx ();
27082 donelab = gen_label_rtx ();
27083 f0 = gen_reg_rtx (mode);
27085 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27087 expand_float (out, in, 0);
27089 emit_jump_insn (gen_jump (donelab));
27092 emit_label (neglab);
/* i0 = (in >> 1) | (in & 1): halve while preserving the sticky bit.  */
27094 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27096 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27098 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27100 expand_float (f0, i0, 0);
27102 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27104 emit_label (donelab);
/* Broadcasts scalar VAL into every lane of TARGET.  Strategy depends on
   MODE: direct VEC_DUPLICATE, pshufw for V4HI/V8QI, widen-and-recurse,
   punpckl+pshufd sequences for V8HI/V16QI, or concat of a duplicated
   half for 256-bit modes.  NOTE(review): the mode switch's case labels
   and several braces were dropped by extraction — verify against
   upstream before editing.  */
27107 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27108 with all elements equal to VAR. Return true if successful. */
27111 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27112 rtx target, rtx val)
27114 enum machine_mode hmode, smode, wsmode, wvmode;
27129 val = force_reg (GET_MODE_INNER (mode), val);
27130 x = gen_rtx_VEC_DUPLICATE (mode, val);
27131 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27137 if (TARGET_SSE || TARGET_3DNOW_A)
27139 val = gen_lowpart (SImode, val);
27140 x = gen_rtx_TRUNCATE (HImode, val);
27141 x = gen_rtx_VEC_DUPLICATE (mode, x);
27142 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27164 /* Extend HImode to SImode using a paradoxical SUBREG. */
27165 tmp1 = gen_reg_rtx (SImode);
27166 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27167 /* Insert the SImode value as low element of V4SImode vector. */
27168 tmp2 = gen_reg_rtx (V4SImode);
27169 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27170 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27171 CONST0_RTX (V4SImode),
27173 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27174 /* Cast the V4SImode vector back to a V8HImode vector. */
27175 tmp1 = gen_reg_rtx (V8HImode);
27176 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27177 /* Duplicate the low short through the whole low SImode word. */
27178 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27179 /* Cast the V8HImode vector back to a V4SImode vector. */
27180 tmp2 = gen_reg_rtx (V4SImode);
27181 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27182 /* Replicate the low element of the V4SImode vector. */
27183 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27184 /* Cast the V2SImode back to V8HImode, and store in target. */
27185 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27196 /* Extend QImode to SImode using a paradoxical SUBREG. */
27197 tmp1 = gen_reg_rtx (SImode);
27198 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27199 /* Insert the SImode value as low element of V4SImode vector. */
27200 tmp2 = gen_reg_rtx (V4SImode);
27201 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27202 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27203 CONST0_RTX (V4SImode),
27205 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27206 /* Cast the V4SImode vector back to a V16QImode vector. */
27207 tmp1 = gen_reg_rtx (V16QImode);
27208 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27209 /* Duplicate the low byte through the whole low SImode word. */
27210 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27211 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27212 /* Cast the V16QImode vector back to a V4SImode vector. */
27213 tmp2 = gen_reg_rtx (V4SImode);
27214 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27215 /* Replicate the low element of the V4SImode vector. */
27216 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27217 /* Cast the V2SImode back to V16QImode, and store in target. */
27218 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27226 /* Replicate the value once into the next wider mode and recurse. */
27227 val = convert_modes (wsmode, smode, val, true);
27228 x = expand_simple_binop (wsmode, ASHIFT, val,
27229 GEN_INT (GET_MODE_BITSIZE (smode)),
27230 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27231 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27233 x = gen_reg_rtx (wvmode);
27234 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27235 gcc_unreachable ();
27236 emit_move_insn (target, gen_lowpart (mode, x));
/* Wide (e.g. 256-bit) modes: duplicate into a half-width vector and
   concatenate it with itself.  */
27259 rtx tmp = gen_reg_rtx (hmode);
27260 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27261 emit_insn (gen_rtx_SET (VOIDmode, target,
27262 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
/* Builds a vector that is zero except for element ONE_VAR, which holds
   VAR.  Uses vector-set insns where the ISA allows, VEC_CONCAT with zero
   for two-element modes, or a duplicate-merge-with-zero followed by a
   pshufd/shufps shuffle to place the element.  NOTE(review): the mode
   switch's case labels and several braces were dropped by extraction.  */
27271 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27272 whose ONE_VAR element is VAR, and other elements are zero. Return true
27276 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27277 rtx target, rtx var, int one_var)
27279 enum machine_mode vsimode;
27282 bool use_vector_set = false;
27287 /* For SSE4.1, we normally use vector set. But if the second
27288 element is zero and inter-unit moves are OK, we use movq
27290 use_vector_set = (TARGET_64BIT
27292 && !(TARGET_INTER_UNIT_MOVES
27298 use_vector_set = TARGET_SSE4_1;
27301 use_vector_set = TARGET_SSE2;
27304 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27311 use_vector_set = TARGET_AVX;
27314 /* Use ix86_expand_vector_set in 64bit mode only. */
27315 use_vector_set = TARGET_AVX && TARGET_64BIT;
27321 if (use_vector_set)
27323 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27324 var = force_reg (GET_MODE_INNER (mode), var);
27325 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case: concat VAR with a zero scalar.  */
27341 var = force_reg (GET_MODE_INNER (mode), var);
27342 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27343 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27348 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27349 new_target = gen_reg_rtx (mode);
27351 new_target = target;
27352 var = force_reg (GET_MODE_INNER (mode), var);
27353 x = gen_rtx_VEC_DUPLICATE (mode, var);
27354 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27355 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27358 /* We need to shuffle the value to the correct position, so
27359 create a new pseudo to store the intermediate result. */
27361 /* With SSE2, we can use the integer shuffle insns. */
27362 if (mode != V4SFmode && TARGET_SSE2)
27364 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27366 GEN_INT (one_var == 1 ? 0 : 1),
27367 GEN_INT (one_var == 2 ? 0 : 1),
27368 GEN_INT (one_var == 3 ? 0 : 1)));
27369 if (target != new_target)
27370 emit_move_insn (target, new_target);
27374 /* Otherwise convert the intermediate result to V4SFmode and
27375 use the SSE1 shuffle instructions. */
27376 if (mode != V4SFmode)
27378 tmp = gen_reg_rtx (V4SFmode);
27379 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27384 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27386 GEN_INT (one_var == 1 ? 0 : 1),
27387 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27388 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27390 if (mode != V4SFmode)
27391 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27392 else if (tmp != target)
27393 emit_move_insn (target, tmp);
27395 else if (target != new_target)
27396 emit_move_insn (target, new_target);
27401 vsimode = V4SImode;
27407 vsimode = V2SImode;
27413 /* Zero extend the variable element to SImode and recurse. */
27414 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27416 x = gen_reg_rtx (vsimode);
27417 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27419 gcc_unreachable ();
27421 emit_move_insn (target, gen_lowpart (mode, x));
/* Builds a vector where every element but ONE_VAR is a known constant:
   loads the all-constant vector (with the variable slot zeroed), then
   inserts VAR.  QImode elements are paired with their neighbor and
   handled as one HImode insert.  NOTE(review): case labels and braces
   were dropped by extraction.  */
27429 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27430 consisting of the values in VALS. It is known that all elements
27431 except ONE_VAR are constants. Return true if successful. */
27434 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27435 rtx target, rtx vals, int one_var)
27437 rtx var = XVECEXP (vals, 0, one_var);
27438 enum machine_mode wmode;
27441 const_vec = copy_rtx (vals);
27442 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27443 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27451 /* For the two element vectors, it's just as easy to use
27452 the general case. */
27456 /* Use ix86_expand_vector_set in 64bit mode only. */
27479 /* There's no way to set one QImode entry easily. Combine
27480 the variable value with its adjacent constant value, and
27481 promote to an HImode set. */
27482 x = XVECEXP (vals, 0, one_var ^ 1);
/* Even index: variable byte goes in the high half of the HImode pair.  */
27485 var = convert_modes (HImode, QImode, var, true);
27486 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27487 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27488 x = GEN_INT (INTVAL (x) & 0xff);
27492 var = convert_modes (HImode, QImode, var, true);
27493 x = gen_int_mode (INTVAL (x) << 8, HImode);
27495 if (x != const0_rtx)
27496 var = expand_simple_binop (HImode, IOR, var, x, var,
27497 1, OPTAB_LIB_WIDEN);
27499 x = gen_reg_rtx (wmode);
27500 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27501 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27503 emit_move_insn (target, gen_lowpart (mode, x));
27510 emit_move_insn (target, const_vec);
27511 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Builds a vector from N all-variable operands by recursive halving:
   N == 2 concatenates two registers directly; N == 4/8/16 initializes
   half-width vectors pairwise (backwards, to help register allocation —
   PR 36222) and recurses.  NOTE(review): the switch/case scaffolding was
   dropped by extraction.  */
27515 /* A subroutine of ix86_expand_vector_init_general. Use vector
27516 concatenate to handle the most general case: all values variable,
27517 and none identical. */
27520 ix86_expand_vector_init_concat (enum machine_mode mode,
27521 rtx target, rtx *ops, int n)
27523 enum machine_mode cmode, hmode = VOIDmode;
27524 rtx first[8], second[4];
27564 gcc_unreachable ();
27567 if (!register_operand (ops[1], cmode))
27568 ops[1] = force_reg (cmode, ops[1]);
27569 if (!register_operand (ops[0], cmode))
27570 ops[0] = force_reg (cmode, ops[0]);
27571 emit_insn (gen_rtx_SET (VOIDmode, target,
27572 gen_rtx_VEC_CONCAT (mode, ops[0],
27592 gcc_unreachable ();
27608 gcc_unreachable ();
27613 /* FIXME: We process inputs backward to help RA. PR 36222. */
27616 for (; i > 0; i -= 2, j--)
27618 first[j] = gen_reg_rtx (cmode);
27619 v = gen_rtvec (2, ops[i - 1], ops[i]);
27620 ix86_expand_vector_init (false, first[j],
27621 gen_rtx_PARALLEL (cmode, v));
27627 gcc_assert (hmode != VOIDmode);
27628 for (i = j = 0; i < n; i += 2, j++)
27630 second[j] = gen_reg_rtx (hmode);
27631 ix86_expand_vector_init_concat (hmode, second [j],
27635 ix86_expand_vector_init_concat (mode, target, second, n);
27638 ix86_expand_vector_init_concat (mode, target, first, n);
27642 gcc_unreachable ();
/* Builds a V8HI or V16QI vector from all-variable elements by loading
   pairs (one via scalar insert, one via vec_set) and then merging with
   successive punpckl interleaves at widening integer modes.  NOTE(review):
   switch/case scaffolding was dropped by extraction.  */
27646 /* A subroutine of ix86_expand_vector_init_general. Use vector
27647 interleave to handle the most general case: all values variable,
27648 and none identical. */
27651 ix86_expand_vector_init_interleave (enum machine_mode mode,
27652 rtx target, rtx *ops, int n)
27654 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27657 rtx (*gen_load_even) (rtx, rtx, rtx);
27658 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27659 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
27664 gen_load_even = gen_vec_setv8hi;
27665 gen_interleave_first_low = gen_vec_interleave_lowv4si;
27666 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27667 inner_mode = HImode;
27668 first_imode = V4SImode;
27669 second_imode = V2DImode;
27670 third_imode = VOIDmode;
27673 gen_load_even = gen_vec_setv16qi;
27674 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27675 gen_interleave_second_low = gen_vec_interleave_lowv4si;
27676 inner_mode = QImode;
27677 first_imode = V8HImode;
27678 second_imode = V4SImode;
27679 third_imode = V2DImode;
27682 gcc_unreachable ();
27685 for (i = 0; i < n; i++)
27687 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
27688 op0 = gen_reg_rtx (SImode);
27689 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27691 /* Insert the SImode value as low element of V4SImode vector. */
27692 op1 = gen_reg_rtx (V4SImode);
27693 op0 = gen_rtx_VEC_MERGE (V4SImode,
27694 gen_rtx_VEC_DUPLICATE (V4SImode,
27696 CONST0_RTX (V4SImode),
27698 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27700 /* Cast the V4SImode vector back to a vector in orignal mode. */
27701 op0 = gen_reg_rtx (mode);
27702 emit_move_insn (op0, gen_lowpart (mode, op1));
27704 /* Load even elements into the second positon. */
27705 emit_insn ((*gen_load_even) (op0,
27706 force_reg (inner_mode,
27710 /* Cast vector to FIRST_IMODE vector. */
27711 ops[i] = gen_reg_rtx (first_imode);
27712 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
27715 /* Interleave low FIRST_IMODE vectors. */
27716 for (i = j = 0; i < n; i += 2, j++)
27718 op0 = gen_reg_rtx (first_imode);
27719 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27721 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
27722 ops[j] = gen_reg_rtx (second_imode);
27723 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
27726 /* Interleave low SECOND_IMODE vectors. */
27727 switch (second_imode)
27730 for (i = j = 0; i < n / 2; i += 2, j++)
27732 op0 = gen_reg_rtx (second_imode);
27733 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27736 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
27738 ops[j] = gen_reg_rtx (third_imode);
27739 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
27741 second_imode = V2DImode;
27742 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27746 op0 = gen_reg_rtx (second_imode);
27747 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27750 /* Cast the SECOND_IMODE vector back to a vector on original
27752 emit_insn (gen_rtx_SET (VOIDmode, target,
27753 gen_lowpart (mode, op0)));
27757 gcc_unreachable ();
27761 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
27762 all values variable, and none identical. */
/* NOTE(review): this listing elides the switch/case labels and braces, so the
   dispatch structure below is partially inferred — confirm against the full
   source.  Visible strategies: (1) gather elements and build TARGET by
   concatenation; (2) build two half-mode vectors by interleaving and
   VEC_CONCAT them; (3) an SSE4.1 interleave path guarded by
   TARGET_INTER_UNIT_MOVES because it moves values from GPRs to SSE regs;
   (4) fallback that packs elements into word_mode registers.  */
27765 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27766 rtx target, rtx vals)
27768 rtx ops[32], op0, op1;
27769 enum machine_mode half_mode = VOIDmode;
27776 if (!mmx_ok && !TARGET_SSE)
/* Concat path: hand every element to the recursive concat expander.  */
27788 n = GET_MODE_NUNITS (mode);
27789 for (i = 0; i < n; i++)
27790 ops[i] = XVECEXP (vals, 0, i);
27791 ix86_expand_vector_init_concat (mode, target, ops, n);
27795 half_mode = V16QImode;
27799 half_mode = V8HImode;
/* Interleave path for wide modes: build the low and high halves in
   HALF_MODE, then concatenate them into TARGET.  */
27803 n = GET_MODE_NUNITS (mode);
27804 for (i = 0; i < n; i++)
27805 ops[i] = XVECEXP (vals, 0, i);
27806 op0 = gen_reg_rtx (half_mode);
27807 op1 = gen_reg_rtx (half_mode);
27808 ix86_expand_vector_init_interleave (half_mode, op0, ops,
27810 ix86_expand_vector_init_interleave (half_mode, op1,
27811 &ops [n >> 1], n >> 2);
27812 emit_insn (gen_rtx_SET (VOIDmode, target,
27813 gen_rtx_VEC_CONCAT (mode, op0, op1)));
27817 if (!TARGET_SSE4_1)
27825 /* Don't use ix86_expand_vector_init_interleave if we can't
27826 move from GPR to SSE register directly. */
27827 if (!TARGET_INTER_UNIT_MOVES)
27830 n = GET_MODE_NUNITS (mode);
27831 for (i = 0; i < n; i++)
27832 ops[i] = XVECEXP (vals, 0, i);
27833 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27841 gcc_unreachable ();
/* Fallback: assemble the vector in word_mode integer registers.  Each
   word collects N_ELT_PER_WORD elements via shift-and-or, most
   significant element first (note the reversed index below).  */
27845 int i, j, n_elts, n_words, n_elt_per_word;
27846 enum machine_mode inner_mode;
27847 rtx words[4], shift;
27849 inner_mode = GET_MODE_INNER (mode);
27850 n_elts = GET_MODE_NUNITS (mode);
27851 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27852 n_elt_per_word = n_elts / n_words;
27853 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27855 for (i = 0; i < n_words; ++i)
27857 rtx word = NULL_RTX;
27859 for (j = 0; j < n_elt_per_word; ++j)
27861 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27862 elt = convert_modes (word_mode, inner_mode, elt, true);
27868 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27869 word, 1, OPTAB_LIB_WIDEN);
27870 word = expand_simple_binop (word_mode, IOR, word, elt,
27871 word, 1, OPTAB_LIB_WIDEN);
/* Move the assembled word(s) into TARGET: 1 word is a plain lowpart
   move; 2 words fill the low and high halves of a clobbered temp;
   4 words recurse through the V4SImode general initializer.  */
27879 emit_move_insn (target, gen_lowpart (mode, words[0]));
27880 else if (n_words == 2)
27882 rtx tmp = gen_reg_rtx (mode);
27883 emit_clobber (tmp);
27884 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27885 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27886 emit_move_insn (target, tmp);
27888 else if (n_words == 4)
27890 rtx tmp = gen_reg_rtx (V4SImode);
27891 gcc_assert (word_mode == SImode);
27892 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
27893 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
27894 emit_move_insn (target, gen_lowpart (mode, tmp));
27897 gcc_unreachable ();
27901 /* Initialize vector TARGET via VALS. Suppress the use of MMX
27902 instructions unless MMX_OK is true. */
27905 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
27907 enum machine_mode mode = GET_MODE (target);
27908 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27909 int n_elts = GET_MODE_NUNITS (mode);
27910 int n_var = 0, one_var = -1;
27911 bool all_same = true, all_const_zero = true;
/* Classify the initializer: count non-constant elements (remembering
   the last one in ONE_VAR), and track whether all elements are zero
   and whether they are all identical.  */
27915 for (i = 0; i < n_elts; ++i)
27917 x = XVECEXP (vals, 0, i);
27918 if (!(CONST_INT_P (x)
27919 || GET_CODE (x) == CONST_DOUBLE
27920 || GET_CODE (x) == CONST_FIXED))
27921 n_var++, one_var = i;
27922 else if (x != CONST0_RTX (inner_mode))
27923 all_const_zero = false;
27924 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
27928 /* Constants are best loaded from the constant pool. */
27931 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)))<
27935 /* If all values are identical, broadcast the value. */
27937 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
27938 XVECEXP (vals, 0, 0)))
27941 /* Values where only one field is non-constant are best loaded from
27942 the pool and overwritten via move later. */
27946 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
27947 XVECEXP (vals, 0, one_var),
27951 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* No special case applied: fall through to the fully general expander.  */
27955 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store VAL into element ELT of vector TARGET.  MMX_OK permits the use
   of MMX instructions.  Dispatches on the vector mode (case labels are
   elided in this listing); unhandled modes fall back to a stack
   spill/store/reload sequence at the bottom.  */
27959 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
27961 enum machine_mode mode = GET_MODE (target);
27962 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27963 enum machine_mode half_mode;
27964 bool use_vec_merge = false;
/* Per-256-bit-mode extract/insert generators, indexed by [mode][hi/lo];
   used by the AVX path below that operates on one 128-bit half.  */
27966 static rtx (*gen_extract[6][2]) (rtx, rtx)
27968 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
27969 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
27970 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
27971 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
27972 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
27973 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
27975 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
27977 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
27978 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
27979 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
27980 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
27981 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
27982 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element vectors: extract the element we keep, then VEC_CONCAT it
   with VAL in the order determined by ELT.  */
27992 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
27993 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
27995 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
27997 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
27998 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28004 use_vec_merge = TARGET_SSE4_1;
28012 /* For the two element vectors, we implement a VEC_CONCAT with
28013 the extraction of the other element. */
28015 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28016 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28019 op0 = val, op1 = tmp;
28021 op0 = tmp, op1 = val;
28023 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28024 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28029 use_vec_merge = TARGET_SSE4_1;
28036 use_vec_merge = true;
/* V4SF without SSE4.1: shuffle the wanted slot to element 0, insert
   there, then shuffle back.  The per-element comments trace the lane
   contents (X = VAL).  */
28040 /* tmp = target = A B C D */
28041 tmp = copy_to_reg (target);
28042 /* target = A A B B */
28043 emit_insn (gen_sse_unpcklps (target, target, target));
28044 /* target = X A B B */
28045 ix86_expand_vector_set (false, target, val, 0);
28046 /* target = A X C D */
28047 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28048 GEN_INT (1), GEN_INT (0),
28049 GEN_INT (2+4), GEN_INT (3+4)));
28053 /* tmp = target = A B C D */
28054 tmp = copy_to_reg (target);
28055 /* tmp = X B C D */
28056 ix86_expand_vector_set (false, tmp, val, 0);
28057 /* target = A B X D */
28058 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28059 GEN_INT (0), GEN_INT (1),
28060 GEN_INT (0+4), GEN_INT (3+4)));
28064 /* tmp = target = A B C D */
28065 tmp = copy_to_reg (target);
28066 /* tmp = X B C D */
28067 ix86_expand_vector_set (false, tmp, val, 0);
28068 /* target = A B X D */
28069 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28070 GEN_INT (0), GEN_INT (1),
28071 GEN_INT (2+4), GEN_INT (0+4)));
28075 gcc_unreachable ();
28080 use_vec_merge = TARGET_SSE4_1;
28084 /* Element 0 handled by vec_merge below. */
28087 use_vec_merge = true;
28093 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28094 store into element 0, then shuffle them back. */
28098 order[0] = GEN_INT (elt);
28099 order[1] = const1_rtx;
28100 order[2] = const2_rtx;
28101 order[3] = GEN_INT (3);
28102 order[elt] = const0_rtx;
28104 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28105 order[1], order[2], order[3]));
28107 ix86_expand_vector_set (false, target, val, 0);
28109 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28110 order[1], order[2], order[3]));
28114 /* For SSE1, we have to reuse the V4SF code. */
28115 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28116 gen_lowpart (SFmode, val), elt);
28121 use_vec_merge = TARGET_SSE2;
28124 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28128 use_vec_merge = TARGET_SSE4_1;
/* 256-bit AVX modes: pick the 128-bit HALF_MODE, extract the half
   containing ELT, set within that half, and insert it back.  */
28135 half_mode = V16QImode;
28141 half_mode = V8HImode;
28147 half_mode = V4SImode;
28153 half_mode = V2DImode;
28159 half_mode = V4SFmode;
28165 half_mode = V2DFmode;
28171 /* Compute offset. */
28175 gcc_assert (i <= 1);
28177 /* Extract the half. */
28178 tmp = gen_reg_rtx (half_mode);
28179 emit_insn ((*gen_extract[j][i]) (tmp, target));
28181 /* Put val in tmp at elt. */
28182 ix86_expand_vector_set (false, tmp, val, elt);
28185 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* Common tail for the modes that set USE_VEC_MERGE: duplicate VAL and
   merge it in with a one-hot mask selecting element ELT.  */
28194 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28195 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28196 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last-resort fallback: spill TARGET to a stack temp, overwrite the
   one element in memory, and reload the whole vector.  */
28200 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28202 emit_move_insn (mem, target);
28204 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28205 emit_move_insn (tmp, val);
28207 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  MMX_OK permits
   MMX instructions.  Case labels are elided in this listing; unhandled
   modes fall through to the stack spill/reload path at the bottom.  */
28212 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28214 enum machine_mode mode = GET_MODE (vec);
28215 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28216 bool use_vec_extr = false;
28229 use_vec_extr = true;
28233 use_vec_extr = TARGET_SSE4_1;
/* V4SF without SSE4.1: shuffle/unpack the wanted lane into position
   before the common vec_select extraction below.  */
28245 tmp = gen_reg_rtx (mode);
28246 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28247 GEN_INT (elt), GEN_INT (elt),
28248 GEN_INT (elt+4), GEN_INT (elt+4)));
28252 tmp = gen_reg_rtx (mode);
28253 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28257 gcc_unreachable ();
28260 use_vec_extr = true;
28265 use_vec_extr = TARGET_SSE4_1;
/* V4SI without SSE4.1: broadcast the wanted lane with PSHUFD (or move
   the high half down with PUNPCKHDQ), then extract lane 0.  */
28279 tmp = gen_reg_rtx (mode);
28280 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28281 GEN_INT (elt), GEN_INT (elt),
28282 GEN_INT (elt), GEN_INT (elt)));
28286 tmp = gen_reg_rtx (mode);
28287 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28291 gcc_unreachable ();
28294 use_vec_extr = true;
28299 /* For SSE1, we have to reuse the V4SF code. */
28300 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28301 gen_lowpart (V4SFmode, vec), elt);
28307 use_vec_extr = TARGET_SSE2;
28310 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28314 use_vec_extr = TARGET_SSE4_1;
28318 /* ??? Could extract the appropriate HImode element and shift. */
/* Common tail: emit a VEC_SELECT of element ELT.  */
28325 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28326 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28328 /* Let the rtl optimizers know about the zero extension performed. */
28329 if (inner_mode == QImode || inner_mode == HImode)
28331 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28332 target = gen_lowpart (SImode, target);
28335 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill VEC to a stack temp and load the one element back.  */
28339 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28341 emit_move_insn (mem, vec);
28343 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28344 emit_move_insn (target, tmp);
28348 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28349 pattern to reduce; DEST is the destination; IN is the input vector. */
28352 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28354 rtx tmp1, tmp2, tmp3;
28356 tmp1 = gen_reg_rtx (V4SFmode);
28357 tmp2 = gen_reg_rtx (V4SFmode);
28358 tmp3 = gen_reg_rtx (V4SFmode);
/* Fold the high pair onto the low pair with MOVHLPS, combine, then fold
   lane 1 onto lane 0 with SHUFPS and combine again: log2(4) = 2 steps.  */
28360 emit_insn (gen_sse_movhlps (tmp1, in, in));
28361 emit_insn (fn (tmp2, tmp1, in));
28363 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28364 GEN_INT (1), GEN_INT (1),
28365 GEN_INT (1+4), GEN_INT (1+4)));
28366 emit_insn (fn (dest, tmp2, tmp3));
28369 /* Target hook for scalar_mode_supported_p. */
/* Decimal float modes and TFmode get explicit answers (the elided
   return values are not visible here); everything else defers to the
   default hook.  */
28371 ix86_scalar_mode_supported_p (enum machine_mode mode)
28373 if (DECIMAL_FLOAT_MODE_P (mode))
28375 else if (mode == TFmode)
28378 return default_scalar_mode_supported_p (mode);
28381 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when any enabled ISA level (SSE, SSE2,
   AVX, MMX, 3DNow!) validates it; the elided lines return the result
   of each test.  */
28383 ix86_vector_mode_supported_p (enum machine_mode mode)
28385 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28387 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28389 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28391 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28393 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28398 /* Target hook for c_mode_for_suffix. */
/* NOTE(review): the body of this function is entirely elided in this
   listing — confirm its suffix->mode mapping against the full source.  */
28399 static enum machine_mode
28400 ix86_c_mode_for_suffix (char suffix)
28410 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28412 We do this in the new i386 backend to maintain source compatibility
28413 with the old cc0-based compiler. */
/* Prepends implicit "flags" and "fpsr" clobbers to every asm statement's
   clobber list.  */
28416 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28417 tree inputs ATTRIBUTE_UNUSED,
28420 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28422 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28427 /* Implements target vector targetm.asm.encode_section_info. This
28428 is not used by netware. */
28430 static void ATTRIBUTE_UNUSED
28431 ix86_encode_section_info (tree decl, rtx rtl, int first)
28433 default_encode_section_info (decl, rtl, first);
/* Mark static/external variables living in the large data section so
   addressing code knows they need far (64-bit) addresses.  */
28435 if (TREE_CODE (decl) == VAR_DECL
28436 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28437 && ix86_in_large_data_p (decl))
28438 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28441 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes must use the maybe-unordered reversal so NaN
   ordering semantics are preserved; integer CC modes use the plain
   reversal.  */
28444 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28446 return (mode != CCFPmode && mode != CCFPUmode
28447 ? reverse_condition (code)
28448 : reverse_condition_maybe_unordered (code));
28451 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template string; popping forms (fstp) are used
   when the source register dies in this insn, which keeps the x87
   register stack balanced.  */
28455 output_387_reg_move (rtx insn, rtx *operands)
28457 if (REG_P (operands[0]))
28459 if (REG_P (operands[1])
28460 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28462 if (REGNO (operands[0]) == FIRST_STACK_REG)
28463 return output_387_ffreep (operands, 0);
28464 return "fstp\t%y0";
28466 if (STACK_TOP_P (operands[0]))
28467 return "fld%z1\t%y1";
28470 else if (MEM_P (operands[0]))
28472 gcc_assert (REG_P (operands[1]));
28473 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28474 return "fstp%z0\t%y0";
28477 /* There is no non-popping store to memory for XFmode.
28478 So if we need one, follow the store with a load. */
28479 if (GET_MODE (operands[0]) == XFmode)
28480 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
28482 return "fst%z0\t%y0";
28489 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28490 FP status register is set. */
28493 ix86_emit_fp_unordered_jump (rtx label)
28495 rtx reg = gen_reg_rtx (HImode)<
/* Read the x87 status word into REG.  */
28498 emit_insn (gen_x86_fnstsw_1 (reg));
28500 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
/* SAHF path: load AH into EFLAGS and branch on the unordered result.  */
28502 emit_insn (gen_x86_sahf_1 (reg));
28504 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28505 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* No SAHF: test the C2 bit (0x04) of the status word directly.  */
28509 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28511 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28512 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28515 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28516 gen_rtx_LABEL_REF (VOIDmode, label),
28518 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28520 emit_jump_insn (temp);
/* The unordered case is expected to be rare.  */
28521 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28524 /* Output code to perform a log1p XFmode calculation. */
/* log1p(x) = fyl2xp1(x) * ln(2) when |x| is small enough for fyl2xp1's
   accuracy range (|x| < 1 - sqrt(0.5), approx. 0.2929); otherwise fall
   back to fyl2x(1 + x) * ln(2).  */
28526 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28528 rtx label1 = gen_label_rtx ();
28529 rtx label2 = gen_label_rtx ();
28531 rtx tmp = gen_reg_rtx (XFmode);
28532 rtx tmp2 = gen_reg_rtx (XFmode);
28534 emit_insn (gen_absxf2 (tmp, op1));
28535 emit_insn (gen_cmpxf (tmp,
28536 CONST_DOUBLE_FROM_REAL_VALUE (
28537 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28539 emit_jump_insn (gen_bge (label1));
28541 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28542 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28543 emit_jump (label2);
28545 emit_label (label1);
28546 emit_move_insn (tmp, CONST1_RTX (XFmode));
28547 emit_insn (gen_addxf3 (tmp, op1, tmp));
28548 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28549 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28551 emit_label (label2);
28554 /* Output code to perform a Newton-Raphson approximation of a single precision
28555 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
28557 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28559 rtx x0, x1, e0, e1, two;
28561 x0 = gen_reg_rtx (mode);
28562 e0 = gen_reg_rtx (mode);
28563 e1 = gen_reg_rtx (mode);
28564 x1 = gen_reg_rtx (mode);
28566 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28568 if (VECTOR_MODE_P (mode))
28569 two = ix86_build_const_vector (SFmode, true, two);
28571 two = force_reg (mode, two);
28573 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28575 /* x0 = rcp(b) estimate */
28576 emit_insn (gen_rtx_SET (VOIDmode, x0,
28577 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 */
28580 emit_insn (gen_rtx_SET (VOIDmode, e0,
28581 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
28583 emit_insn (gen_rtx_SET (VOIDmode, e1,
28584 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1  (one refined reciprocal iteration) */
28586 emit_insn (gen_rtx_SET (VOIDmode, x1,
28587 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
28589 emit_insn (gen_rtx_SET (VOIDmode, res,
28590 gen_rtx_MULT (mode, a, x1)));
28593 /* Output code to perform a Newton-Raphson approximation of a
28594 single precision floating point [reciprocal] square root. */
28596 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28599 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28602 x0 = gen_reg_rtx (mode);
28603 e0 = gen_reg_rtx (mode);
28604 e1 = gen_reg_rtx (mode);
28605 e2 = gen_reg_rtx (mode);
28606 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 used by the refinement formula.  */
28608 real_from_integer (&r, VOIDmode, -3, -1, 0);
28609 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28611 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28612 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28614 if (VECTOR_MODE_P (mode))
28616 mthree = ix86_build_const_vector (SFmode, true, mthree);
28617 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28620 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28621 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28623 /* x0 = rsqrt(a) estimate */
28624 emit_insn (gen_rtx_SET (VOIDmode, x0,
28625 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28628 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
/* Mask x0 with (a != 0), so rsqrt(0) = inf is zeroed out before the
   multiply by A below.  */
28633 zero = gen_reg_rtx (mode);
28634 mask = gen_reg_rtx (mode);
28636 zero = force_reg (mode, CONST0_RTX(mode));
28637 emit_insn (gen_rtx_SET (VOIDmode, mask,
28638 gen_rtx_NE (mode, zero, a)));
28640 emit_insn (gen_rtx_SET (VOIDmode, x0,
28641 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a;  e1 = e0 * x0 */
28645 emit_insn (gen_rtx_SET (VOIDmode, e0,
28646 gen_rtx_MULT (mode, x0, a)));
28648 emit_insn (gen_rtx_SET (VOIDmode, e1,
28649 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0 */
28652 mthree = force_reg (mode, mthree);
28653 emit_insn (gen_rtx_SET (VOIDmode, e2,
28654 gen_rtx_PLUS (mode, e1, mthree)));
28656 mhalf = force_reg (mode, mhalf);
/* The elided condition selects between the sqrt and rsqrt variants
   (they differ only in whether e3 scales X0 or E0).  */
28658 /* e3 = -.5 * x0 */
28659 emit_insn (gen_rtx_SET (VOIDmode, e3,
28660 gen_rtx_MULT (mode, x0, mhalf)));
28662 /* e3 = -.5 * e0 */
28663 emit_insn (gen_rtx_SET (VOIDmode, e3,
28664 gen_rtx_MULT (mode, e0, mhalf)));
28665 /* ret = e2 * e3 */
28666 emit_insn (gen_rtx_SET (VOIDmode, res,
28667 gen_rtx_MULT (mode, e2, e3)));
28670 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
28672 static void ATTRIBUTE_UNUSED
28673 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28676 /* With Binutils 2.15, the "@unwind" marker must be specified on
28677 every occurrence of the ".eh_frame" section, not just the first
/* (the elided condition also checks a target/unwind flag here)  */
28680 && strcmp (name, ".eh_frame") == 0)
28682 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28683 flags & SECTION_WRITE ? "aw" : "a");
/* Everything else uses the generic ELF section directive.  */
28686 default_elf_asm_named_section (name, flags, decl);
28689 /* Return the mangling of TYPE if it is an extended fundamental type. */
28691 static const char *
28692 ix86_mangle_type (const_tree type)
28694 type = TYPE_MAIN_VARIANT (type);
/* Only fundamental scalar types can have a special mangling; anything
   else is handled by the language-independent mangler (elided return).  */
28696 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28697 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28700 switch (TYPE_MODE (type))
28703 /* __float128 is "g". */
28706 /* "long double" or __float80 is "e". */
28713 /* For 32-bit code we can save PIC register setup by using
28714 __stack_chk_fail_local hidden function instead of calling
28715 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
28716 register, so it is better to call __stack_chk_fail directly. */
28719 ix86_stack_protect_fail (void)
28721 return TARGET_64BIT
28722 ? default_external_stack_protect_fail ()
28723 : default_hidden_stack_protect_fail ();
28726 /* Select a format to encode pointers in exception handling data. CODE
28727 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
28728 true if the symbol may be affected by dynamic relocations.
28730 ??? All x86 object file formats are capable of representing this.
28731 After all, the relocation needed is the same as for the call insn.
28732 Whether or not a particular assembler allows us to enter such, I
28733 guess we'll have to see. */
28735 asm_preferred_eh_data_format (int code, int global)
/* PIC path (condition elided): pc-relative sdata, 4-byte when the code
   model guarantees small offsets, else 8-byte; indirect for globals.  */
28739 int type = DW_EH_PE_sdata8;
28741 || ix86_cmodel == CM_SMALL_PIC
28742 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28743 type = DW_EH_PE_sdata4;
28744 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute pointers, shrunk to udata4 for small code models.  */
28746 if (ix86_cmodel == CM_SMALL
28747 || (ix86_cmodel == CM_MEDIUM && code))
28748 return DW_EH_PE_udata4;
28749 return DW_EH_PE_absptr;
28752 /* Expand copysign from SIGN to the positive value ABS_VALUE
28753 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* RESULT = ABS_VALUE | (SIGN & ~sign-bit-mask-complement): the sign bit
   of SIGN is isolated with an ANDN-style operation and OR-ed in.  */
28756 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28758 enum machine_mode mode = GET_MODE (sign);
28759 rtx sgn = gen_reg_rtx (mode);
28760 if (mask == NULL_RTX)
28762 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28763 if (!VECTOR_MODE_P (mode))
28765 /* We need to generate a scalar mode mask in this case. */
28766 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28767 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28768 mask = gen_reg_rtx (mode);
28769 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* A caller-supplied MASK is inverted first (elided else-branch context).  */
28773 mask = gen_rtx_NOT (mode, mask);
28774 emit_insn (gen_rtx_SET (VOIDmode, sgn,
28775 gen_rtx_AND (mode, mask, sign)));
28776 emit_insn (gen_rtx_SET (VOIDmode, result,
28777 gen_rtx_IOR (mode, abs_value, sgn)));
28780 /* Expand fabs (OP0) and return a new rtx that holds the result. The
28781 mask for masking out the sign-bit is stored in *SMASK, if that is
/* fabs is implemented as OP0 AND ~sign-bit; the mask is built as an
   inverted signbit mask and reused by callers via *SMASK.  */
28784 ix86_expand_sse_fabs (rtx op0, rtx *smask)
28786 enum machine_mode mode = GET_MODE (op0);
28789 xa = gen_reg_rtx (mode);
28790 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28791 if (!VECTOR_MODE_P (mode))
28793 /* We need to generate a scalar mode mask in this case. */
28794 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28795 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28796 mask = gen_reg_rtx (mode);
28797 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28799 emit_insn (gen_rtx_SET (VOIDmode, xa,
28800 gen_rtx_AND (mode, op0, mask)));
28808 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28809 swapping the operands if SWAP_OPERANDS is true. The expanded
28810 code is a forward jump to a newly created label in case the
28811 comparison is true. The generated label rtx is returned. */
28813 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28814 bool swap_operands)
/* Uses CCFPUmode so unordered (NaN) comparisons are represented
   correctly in the flags.  */
28825 label = gen_label_rtx ();
28826 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28827 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28828 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
28829 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28830 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28831 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28832 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28833 JUMP_LABEL (tmp) = label;
28838 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28839 using comparison code CODE. Operands are swapped for the comparison if
28840 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
28842 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28843 bool swap_operands)
28845 enum machine_mode mode = GET_MODE (op0);
28846 rtx mask = gen_reg_rtx (mode);
/* Emit the scalar CMPSD/CMPSS-style mask compare for DFmode or SFmode.  */
28855 if (mode == DFmode)
28856 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28857 gen_rtx_fmt_ee (code, mode, op0, op1)));
28859 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28860 gen_rtx_fmt_ee (code, mode, op0, op1)));
28865 /* Generate and return a rtx of mode MODE for 2**n where n is the number
28866 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* 2**52 for DFmode, 2**23 for SFmode — adding/subtracting this value is
   the classic rounding trick used by the round/floor/ceil expanders.  */
28868 ix86_gen_TWO52 (enum machine_mode mode)
28870 REAL_VALUE_TYPE TWO52r;
28873 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28874 TWO52 = const_double_from_real_value (TWO52r, mode);
28875 TWO52 = force_reg (mode, TWO52);
28880 /* Expand SSE sequence for computing lround from OP1 storing
28883 ix86_expand_lround (rtx op0, rtx op1)
28885 /* C code for the stuff we're doing below:
28886 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
28889 enum machine_mode mode = GET_MODE (op1);
28890 const struct real_format *fmt;
28891 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28894 /* load nextafter (0.5, 0.0) */
28895 fmt = REAL_MODE_FORMAT (mode);
28896 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28897 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28899 /* adj = copysign (0.5, op1) */
28900 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
28901 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
28903 /* adj = op1 + adj */
28904 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
28906 /* op0 = (imode)adj */
/* Truncating fix after adding just-under-0.5 implements round-half-
   away-from-zero without double-rounding at .5 boundaries.  */
28907 expand_fix (op0, adj, 0);
28910 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
28913 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
28915 /* C code for the stuff we're doing below (for do_floor):
28917 xi -= (double)xi > op1 ? 1 : 0;
28920 enum machine_mode fmode = GET_MODE (op1);
28921 enum machine_mode imode = GET_MODE (op0);
28922 rtx ireg, freg, label, tmp;
28924 /* reg = (long)op1 */
28925 ireg = gen_reg_rtx (imode);
28926 expand_fix (ireg, op1, 0);
28928 /* freg = (double)reg */
28929 freg = gen_reg_rtx (fmode);
28930 expand_float (freg, ireg, 0);
28932 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* The UNLE jump skips the adjustment; operands are swapped for the
   ceil direction so the same compare serves both.  */
28933 label = ix86_expand_sse_compare_and_jump (UNLE,
28934 freg, op1, !do_floor);
28935 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
28936 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
28937 emit_move_insn (ireg, tmp);
28939 emit_label (label);
28940 LABEL_NUSES (label) = 1;
28942 emit_move_insn (op0, ireg);
28945 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
28946 result in OPERAND0. */
28948 ix86_expand_rint (rtx operand0, rtx operand1)
28950 /* C code for the stuff we're doing below:
28951 xa = fabs (operand1);
28952 if (!isless (xa, 2**52))
28954 xa = xa + 2**52 - 2**52;
28955 return copysign (xa, operand1);
28957 enum machine_mode mode = GET_MODE (operand0);
28958 rtx res, xa, label, TWO52, mask;
28960 res = gen_reg_rtx (mode);
28961 emit_move_insn (res, operand1);
28963 /* xa = abs (operand1) */
28964 xa = ix86_expand_sse_fabs (res, &mask);
28966 /* if (!isless (xa, TWO52)) goto label; */
/* Values >= 2**52 (or NaN) are already integral; skip the rounding.  */
28967 TWO52 = ix86_gen_TWO52 (mode);
28968 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28970 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28971 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign (preserves -0.0).  */
28973 ix86_sse_copysign_to_positive (res, xa, res, mask);
28975 emit_label (label);
28976 LABEL_NUSES (label) = 1;
28978 emit_move_insn (operand0, res);
28981 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* 32-bit-safe variant: rounds via the 2**52 trick rather than a DImode
   fix, then compensates by +/-1.0 using a compare mask.  */
28984 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
28986 /* C code for the stuff we expand below.
28987 double xa = fabs (x), x2;
28988 if (!isless (xa, TWO52))
28990 xa = xa + TWO52 - TWO52;
28991 x2 = copysign (xa, x);
29000 enum machine_mode mode = GET_MODE (operand0);
29001 rtx xa, TWO52, tmp, label, one, res, mask;
29003 TWO52 = ix86_gen_TWO52 (mode);
29005 /* Temporary for holding the result, initialized to the input
29006 operand to ease control flow. */
29007 res = gen_reg_rtx (mode);
29008 emit_move_insn (res, operand1);
29010 /* xa = abs (operand1) */
29011 xa = ix86_expand_sse_fabs (res, &mask);
29013 /* if (!isless (xa, TWO52)) goto label; */
29014 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29016 /* xa = xa + TWO52 - TWO52; */
29017 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29018 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29020 /* xa = copysign (xa, operand1) */
29021 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29023 /* generate 1.0 or -1.0 */
29024 one = force_reg (mode,
29025 const_double_from_real_value (do_floor
29026 ? dconst1 : dconstm1, mode));
29028 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29029 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29030 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29031 gen_rtx_AND (mode, one, tmp)));
29032 /* We always need to subtract here to preserve signed zero. */
29033 tmp = expand_simple_binop (mode, MINUS,
29034 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29035 emit_move_insn (res, tmp);
29037 emit_label (label);
29038 LABEL_NUSES (label) = 1;
29040 emit_move_insn (operand0, res);
29043 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* 64-bit variant: rounds via fix-to-DImode/SImode and float-back, then
   compensates by +/-1 using a compare mask.  */
29046 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29048 /* C code for the stuff we expand below.
29049 double xa = fabs (x), x2;
29050 if (!isless (xa, TWO52))
29052 x2 = (double)(long)x;
29059 if (HONOR_SIGNED_ZEROS (mode))
29060 return copysign (x2, x);
29063 enum machine_mode mode = GET_MODE (operand0);
29064 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29066 TWO52 = ix86_gen_TWO52 (mode);
29068 /* Temporary for holding the result, initialized to the input
29069 operand to ease control flow. */
29070 res = gen_reg_rtx (mode);
29071 emit_move_insn (res, operand1);
29073 /* xa = abs (operand1) */
29074 xa = ix86_expand_sse_fabs (res, &mask);
29076 /* if (!isless (xa, TWO52)) goto label; */
29077 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29079 /* xa = (double)(long)x */
29080 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29081 expand_fix (xi, res, 0);
29082 expand_float (xa, xi, 0);
29085 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29087 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29088 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29089 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29090 gen_rtx_AND (mode, one, tmp)));
29091 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29092 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29093 emit_move_insn (res, tmp);
29095 if (HONOR_SIGNED_ZEROS (mode))
29096 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29098 emit_label (label);
29099 LABEL_NUSES (label) = 1;
29101 emit_move_insn (operand0, res);
29104 /* Expand SSE sequence for computing round from OPERAND1 storing
29105 into OPERAND0. Sequence that works without relying on DImode truncation
29106 via cvttsd2siq that is only available on 64bit targets. */
/* NOTE(review): fragmentary listing -- the return type line, braces and blank
   lines from the original file are missing between the numbered lines.  */
29108 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29110 /* C code for the stuff we expand below.
29111 double xa = fabs (x), xa2, x2;
29112 if (!isless (xa, TWO52))
29114 Using the absolute value and copying back sign makes
29115 -0.0 -> -0.0 correct.
29116 xa2 = xa + TWO52 - TWO52;
29121 else if (dxa > 0.5)
29123 x2 = copysign (xa2, x);
29126 enum machine_mode mode = GET_MODE (operand0);
29127 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29129 TWO52 = ix86_gen_TWO52 (mode);
29131 /* Temporary for holding the result, initialized to the input
29132 operand to ease control flow. */
29133 res = gen_reg_rtx (mode);
29134 emit_move_insn (res, operand1);
29136 /* xa = abs (operand1) */
29137 xa = ix86_expand_sse_fabs (res, &mask);
29139 /* if (!isless (xa, TWO52)) goto label; */
29140 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding and subtracting TWO52 (2^52) rounds the magnitude to an integer
   in the FP unit without an integer conversion.  */
29142 /* xa2 = xa + TWO52 - TWO52; */
29143 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29144 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29146 /* dxa = xa2 - xa; */
29147 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29149 /* generate 0.5, 1.0 and -0.5 */
29150 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29151 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29152 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29156 tmp = gen_reg_rtx (mode);
29157 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29158 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29159 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29160 gen_rtx_AND (mode, one, tmp)));
29161 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29162 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29163 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29164 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29165 gen_rtx_AND (mode, one, tmp)));
29166 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29168 /* res = copysign (xa2, operand1) */
29169 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29171 emit_label (label);
29172 LABEL_NUSES (label) = 1;
29174 emit_move_insn (operand0, res);
29177 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* NOTE(review): fragmentary listing -- the return type line, braces and blank
   lines from the original file are missing between the numbered lines.  */
29180 ix86_expand_trunc (rtx operand0, rtx operand1)
29182 /* C code for SSE variant we expand below.
29183 double xa = fabs (x), x2;
29184 if (!isless (xa, TWO52))
29186 x2 = (double)(long)x;
29187 if (HONOR_SIGNED_ZEROS (mode))
29188 return copysign (x2, x);
29191 enum machine_mode mode = GET_MODE (operand0);
29192 rtx xa, xi, TWO52, label, res, mask;
29194 TWO52 = ix86_gen_TWO52 (mode);
29196 /* Temporary for holding the result, initialized to the input
29197 operand to ease control flow. */
29198 res = gen_reg_rtx (mode);
29199 emit_move_insn (res, operand1);
29201 /* xa = abs (operand1) */
29202 xa = ix86_expand_sse_fabs (res, &mask);
29204 /* if (!isless (xa, TWO52)) goto label; */
29205 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Round-trip through the integer mode of matching width: cvttsd2si
   truncates toward zero, which is exactly trunc() semantics.  */
29207 /* x = (double)(long)x */
29208 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29209 expand_fix (xi, res, 0);
29210 expand_float (res, xi, 0);
29212 if (HONOR_SIGNED_ZEROS (mode))
29213 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29215 emit_label (label);
29216 LABEL_NUSES (label) = 1;
29218 emit_move_insn (operand0, res);
29221 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* NOTE(review): fragmentary listing -- the return type line, braces and blank
   lines from the original file are missing between the numbered lines.
   This is the 32-bit-safe variant: it avoids the DImode cvttsd2siq.  */
29224 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29226 enum machine_mode mode = GET_MODE (operand0);
29227 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29229 /* C code for SSE variant we expand below.
29230 double xa = fabs (x), x2;
29231 if (!isless (xa, TWO52))
29233 xa2 = xa + TWO52 - TWO52;
29237 x2 = copysign (xa2, x);
29241 TWO52 = ix86_gen_TWO52 (mode);
29243 /* Temporary for holding the result, initialized to the input
29244 operand to ease control flow. */
29245 res = gen_reg_rtx (mode);
29246 emit_move_insn (res, operand1);
29248 /* xa = abs (operand1) */
29249 xa = ix86_expand_sse_fabs (res, &smask);
29251 /* if (!isless (xa, TWO52)) goto label; */
29252 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29254 /* res = xa + TWO52 - TWO52; */
29255 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29256 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29257 emit_move_insn (res, tmp);
29260 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
/* The TWO52 trick rounds to nearest; subtract 1 when it rounded the
   magnitude up so the result truncates toward zero.  */
29262 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29263 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29264 emit_insn (gen_rtx_SET (VOIDmode, mask,
29265 gen_rtx_AND (mode, mask, one)));
29266 tmp = expand_simple_binop (mode, MINUS,
29267 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29268 emit_move_insn (res, tmp);
29270 /* res = copysign (res, operand1) */
29271 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29273 emit_label (label);
29274 LABEL_NUSES (label) = 1;
29276 emit_move_insn (operand0, res);
29279 /* Expand SSE sequence for computing round from OPERAND1 storing
/* NOTE(review): fragmentary listing -- the return type line, braces and blank
   lines from the original file are missing between the numbered lines.  */
29282 ix86_expand_round (rtx operand0, rtx operand1)
29284 /* C code for the stuff we're doing below:
29285 double xa = fabs (x);
29286 if (!isless (xa, TWO52))
29288 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29289 return copysign (xa, x);
29291 enum machine_mode mode = GET_MODE (operand0);
29292 rtx res, TWO52, xa, label, xi, half, mask;
29293 const struct real_format *fmt;
29294 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29296 /* Temporary for holding the result, initialized to the input
29297 operand to ease control flow. */
29298 res = gen_reg_rtx (mode);
29299 emit_move_insn (res, operand1);
29301 TWO52 = ix86_gen_TWO52 (mode);
29302 xa = ix86_expand_sse_fabs (res, &mask);
29303 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* nextafter (0.5, 0.0) = 0.5 - 2^(-p-1) for a p-bit significand; adding it
   instead of exactly 0.5 keeps values like 0.49999... from rounding up.  */
29305 /* load nextafter (0.5, 0.0) */
29306 fmt = REAL_MODE_FORMAT (mode);
29307 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29308 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29310 /* xa = xa + 0.5 */
29311 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29312 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29314 /* xa = (double)(int64_t)xa */
29315 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29316 expand_fix (xi, xa, 0);
29317 expand_float (xa, xi, 0);
29319 /* res = copysign (xa, operand1) */
29320 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29322 emit_label (label);
29323 LABEL_NUSES (label) = 1;
29325 emit_move_insn (operand0, res);
29329 /* Validate whether a SSE5 instruction is valid or not.
29330 OPERANDS is the array of operands.
29331 NUM is the number of operands.
29332 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29333 NUM_MEMORY is the maximum number of memory operands to accept.
29334 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
29337 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29338 bool uses_oc0, int num_memory, bool commutative)
29344 /* Count the number of memory arguments */
29347 for (i = 0; i < num; i++)
29349 enum machine_mode mode = GET_MODE (operands[i]);
29350 if (register_operand (operands[i], mode))
29353 else if (memory_operand (operands[i], mode))
29355 mem_mask |= (1 << i);
29361 rtx pattern = PATTERN (insn);
29363 /* allow 0 for pcmov */
29364 if (GET_CODE (pattern) != SET
29365 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29367 || operands[i] != CONST0_RTX (mode))
29372 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29373 a memory operation. */
29374 if (num_memory < 0)
29376 num_memory = -num_memory;
29377 if ((mem_mask & (1 << (num-1))) != 0)
29379 mem_mask &= ~(1 << (num-1));
29384 /* If there were no memory operations, allow the insn */
29388 /* Do not allow the destination register to be a memory operand. */
29389 else if (mem_mask & (1 << 0))
29392 /* If there are too many memory operations, disallow the instruction. While
29393 the hardware only allows 1 memory reference, before register allocation
29394 for some insns, we allow two memory operations sometimes in order to allow
29395 code like the following to be optimized:
29397 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29399 or similar cases that are vectorized into using the fmaddss
29401 else if (mem_count > num_memory)
29404 /* Don't allow more than one memory operation if not optimizing. */
29405 else if (mem_count > 1 && !optimize)
29408 else if (num == 4 && mem_count == 1)
29410 /* formats (destination is the first argument), example fmaddss:
29411 xmm1, xmm1, xmm2, xmm3/mem
29412 xmm1, xmm1, xmm2/mem, xmm3
29413 xmm1, xmm2, xmm3/mem, xmm1
29414 xmm1, xmm2/mem, xmm3, xmm1 */
29416 return ((mem_mask == (1 << 1))
29417 || (mem_mask == (1 << 2))
29418 || (mem_mask == (1 << 3)));
29420 /* format, example pmacsdd:
29421 xmm1, xmm2, xmm3/mem, xmm1 */
29423 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29425 return (mem_mask == (1 << 2));
29428 else if (num == 4 && num_memory == 2)
29430 /* If there are two memory operations, we can load one of the memory ops
29431 into the destination register. This is for optimizing the
29432 multiply/add ops, which the combiner has optimized both the multiply
29433 and the add insns to have a memory operation. We have to be careful
29434 that the destination doesn't overlap with the inputs. */
29435 rtx op0 = operands[0];
29437 if (reg_mentioned_p (op0, operands[1])
29438 || reg_mentioned_p (op0, operands[2])
29439 || reg_mentioned_p (op0, operands[3]))
29442 /* formats (destination is the first argument), example fmaddss:
29443 xmm1, xmm1, xmm2, xmm3/mem
29444 xmm1, xmm1, xmm2/mem, xmm3
29445 xmm1, xmm2, xmm3/mem, xmm1
29446 xmm1, xmm2/mem, xmm3, xmm1
29448 For the oc0 case, we will load either operands[1] or operands[3] into
29449 operands[0], so any combination of 2 memory operands is ok. */
29453 /* format, example pmacsdd:
29454 xmm1, xmm2, xmm3/mem, xmm1
29456 For the integer multiply/add instructions be more restrictive and
29457 require operands[2] and operands[3] to be the memory operands. */
29459 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
29461 return (mem_mask == ((1 << 2) | (1 << 3)));
29464 else if (num == 3 && num_memory == 1)
29466 /* formats, example protb:
29467 xmm1, xmm2, xmm3/mem
29468 xmm1, xmm2/mem, xmm3 */
29470 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29472 /* format, example comeq:
29473 xmm1, xmm2, xmm3/mem */
29475 return (mem_mask == (1 << 2));
29479 gcc_unreachable ();
29485 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
29486 hardware will allow by using the destination register to load one of the
29487 memory operations. Presently this is used by the multiply/add routines to
29488 allow 2 memory references. */
/* NOTE(review): fragmentary listing -- the return type line, braces, the
   first condition of the sanity check, and the operand-rewrite statements
   after each emit_move_insn are missing between the numbered lines.  */
29491 ix86_expand_sse5_multiple_memory (rtx operands[],
29493 enum machine_mode mode)
29495 rtx op0 = operands[0];
/* Sanity check: the destination must be a register that does not overlap
   any input, otherwise loading through it would clobber an operand.  */
29497 || memory_operand (op0, mode)
29498 || reg_mentioned_p (op0, operands[1])
29499 || reg_mentioned_p (op0, operands[2])
29500 || reg_mentioned_p (op0, operands[3])
29501 gcc_unreachable ();
29503 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29504 the destination register. */
29505 if (memory_operand (operands[1], mode))
29507 emit_move_insn (op0, operands[1]);
29510 else if (memory_operand (operands[3], mode))
29512 emit_move_insn (op0, operands[3]);
29516 gcc_unreachable ();
29522 /* Table of valid machine attributes. */
/* NOTE(review): fragmentary listing -- the opening brace, some #endif lines
   and the closing brace of this initializer are missing between the
   numbered lines.  */
29523 static const struct attribute_spec ix86_attribute_table[] =
29525 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29526 /* Stdcall attribute says callee is responsible for popping arguments
29527 if they are not variable. */
29528 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29529 /* Fastcall attribute says callee is responsible for popping arguments
29530 if they are not variable. */
29531 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29532 /* Cdecl attribute says the callee is a normal C declaration */
29533 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29534 /* Regparm attribute specifies how many integer arguments are to be
29535 passed in registers. */
29536 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29537 /* Sseregparm attribute says we are using x86_64 calling conventions
29538 for FP arguments. */
29539 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29540 /* force_align_arg_pointer says this function realigns the stack at entry. */
29541 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29542 false, true, true, ix86_handle_cconv_attribute },
29543 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29544 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29545 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29546 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29548 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29549 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29550 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29551 SUBTARGET_ATTRIBUTE_TABLE,
29553 /* ms_abi and sysv_abi calling convention function attributes. */
29554 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29555 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* Sentinel entry terminating the table.  */
29557 { NULL, 0, 0, false, false, false, NULL }
29560 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* NOTE(review): fragmentary listing -- the return type line, braces, and the
   `if (runtime_test)' guard plus the non-taken-branch return are missing
   between the numbered lines.  */
29562 x86_builtin_vectorization_cost (bool runtime_test)
29564 /* If the branch of the runtime test is taken - i.e. - the vectorized
29565 version is skipped - this incurs a misprediction cost (because the
29566 vectorized version is expected to be the fall-through). So we subtract
29567 the latency of a mispredicted branch from the costs that are incured
29568 when the vectorized version is executed.
29570 TODO: The values in individual target tables have to be tuned or new
29571 fields may be needed. For eg. on K8, the default branch path is the
29572 not-taken path. If the taken path is predicted correctly, the minimum
29573 penalty of going down the taken-path is 1 cycle. If the taken-path is
29574 not predicted correctly, then the minimum penalty is 10 cycles. */
29578 return (-(ix86_cost->cond_taken_branch_cost));
29584 /* This function returns the calling abi specific va_list type node.
29585 It returns the FNDECL specific va_list type. */
/* NOTE(review): fragmentary listing -- the return type line, braces and the
   condition guarding the first `return va_list_type_node;' (presumably a
   !TARGET_64BIT check -- TODO confirm) are missing.  */
29588 ix86_fn_abi_va_list (tree fndecl)
29591 return va_list_type_node;
29592 gcc_assert (fndecl != NULL_TREE);
/* MS_ABI functions use the Windows va_list; everything else on 64-bit
   uses the SysV variant.  */
29594 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
29595 return ms_va_list_type_node;
29597 return sysv_va_list_type_node;
29600 /* Returns the canonical va_list type specified by TYPE. If there
29601 is no valid TYPE provided, it return NULL_TREE. */
/* NOTE(review): fragmentary listing -- the return type line, braces, the
   declaration of `wtype'/`htype' and the assignment of `htype' before each
   comparison are missing between the numbered lines.  The visible logic
   checks TYPE against the generic, SysV and MS va_list nodes in turn.  */
29604 ix86_canonical_va_list_type (tree type)
29608 /* Resolve references and pointers to va_list type. */
29609 if (INDIRECT_REF_P (type))
29610 type = TREE_TYPE (type);
29611 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
29612 type = TREE_TYPE (type);
29616 wtype = va_list_type_node;
29617 gcc_assert (wtype != NULL_TREE);
29619 if (TREE_CODE (wtype) == ARRAY_TYPE)
29621 /* If va_list is an array type, the argument may have decayed
29622 to a pointer type, e.g. by being passed to another function.
29623 In that case, unwrap both types so that we can compare the
29624 underlying records. */
29625 if (TREE_CODE (htype) == ARRAY_TYPE
29626 || POINTER_TYPE_P (htype))
29628 wtype = TREE_TYPE (wtype);
29629 htype = TREE_TYPE (htype);
29632 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29633 return va_list_type_node;
29634 wtype = sysv_va_list_type_node;
29635 gcc_assert (wtype != NULL_TREE);
29637 if (TREE_CODE (wtype) == ARRAY_TYPE)
29639 /* If va_list is an array type, the argument may have decayed
29640 to a pointer type, e.g. by being passed to another function.
29641 In that case, unwrap both types so that we can compare the
29642 underlying records. */
29643 if (TREE_CODE (htype) == ARRAY_TYPE
29644 || POINTER_TYPE_P (htype))
29646 wtype = TREE_TYPE (wtype);
29647 htype = TREE_TYPE (htype);
29650 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29651 return sysv_va_list_type_node;
29652 wtype = ms_va_list_type_node;
29653 gcc_assert (wtype != NULL_TREE);
29655 if (TREE_CODE (wtype) == ARRAY_TYPE)
29657 /* If va_list is an array type, the argument may have decayed
29658 to a pointer type, e.g. by being passed to another function.
29659 In that case, unwrap both types so that we can compare the
29660 underlying records. */
29661 if (TREE_CODE (htype) == ARRAY_TYPE
29662 || POINTER_TYPE_P (htype))
29664 wtype = TREE_TYPE (wtype);
29665 htype = TREE_TYPE (htype);
29668 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29669 return ms_va_list_type_node;
/* Fall back to the target-independent handling.  */
29672 return std_canonical_va_list_type (type);
29675 /* Iterate through the target-specific builtin types for va_list.
29676 IDX denotes the iterator, *PTREE is set to the result type of
29677 the va_list builtin, and *PNAME to its internal type.
29678 Returns zero if there is no element for this index, otherwise
29679 IDX should be increased upon the next call.
29680 Note, do not iterate a base builtin's name like __builtin_va_list.
29681 Used from c_common_nodes_and_builtins. */
/* NOTE(review): fragmentary listing -- the return type line, braces, the
   switch/case dispatch on IDX and the return statements are missing
   between the numbered lines.  */
29684 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
29690 *ptree = ms_va_list_type_node;
29691 *pname = "__builtin_ms_va_list";
29694 *ptree = sysv_va_list_type_node;
29695 *pname = "__builtin_sysv_va_list";
29703 /* Initialize the GCC target structure. */
/* NOTE(review): fragmentary listing -- several #endif/#else lines and blank
   lines are missing between the numbered lines.  Each pair below overrides
   a default hook in TARGET_INITIALIZER with the i386 implementation.  */
29704 #undef TARGET_RETURN_IN_MEMORY
29705 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
29707 #undef TARGET_ATTRIBUTE_TABLE
29708 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
29709 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29710 # undef TARGET_MERGE_DECL_ATTRIBUTES
29711 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
29714 #undef TARGET_COMP_TYPE_ATTRIBUTES
29715 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
29717 #undef TARGET_INIT_BUILTINS
29718 #define TARGET_INIT_BUILTINS ix86_init_builtins
29719 #undef TARGET_EXPAND_BUILTIN
29720 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
29722 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
29723 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
29724 ix86_builtin_vectorized_function
29726 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
29727 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
29729 #undef TARGET_BUILTIN_RECIPROCAL
29730 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
29732 #undef TARGET_ASM_FUNCTION_EPILOGUE
29733 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
29735 #undef TARGET_ENCODE_SECTION_INFO
29736 #ifndef SUBTARGET_ENCODE_SECTION_INFO
29737 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
29739 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
29742 #undef TARGET_ASM_OPEN_PAREN
29743 #define TARGET_ASM_OPEN_PAREN ""
29744 #undef TARGET_ASM_CLOSE_PAREN
29745 #define TARGET_ASM_CLOSE_PAREN ""
29747 #undef TARGET_ASM_ALIGNED_HI_OP
29748 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
29749 #undef TARGET_ASM_ALIGNED_SI_OP
29750 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
29752 #undef TARGET_ASM_ALIGNED_DI_OP
29753 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
29756 #undef TARGET_ASM_UNALIGNED_HI_OP
29757 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
29758 #undef TARGET_ASM_UNALIGNED_SI_OP
29759 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
29760 #undef TARGET_ASM_UNALIGNED_DI_OP
29761 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction-scheduling hooks.  */
29763 #undef TARGET_SCHED_ADJUST_COST
29764 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
29765 #undef TARGET_SCHED_ISSUE_RATE
29766 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
29767 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
29768 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
29769 ia32_multipass_dfa_lookahead
29771 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
29772 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
29775 #undef TARGET_HAVE_TLS
29776 #define TARGET_HAVE_TLS true
29778 #undef TARGET_CANNOT_FORCE_CONST_MEM
29779 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
29780 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
29781 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
29783 #undef TARGET_DELEGITIMIZE_ADDRESS
29784 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
29786 #undef TARGET_MS_BITFIELD_LAYOUT_P
29787 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
29790 #undef TARGET_BINDS_LOCAL_P
29791 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
29793 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29794 #undef TARGET_BINDS_LOCAL_P
29795 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
29798 #undef TARGET_ASM_OUTPUT_MI_THUNK
29799 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
29800 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
29801 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
29803 #undef TARGET_ASM_FILE_START
29804 #define TARGET_ASM_FILE_START x86_file_start
29806 #undef TARGET_DEFAULT_TARGET_FLAGS
29807 #define TARGET_DEFAULT_TARGET_FLAGS \
29809 | TARGET_SUBTARGET_DEFAULT \
29810 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
29812 #undef TARGET_HANDLE_OPTION
29813 #define TARGET_HANDLE_OPTION ix86_handle_option
29815 #undef TARGET_RTX_COSTS
29816 #define TARGET_RTX_COSTS ix86_rtx_costs
29817 #undef TARGET_ADDRESS_COST
29818 #define TARGET_ADDRESS_COST ix86_address_cost
29820 #undef TARGET_FIXED_CONDITION_CODE_REGS
29821 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
29822 #undef TARGET_CC_MODES_COMPATIBLE
29823 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
29825 #undef TARGET_MACHINE_DEPENDENT_REORG
29826 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
29828 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
29829 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
/* va_list / varargs hooks (see the ix86_*_va_list functions above).  */
29831 #undef TARGET_BUILD_BUILTIN_VA_LIST
29832 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
29834 #undef TARGET_FN_ABI_VA_LIST
29835 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
29837 #undef TARGET_CANONICAL_VA_LIST_TYPE
29838 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
29840 #undef TARGET_EXPAND_BUILTIN_VA_START
29841 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
29843 #undef TARGET_MD_ASM_CLOBBERS
29844 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
29846 #undef TARGET_PROMOTE_PROTOTYPES
29847 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
29848 #undef TARGET_STRUCT_VALUE_RTX
29849 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
29850 #undef TARGET_SETUP_INCOMING_VARARGS
29851 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
29852 #undef TARGET_MUST_PASS_IN_STACK
29853 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
29854 #undef TARGET_PASS_BY_REFERENCE
29855 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
29856 #undef TARGET_INTERNAL_ARG_POINTER
29857 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
29858 #undef TARGET_UPDATE_STACK_BOUNDARY
29859 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
29860 #undef TARGET_GET_DRAP_RTX
29861 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
29862 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
29863 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
29864 #undef TARGET_STRICT_ARGUMENT_NAMING
29865 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
29867 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
29868 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
29870 #undef TARGET_SCALAR_MODE_SUPPORTED_P
29871 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
29873 #undef TARGET_VECTOR_MODE_SUPPORTED_P
29874 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
29876 #undef TARGET_C_MODE_FOR_SUFFIX
29877 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
29880 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
29881 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
29884 #ifdef SUBTARGET_INSERT_ATTRIBUTES
29885 #undef TARGET_INSERT_ATTRIBUTES
29886 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
29889 #undef TARGET_MANGLE_TYPE
29890 #define TARGET_MANGLE_TYPE ix86_mangle_type
29892 #undef TARGET_STACK_PROTECT_FAIL
29893 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
29895 #undef TARGET_FUNCTION_VALUE
29896 #define TARGET_FUNCTION_VALUE ix86_function_value
29898 #undef TARGET_SECONDARY_RELOAD
29899 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
29901 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
29902 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
29904 #undef TARGET_SET_CURRENT_FUNCTION
29905 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
/* Per-function target-option attribute hooks.  */
29907 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
29908 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
29910 #undef TARGET_OPTION_SAVE
29911 #define TARGET_OPTION_SAVE ix86_function_specific_save
29913 #undef TARGET_OPTION_RESTORE
29914 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
29916 #undef TARGET_OPTION_PRINT
29917 #define TARGET_OPTION_PRINT ix86_function_specific_print
29919 #undef TARGET_OPTION_CAN_INLINE_P
29920 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
29922 #undef TARGET_EXPAND_TO_RTL_HOOK
29923 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
/* The single global target-hook vector, built from the macros above.  */
29925 struct gcc_target targetm = TARGET_INITIALIZER;
29927 #include "gt-i386.h"